/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 1248 by ph10, Wed Feb 13 17:36:38 2013 UTC | revision 1364 by ph10, Sat Oct 5 15:45:11 2013 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 56  possible. There are also some static sup Line 56  possible. There are also some static sup
56  #undef min  #undef min
57  #undef max  #undef max
58    
59  /* The md->capture_last field uses the lower 16 bits for the last captured  /* The md->capture_last field uses the lower 16 bits for the last captured
60  substring (which can never be greater than 65535) and a bit in the top half  substring (which can never be greater than 65535) and a bit in the top half
61  to mean "capture vector overflowed". This odd way of doing things was  to mean "capture vector overflowed". This odd way of doing things was
62  implemented when it was realized that preserving and restoring the overflow bit  implemented when it was realized that preserving and restoring the overflow bit
63  whenever the last capture number was saved/restored made for a neater  whenever the last capture number was saved/restored made for a neater
64  interface, and doing it this way saved on (a) another variable, which would  interface, and doing it this way saved on (a) another variable, which would
65  have increased the stack frame size (a big NO-NO in PCRE) and (b) another  have increased the stack frame size (a big NO-NO in PCRE) and (b) another
66  separate set of save/restore instructions. The following defines are used in  separate set of save/restore instructions. The following defines are used in
67  implementing this. */  implementing this. */
68    
69  #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */  #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
# Line 87  defined PCRE_ERROR_xxx codes, which are Line 87  defined PCRE_ERROR_xxx codes, which are
87  negative to avoid the external error codes. */  negative to avoid the external error codes. */
88    
89  #define MATCH_ACCEPT       (-999)  #define MATCH_ACCEPT       (-999)
90  #define MATCH_COMMIT       (-998)  #define MATCH_KETRPOS      (-998)
91  #define MATCH_KETRPOS      (-997)  #define MATCH_ONCE         (-997)
92  #define MATCH_ONCE         (-996)  /* The next 5 must be kept together and in sequence so that a test that checks
93    for any one of them can use a range. */
94    #define MATCH_COMMIT       (-996)
95  #define MATCH_PRUNE        (-995)  #define MATCH_PRUNE        (-995)
96  #define MATCH_SKIP         (-994)  #define MATCH_SKIP         (-994)
97  #define MATCH_SKIP_ARG     (-993)  #define MATCH_SKIP_ARG     (-993)
98  #define MATCH_THEN         (-992)  #define MATCH_THEN         (-992)
99    #define MATCH_BACKTRACK_MAX MATCH_THEN
100    #define MATCH_BACKTRACK_MIN MATCH_COMMIT
101    
102  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
103  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
# Line 233  if (caseless) Line 237  if (caseless)
237      {      {
238      while (length-- > 0)      while (length-- > 0)
239        {        {
240        pcre_uchar cc, cp;        pcre_uint32 cc, cp;
241        if (eptr >= md->end_subject) return -2;   /* Partial match */        if (eptr >= md->end_subject) return -2;   /* Partial match */
242        cc = RAWUCHARTEST(eptr);        cc = RAWUCHARTEST(eptr);
243        cp = RAWUCHARTEST(p);        cp = RAWUCHARTEST(p);
# Line 308  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 312  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
312         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
313         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
314         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
315         RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };         RM61,  RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
316    
317  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
318  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 777  for (;;) Line 781  for (;;)
781      case OP_FAIL:      case OP_FAIL:
782      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
783    
     /* COMMIT overrides PRUNE, SKIP, and THEN */  
   
784      case OP_COMMIT:      case OP_COMMIT:
785      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
786        eptrb, RM52);        eptrb, RM52);
787      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&  
         rrc != MATCH_THEN)  
       RRETURN(rrc);  
788      RRETURN(MATCH_COMMIT);      RRETURN(MATCH_COMMIT);
789    
     /* PRUNE overrides THEN */  
   
790      case OP_PRUNE:      case OP_PRUNE:
791      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
792        eptrb, RM51);        eptrb, RM51);
793      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
794      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
795    
796      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
# Line 803  for (;;) Line 800  for (;;)
800        eptrb, RM56);        eptrb, RM56);
801      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
802           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
803      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
804      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
805    
     /* SKIP overrides PRUNE and THEN */  
   
806      case OP_SKIP:      case OP_SKIP:
807      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
808        eptrb, RM53);        eptrb, RM53);
809      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       RRETURN(rrc);  
810      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
811      RRETURN(MATCH_SKIP);      RRETURN(MATCH_SKIP);
812    
813      /* Note that, for Perl compatibility, SKIP with an argument does NOT set      /* Note that, for Perl compatibility, SKIP with an argument does NOT set
814      nomatch_mark. There is a flag that disables this opcode when re-matching a      nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
815      pattern that ended with a SKIP for which there was not a matching MARK. */      not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
816        that failed and any that precede it (either they also failed, or were not
817        triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
818        SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
819        set to the count of the one that failed. */
820    
821      case OP_SKIP_ARG:      case OP_SKIP_ARG:
822      if (md->ignore_skip_arg)      md->skip_arg_count++;
823        if (md->skip_arg_count <= md->ignore_skip_arg)
824        {        {
825        ecode += PRIV(OP_lengths)[*ecode] + ecode[1];        ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
826        break;        break;
827        }        }
828      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
829        eptrb, RM57);        eptrb, RM57);
830      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       RRETURN(rrc);  
831    
832      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
833      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
834      caught by a matching MARK, or get to the top, where it causes a rematch      caught by a matching MARK, or get to the top, where it causes a rematch
835      with the md->ignore_skip_arg flag set. */      with md->ignore_skip_arg set to the value of md->skip_arg_count. */
836    
837      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
838      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
# Line 1310  for (;;) Line 1308  for (;;)
1308          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1309          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1310          cb.capture_last     = md->capture_last & CAPLMASK;          cb.capture_last     = md->capture_last & CAPLMASK;
1311          /* Internal change requires this for API compatibility. */          /* Internal change requires this for API compatibility. */
1312          if (cb.capture_last == 0) cb.capture_last = -1;          if (cb.capture_last == 0) cb.capture_last = -1;
1313          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1314          cb.mark             = md->nomatch_mark;          cb.mark             = md->nomatch_mark;
1315          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1316          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1317          }          }
1318        ecode += PRIV(OP_lengths)[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1319          codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1320        }        }
1321    
1322      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1604  for (;;) Line 1603  for (;;)
1603        }        }
1604      else condassert = FALSE;      else condassert = FALSE;
1605    
1606        /* Loop for each branch */
1607    
1608      do      do
1609        {        {
1610        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1611    
1612          /* A match means that the assertion is true; break out of the loop
1613          that matches its alternatives. */
1614    
1615        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1616          {          {
1617          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1618          break;          break;
1619          }          }
1620    
1621          /* If not matched, restore the previous mark setting. */
1622    
1623        md->mark = save_mark;        md->mark = save_mark;
1624    
1625        /* A COMMIT failure must fail the entire assertion, without trying any        /* See comment in the code for capturing groups above about handling
1626        subsequent branches. */        THEN. */
1627    
1628        if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_THEN)
1629            {
1630            next = ecode + GET(ecode,1);
1631            if (md->start_match_ptr < next &&
1632                (*ecode == OP_ALT || *next == OP_ALT))
1633              rrc = MATCH_NOMATCH;
1634            }
1635    
1636        /* PCRE does not allow THEN to escape beyond an assertion; it        /* Anything other than NOMATCH causes the entire assertion to fail,
1637        is treated as NOMATCH. */        passing back the return code. This includes COMMIT, SKIP, PRUNE and an
1638          uncaptured THEN, which means they take their normal effect. This
1639          consistent approach does not always have exactly the same effect as in
1640          Perl. */
1641    
1642        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1643        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1644        }        }
1645      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);   /* Continue for next alternative */
1646    
1647        /* If we have tried all the alternative branches, the assertion has
1648        failed. If not, we broke out after a match. */
1649    
1650      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1651    
# Line 1633  for (;;) Line 1653  for (;;)
1653    
1654      if (condassert) RRETURN(MATCH_MATCH);      if (condassert) RRETURN(MATCH_MATCH);
1655    
1656      /* Continue from after the assertion, updating the offsets high water      /* Continue from after a successful assertion, updating the offsets high
1657      mark, since extracts may have been taken during the assertion. */      water mark, since extracts may have been taken during the assertion. */
1658    
1659      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1660      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1661      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1662      continue;      continue;
1663    
1664      /* Negative assertion: all branches must fail to match. Encountering SKIP,      /* Negative assertion: all branches must fail to match for the assertion to
1665      PRUNE, or COMMIT means we must assume failure without checking subsequent      succeed. */
     branches. */  
1666    
1667      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1668      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
# Line 1655  for (;;) Line 1674  for (;;)
1674        }        }
1675      else condassert = FALSE;      else condassert = FALSE;
1676    
1677        /* Loop for each alternative branch. */
1678    
1679      do      do
1680        {        {
1681        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1682        md->mark = save_mark;        md->mark = save_mark;   /* Always restore the mark setting */
1683        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);  
1684        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        switch(rrc)
1685          {          {
1686          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          case MATCH_MATCH:            /* A successful match means */
1687            case MATCH_ACCEPT:           /* the assertion has failed. */
1688            RRETURN(MATCH_NOMATCH);
1689    
1690            case MATCH_NOMATCH:          /* Carry on with next branch */
1691          break;          break;
1692    
1693            /* See comment in the code for capturing groups above about handling
1694            THEN. */
1695    
1696            case MATCH_THEN:
1697            next = ecode + GET(ecode,1);
1698            if (md->start_match_ptr < next &&
1699                (*ecode == OP_ALT || *next == OP_ALT))
1700              {
1701              rrc = MATCH_NOMATCH;
1702              break;
1703              }
1704            /* Otherwise fall through. */
1705    
1706            /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
1707            assertion to fail to match, without considering any more alternatives.
1708            Failing to match means the assertion is true. This is a consistent
1709            approach, but does not always have the same effect as in Perl. */
1710    
1711            case MATCH_COMMIT:
1712            case MATCH_SKIP:
1713            case MATCH_SKIP_ARG:
1714            case MATCH_PRUNE:
1715            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1716            goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
1717    
1718            /* Anything else is an error */
1719    
1720            default:
1721            RRETURN(rrc);
1722          }          }
1723    
1724        /* PCRE does not allow THEN to escape beyond an assertion; it is treated        /* Continue with next branch */
       as NOMATCH. */  
1725    
       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);  
1726        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1727        }        }
1728      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1729    
1730      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */      /* All branches in the assertion failed to match. */
1731    
1732      ecode += 1 + LINK_SIZE;      NEG_ASSERT_TRUE:
1733        if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1734        ecode += 1 + LINK_SIZE;                /* Continue with current branch */
1735      continue;      continue;
1736    
1737      /* Move the subject pointer back. This occurs only at the start of      /* Move the subject pointer back. This occurs only at the start of
# Line 1737  for (;;) Line 1792  for (;;)
1792        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1793        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1794        cb.capture_last     = md->capture_last & CAPLMASK;        cb.capture_last     = md->capture_last & CAPLMASK;
1795        /* Internal change requires this for API compatibility. */        /* Internal change requires this for API compatibility. */
1796        if (cb.capture_last == 0) cb.capture_last = -1;        if (cb.capture_last == 0) cb.capture_last = -1;
1797        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1798        cb.mark             = md->nomatch_mark;        cb.mark             = md->nomatch_mark;
1799        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
# Line 1784  for (;;) Line 1839  for (;;)
1839        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1840    
1841        new_recursive.group_num = recno;        new_recursive.group_num = recno;
1842        new_recursive.saved_capture_last = md->capture_last;        new_recursive.saved_capture_last = md->capture_last;
1843        new_recursive.subject_position = eptr;        new_recursive.subject_position = eptr;
1844        new_recursive.prevrec = md->recursive;        new_recursive.prevrec = md->recursive;
1845        md->recursive = &new_recursive;        md->recursive = &new_recursive;
# Line 1821  for (;;) Line 1876  for (;;)
1876            md, eptrb, RM6);            md, eptrb, RM6);
1877          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1878              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1879          md->capture_last = new_recursive.saved_capture_last;          md->capture_last = new_recursive.saved_capture_last;
1880          md->recursive = new_recursive.prevrec;          md->recursive = new_recursive.prevrec;
1881          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1882            {            {
# Line 1838  for (;;) Line 1893  for (;;)
1893            goto RECURSION_MATCHED;        /* Exit loop; end processing */            goto RECURSION_MATCHED;        /* Exit loop; end processing */
1894            }            }
1895    
1896          /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it          /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
1897          is treated as NOMATCH. */          recursion; they cause a NOMATCH for the entire recursion. These codes
1898            are defined in a range that can be tested for. */
1899    
1900            if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
1901              RRETURN(MATCH_NOMATCH);
1902    
1903          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&          /* Any return code other than NOMATCH is an error. */
1904                   rrc != MATCH_COMMIT)  
1905            if (rrc != MATCH_NOMATCH)
1906            {            {
1907            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1908            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 2596  for (;;) Line 2656  for (;;)
2656            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2657          break;          break;
2658    
2659            /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2660            which means that Perl space and POSIX space are now identical. PCRE
2661            was changed at release 8.34. */
2662    
2663          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||  
              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)  
                == (op == OP_NOTPROP))  
           RRETURN(MATCH_NOMATCH);  
         break;  
   
2664          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2665          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2666               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
# Line 2629  for (;;) Line 2687  for (;;)
2687            }            }
2688          break;          break;
2689    
2690            case PT_UCNC:
2691            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2692                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2693                 c >= 0xe000) == (op == OP_NOTPROP))
2694              RRETURN(MATCH_NOMATCH);
2695            break;
2696    
2697          /* This should never occur */          /* This should never occur */
2698    
2699          default:          default:
# Line 2675  for (;;) Line 2740  for (;;)
2740      similar code to character type repeats - written out again for speed.      similar code to character type repeats - written out again for speed.
2741      However, if the referenced string is the empty string, always treat      However, if the referenced string is the empty string, always treat
2742      it as matched, any number of times (otherwise there could be infinite      it as matched, any number of times (otherwise there could be infinite
2743      loops). */      loops). If the reference is unset, there are two possibilities:
   
     case OP_REF:  
     case OP_REFI:  
     caseless = op == OP_REFI;  
     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */  
     ecode += 1 + IMM2_SIZE;  
   
     /* If the reference is unset, there are two possibilities:  
2744    
2745      (a) In the default, Perl-compatible state, set the length negative;      (a) In the default, Perl-compatible state, set the length negative;
2746      this ensures that every attempt at a match fails. We can't just fail      this ensures that every attempt at a match fails. We can't just fail
# Line 2693  for (;;) Line 2750  for (;;)
2750      so that the back reference matches an empty string.      so that the back reference matches an empty string.
2751    
2752      Otherwise, set the length to the length of what was matched by the      Otherwise, set the length to the length of what was matched by the
2753      referenced subpattern. */      referenced subpattern.
2754    
2755        The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
2756        or to a non-duplicated named group. For a duplicated named group, OP_DNREF
2757        and OP_DNREFI are used. In this case we must scan the list of groups to
2758        which the name refers, and use the first one that is set. */
2759    
2760        case OP_DNREF:
2761        case OP_DNREFI:
2762        caseless = op == OP_DNREFI;
2763          {
2764          int count = GET2(ecode, 1+IMM2_SIZE);
2765          pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
2766          ecode += 1 + 2*IMM2_SIZE;
2767    
2768          while (count-- > 0)
2769            {
2770            offset = GET2(slot, 0) << 1;
2771            if (offset < offset_top && md->offset_vector[offset] >= 0) break;
2772            slot += md->name_entry_size;
2773            }
2774          if (count < 0)
2775            length = (md->jscript_compat)? 0 : -1;
2776          else
2777            length = md->offset_vector[offset+1] - md->offset_vector[offset];
2778          }
2779        goto REF_REPEAT;
2780    
2781        case OP_REF:
2782        case OP_REFI:
2783        caseless = op == OP_REFI;
2784        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2785        ecode += 1 + IMM2_SIZE;
2786    
2787    
2788      if (offset >= offset_top || md->offset_vector[offset] < 0)      if (offset >= offset_top || md->offset_vector[offset] < 0)
2789        length = (md->jscript_compat)? 0 : -1;        length = (md->jscript_compat)? 0 : -1;
# Line 2702  for (;;) Line 2792  for (;;)
2792    
2793      /* Set up for repetition, or handle the non-repeated case */      /* Set up for repetition, or handle the non-repeated case */
2794    
2795        REF_REPEAT:
2796      switch (*ecode)      switch (*ecode)
2797        {        {
2798        case OP_CRSTAR:        case OP_CRSTAR:
# Line 3215  for (;;) Line 3306  for (;;)
3306    
3307        if (fc < 128)        if (fc < 128)
3308          {          {
3309          pcre_uchar cc = RAWUCHAR(eptr);          pcre_uint32 cc = RAWUCHAR(eptr);
3310          if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);          if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3311          ecode++;          ecode++;
3312          eptr++;          eptr++;
# Line 3320  for (;;) Line 3411  for (;;)
3411      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3412      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3413    
3414      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. We first check
3415        for the minimum number of characters. If the minimum equals the maximum, we
3416        are done. Otherwise, if minimizing, check the rest of the pattern for a
3417        match; if there isn't one, advance up to the maximum, one character at a
3418        time.
3419    
3420        If maximizing, advance up to the maximum number of matching characters,
3421        until eptr is past the end of the maximum run. If possessive, we are
3422        then done (no backing up). Otherwise, match at this position; anything
3423        other than no match is immediately returned. For nomatch, back up one
3424        character, unless we are matching \R and the last thing matched was
3425        \r\n, in which case, back up two bytes. When we reach the first optional
3426        character position, we can save stack by doing a tail recurse.
3427    
3428        The various UTF/non-UTF and caseful/caseless cases are handled separately,
3429        for speed. */
3430    
3431      REPEATCHAR:      REPEATCHAR:
3432  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 3404  for (;;) Line 3510  for (;;)
3510                }                }
3511              }              }
3512    
3513            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
   
3514            for(;;)            for(;;)
3515              {              {
3516                if (eptr == pp) goto TAIL_RECURSE;
3517              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3518              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             if (eptr == pp) { RRETURN(MATCH_NOMATCH); }  
3519  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3520              eptr--;              eptr--;
3521              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 3509  for (;;) Line 3614  for (;;)
3614            if (fc != cc && foc != cc) break;            if (fc != cc && foc != cc) break;
3615            eptr++;            eptr++;
3616            }            }
3617            if (possessive) continue;       /* No backtracking */
3618          if (possessive) continue;          for (;;)
   
         while (eptr >= pp)  
3619            {            {
3620              if (eptr == pp) goto TAIL_RECURSE;
3621            RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3622            eptr--;            eptr--;
3623            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3624            }            }
3625          RRETURN(MATCH_NOMATCH);          /* Control never gets here */
3626          }          }
       /* Control never gets here */  
3627        }        }
3628    
3629      /* Caseful comparisons (includes all multi-byte characters) */      /* Caseful comparisons (includes all multi-byte characters) */
# Line 3568  for (;;) Line 3671  for (;;)
3671            if (fc != RAWUCHARTEST(eptr)) break;            if (fc != RAWUCHARTEST(eptr)) break;
3672            eptr++;            eptr++;
3673            }            }
3674          if (possessive) continue;          if (possessive) continue;    /* No backtracking */
3675            for (;;)
         while (eptr >= pp)  
3676            {            {
3677              if (eptr == pp) goto TAIL_RECURSE;
3678            RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3679            eptr--;            eptr--;
3680            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3681            }            }
3682          RRETURN(MATCH_NOMATCH);          /* Control never gets here */
3683          }          }
3684        }        }
3685      /* Control never gets here */      /* Control never gets here */
# Line 3748  for (;;) Line 3851  for (;;)
3851            }            }
3852          }          }
3853        else        else
3854  #endif  #endif  /* SUPPORT_UTF */
3855        /* Not UTF mode */        /* Not UTF mode */
3856          {          {
3857          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3786  for (;;) Line 3889  for (;;)
3889              }              }
3890            }            }
3891          else          else
3892  #endif  #endif  /*SUPPORT_UTF */
3893          /* Not UTF mode */          /* Not UTF mode */
3894            {            {
3895            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3828  for (;;) Line 3931  for (;;)
3931              if (fc == d || (unsigned int)foc == d) break;              if (fc == d || (unsigned int)foc == d) break;
3932              eptr += len;              eptr += len;
3933              }              }
3934            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
3935            for(;;)            for(;;)
3936              {              {
3937                if (eptr == pp) goto TAIL_RECURSE;
3938              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3939              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3940              if (eptr-- == pp) break;        /* Stop if tried at original pos */              eptr--;
3941              BACKCHAR(eptr);              BACKCHAR(eptr);
3942              }              }
3943            }            }
3944          else          else
3945  #endif  #endif  /* SUPPORT_UTF */
3946          /* Not UTF mode */          /* Not UTF mode */
3947            {            {
3948            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3851  for (;;) Line 3955  for (;;)
3955              if (fc == *eptr || foc == *eptr) break;              if (fc == *eptr || foc == *eptr) break;
3956              eptr++;              eptr++;
3957              }              }
3958            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
3959            while (eptr >= pp)            for (;;)
3960              {              {
3961                if (eptr == pp) goto TAIL_RECURSE;
3962              RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3963              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3964              eptr--;              eptr--;
3965              }              }
3966            }            }
3967            /* Control never gets here */
         RRETURN(MATCH_NOMATCH);  
3968          }          }
       /* Control never gets here */  
3969        }        }
3970    
3971      /* Caseful comparisons */      /* Caseful comparisons */
# Line 3963  for (;;) Line 4066  for (;;)
4066              if (fc == d) break;              if (fc == d) break;
4067              eptr += len;              eptr += len;
4068              }              }
4069            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
4070            for(;;)            for(;;)
4071              {              {
4072                if (eptr == pp) goto TAIL_RECURSE;
4073              RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
4074              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4075              if (eptr-- == pp) break;        /* Stop if tried at original pos */              eptr--;
4076              BACKCHAR(eptr);              BACKCHAR(eptr);
4077              }              }
4078            }            }
# Line 3986  for (;;) Line 4090  for (;;)
4090              if (fc == *eptr) break;              if (fc == *eptr) break;
4091              eptr++;              eptr++;
4092              }              }
4093            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
4094            while (eptr >= pp)            for (;;)
4095              {              {
4096                if (eptr == pp) goto TAIL_RECURSE;
4097              RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
4098              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4099              eptr--;              eptr--;
4100              }              }
4101            }            }
4102            /* Control never gets here */
         RRETURN(MATCH_NOMATCH);  
4103          }          }
4104        }        }
4105      /* Control never gets here */      /* Control never gets here */
# Line 4177  for (;;) Line 4281  for (;;)
4281              }              }
4282            break;            break;
4283    
4284              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4285              which means that Perl space and POSIX space are now identical. PCRE
4286              was changed at release 8.34. */
4287    
4288            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
           for (i = 1; i <= min; i++)  
             {  
             if (eptr >= md->end_subject)  
               {  
               SCHECK_PARTIAL();  
               RRETURN(MATCH_NOMATCH);  
               }  
             GETCHARINCTEST(c, eptr);  
             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
                  c == CHAR_FF || c == CHAR_CR)  
                    == prop_fail_result)  
               RRETURN(MATCH_NOMATCH);  
             }  
           break;  
   
4289            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
4290            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4291              {              {
# Line 4247  for (;;) Line 4340  for (;;)
4340              }              }
4341            break;            break;
4342    
4343              case PT_UCNC:
4344              for (i = 1; i <= min; i++)
4345                {
4346                if (eptr >= md->end_subject)
4347                  {
4348                  SCHECK_PARTIAL();
4349                  RRETURN(MATCH_NOMATCH);
4350                  }
4351                GETCHARINCTEST(c, eptr);
4352                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4353                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4354                     c >= 0xe000) == prop_fail_result)
4355                  RRETURN(MATCH_NOMATCH);
4356                }
4357              break;
4358    
4359            /* This should not occur */            /* This should not occur */
4360    
4361            default:            default:
# Line 4452  for (;;) Line 4561  for (;;)
4561          case OP_DIGIT:          case OP_DIGIT:
4562          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4563            {            {
4564            pcre_uchar cc;            pcre_uint32 cc;
   
4565            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4566              {              {
4567              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4470  for (;;) Line 4578  for (;;)
4578          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4579          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4580            {            {
4581            pcre_uchar cc;            pcre_uint32 cc;
   
4582            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4583              {              {
4584              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4488  for (;;) Line 4595  for (;;)
4595          case OP_WHITESPACE:          case OP_WHITESPACE:
4596          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4597            {            {
4598            pcre_uchar cc;            pcre_uint32 cc;
   
4599            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4600              {              {
4601              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4506  for (;;) Line 4612  for (;;)
4612          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4613          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4614            {            {
4615            pcre_uchar cc;            pcre_uint32 cc;
   
4616            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4617              {              {
4618              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4524  for (;;) Line 4629  for (;;)
4629          case OP_WORDCHAR:          case OP_WORDCHAR:
4630          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4631            {            {
4632            pcre_uchar cc;            pcre_uint32 cc;
   
4633            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4634              {              {
4635              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4914  for (;;) Line 5018  for (;;)
5018              }              }
5019            /* Control never gets here */            /* Control never gets here */
5020    
5021              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5022              which means that Perl space and POSIX space are now identical. PCRE
5023              was changed at release 8.34. */
5024    
5025            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
           for (fi = min;; fi++)  
             {  
             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);  
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
             if (fi >= max) RRETURN(MATCH_NOMATCH);  
             if (eptr >= md->end_subject)  
               {  
               SCHECK_PARTIAL();  
               RRETURN(MATCH_NOMATCH);  
               }  
             GETCHARINCTEST(c, eptr);  
             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
                  c == CHAR_FF || c == CHAR_CR)  
                    == prop_fail_result)  
               RRETURN(MATCH_NOMATCH);  
             }  
           /* Control never gets here */  
   
5026            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
5027            for (fi = min;; fi++)            for (fi = min;; fi++)
5028              {              {
# Line 4998  for (;;) Line 5088  for (;;)
5088              }              }
5089            /* Control never gets here */            /* Control never gets here */
5090    
5091              case PT_UCNC:
5092              for (fi = min;; fi++)
5093                {
5094                RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);
5095                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5096                if (fi >= max) RRETURN(MATCH_NOMATCH);
5097                if (eptr >= md->end_subject)
5098                  {
5099                  SCHECK_PARTIAL();
5100                  RRETURN(MATCH_NOMATCH);
5101                  }
5102                GETCHARINCTEST(c, eptr);
5103                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5104                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5105                     c >= 0xe000) == prop_fail_result)
5106                  RRETURN(MATCH_NOMATCH);
5107                }
5108              /* Control never gets here */
5109    
5110            /* This should never occur */            /* This should never occur */
5111            default:            default:
5112            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
# Line 5413  for (;;) Line 5522  for (;;)
5522              }              }
5523            break;            break;
5524    
5525              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5526              which means that Perl space and POSIX space are now identical. PCRE
5527              was changed at release 8.34. */
5528    
5529            case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
           for (i = min; i < max; i++)  
             {  
             int len = 1;  
             if (eptr >= md->end_subject)  
               {  
               SCHECK_PARTIAL();  
               break;  
               }  
             GETCHARLENTEST(c, eptr, len);  
             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
                  c == CHAR_FF || c == CHAR_CR)  
                  == prop_fail_result)  
               break;  
             eptr+= len;  
             }  
           break;  
   
5530            case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
5531            for (i = min; i < max; i++)            for (i = min; i < max; i++)
5532              {              {
# Line 5492  for (;;) Line 5588  for (;;)
5588            GOT_MAX:            GOT_MAX:
5589            break;            break;
5590    
5591              case PT_UCNC:
5592              for (i = min; i < max; i++)
5593                {
5594                int len = 1;
5595                if (eptr >= md->end_subject)
5596                  {
5597                  SCHECK_PARTIAL();
5598                  break;
5599                  }
5600                GETCHARLENTEST(c, eptr, len);
5601                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5602                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5603                     c >= 0xe000) == prop_fail_result)
5604                  break;
5605                eptr += len;
5606                }
5607              break;
5608    
5609            default:            default:
5610            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5611            }            }
5612    
5613          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
5614    
5615          if (possessive) continue;          if (possessive) continue;    /* No backtracking */
5616          for(;;)          for(;;)
5617            {            {
5618              if (eptr == pp) goto TAIL_RECURSE;
5619            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5620            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5621            if (eptr-- == pp) break;        /* Stop if tried at original pos */            eptr--;
5622            if (utf) BACKCHAR(eptr);            if (utf) BACKCHAR(eptr);
5623            }            }
5624          }          }
5625    
5626        /* Match extended Unicode sequences. We will get here only if the        /* Match extended Unicode grapheme clusters. We will get here only if the
5627        support is in the binary; otherwise a compile-time error occurs. */        support is in the binary; otherwise a compile-time error occurs. */
5628    
5629        else if (ctype == OP_EXTUNI)        else if (ctype == OP_EXTUNI)
# Line 5540  for (;;) Line 5655  for (;;)
5655    
5656          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
5657    
5658          if (possessive) continue;          if (possessive) continue;    /* No backtracking */
5659    
5660          for(;;)          for(;;)
5661            {            {
5662              int lgb, rgb;
5663              PCRE_PUCHAR fptr;
5664    
5665              if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
5666            RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5667            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5668            if (eptr-- == pp) break;        /* Stop if tried at original pos */  
5669            for (;;)                        /* Move back over one extended */            /* Backtracking over an extended grapheme cluster involves inspecting
5670              the previous two characters (if present) to see if a break is
5671              permitted between them. */
5672    
5673              eptr--;
5674              if (!utf) c = *eptr; else
5675              {              {
5676              if (!utf) c = *eptr; else              BACKCHAR(eptr);
5677                {              GETCHAR(c, eptr);
5678                BACKCHAR(eptr);              }
5679                GETCHAR(c, eptr);            rgb = UCD_GRAPHBREAK(c);
5680                }  
5681              if (UCD_CATEGORY(c) != ucp_M) break;            for (;;)
5682              eptr--;              {
5683                if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
5684                fptr = eptr - 1;
5685                if (!utf) c = *fptr; else
5686                  {
5687                  BACKCHAR(fptr);
5688                  GETCHAR(c, fptr);
5689                  }
5690                lgb = UCD_GRAPHBREAK(c);
5691                if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5692                eptr = fptr;
5693                rgb = lgb;
5694              }              }
5695            }            }
5696          }          }
# Line 5821  for (;;) Line 5956  for (;;)
5956            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5957            }            }
5958    
5959          /* eptr is now past the end of the maximum run. If possessive, we are          if (possessive) continue;    /* No backtracking */
         done (no backing up). Otherwise, match at this position; anything other  
         than no match is immediately returned. For nomatch, back up one  
         character, unless we are matching \R and the last thing matched was  
         \r\n, in which case, back up two bytes. */  
   
         if (possessive) continue;  
5960          for(;;)          for(;;)
5961            {            {
5962              if (eptr == pp) goto TAIL_RECURSE;
5963            RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5964            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5965            if (eptr-- == pp) break;        /* Stop if tried at original pos */            eptr--;
5966            BACKCHAR(eptr);            BACKCHAR(eptr);
5967            if (ctype == OP_ANYNL && eptr > pp  && RAWUCHAR(eptr) == CHAR_NL &&            if (ctype == OP_ANYNL && eptr > pp  && RAWUCHAR(eptr) == CHAR_NL &&
5968                RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;                RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
# Line 6070  for (;;) Line 6200  for (;;)
6200            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
6201            }            }
6202    
6203          /* eptr is now past the end of the maximum run. If possessive, we are          if (possessive) continue;    /* No backtracking */
6204          done (no backing up). Otherwise, match at this position; anything other          for (;;)
         than no match is immediately returned. For nomatch, back up one  
         character (byte), unless we are matching \R and the last thing matched  
         was \r\n, in which case, back up two bytes. */  
   
         if (possessive) continue;  
         while (eptr >= pp)  
6205            {            {
6206              if (eptr == pp) goto TAIL_RECURSE;
6207            RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6208            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6209            eptr--;            eptr--;
# Line 6086  for (;;) Line 6211  for (;;)
6211                eptr[-1] == CHAR_CR) eptr--;                eptr[-1] == CHAR_CR) eptr--;
6212            }            }
6213          }          }
6214    
6215        /* Get here if we can't make it match with any permitted repetitions */        /* Control never gets here */
   
       RRETURN(MATCH_NOMATCH);  
6216        }        }
     /* Control never gets here */  
6217    
6218      /* There's been some horrible disaster. Arrival here can only mean there is      /* There's been some horrible disaster. Arrival here can only mean there is
6219      something seriously wrong in the code above or the OP_xxx definitions. */      something seriously wrong in the code above or the OP_xxx definitions. */
# Line 6133  switch (frame->Xwhere) Line 6255  switch (frame->Xwhere)
6255    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
6256  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6257    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6258    LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)    LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)
6259  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
6260  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
6261    default:    default:
# Line 6286  const pcre_uint8 *start_bits = NULL; Line 6408  const pcre_uint8 *start_bits = NULL;
6408  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6409  PCRE_PUCHAR end_subject;  PCRE_PUCHAR end_subject;
6410  PCRE_PUCHAR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
6411    PCRE_PUCHAR match_partial = NULL;
6412  PCRE_PUCHAR req_char_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6413    
6414  const pcre_study_data *study;  const pcre_study_data *study;
# Line 6415  md->callout_data = NULL; Line 6538  md->callout_data = NULL;
6538    
6539  tables = re->tables;  tables = re->tables;
6540    
6541    /* The two limit values override the defaults, whatever their value. */
6542    
6543  if (extra_data != NULL)  if (extra_data != NULL)
6544    {    {
6545    register unsigned int flags = extra_data->flags;    register unsigned int flags = extra_data->flags;
# Line 6429  if (extra_data != NULL) Line 6554  if (extra_data != NULL)
6554    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6555    }    }
6556    
6557    /* Limits in the regex override only if they are smaller. */
6558    
6559    if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
6560      md->match_limit = re->limit_match;
6561    
6562    if ((re->flags & PCRE_RLSET) != 0 &&
6563        re->limit_recursion < md->match_limit_recursion)
6564      md->match_limit_recursion = re->limit_recursion;
6565    
6566  /* If the exec call supplied NULL for tables, use the inbuilt ones. This  /* If the exec call supplied NULL for tables, use the inbuilt ones. This
6567  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
6568  in other programs later. */  in other programs later. */
# Line 6454  end_subject = md->end_subject; Line 6588  end_subject = md->end_subject;
6588  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6589  md->use_ucp = (re->options & PCRE_UCP) != 0;  md->use_ucp = (re->options & PCRE_UCP) != 0;
6590  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6591  md->ignore_skip_arg = FALSE;  md->ignore_skip_arg = 0;
6592    
6593  /* Some options are unpacked into BOOL variables in the hope that testing  /* Some options are unpacked into BOOL variables in the hope that testing
6594  them will be faster than individual option bits. */  them will be faster than individual option bits. */
# Line 6836  for(;;) Line 6970  for(;;)
6970    md->match_call_count = 0;    md->match_call_count = 0;
6971    md->match_function_type = 0;    md->match_function_type = 0;
6972    md->end_offset_top = 0;    md->end_offset_top = 0;
6973      md->skip_arg_count = 0;
6974    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6975    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL)
6976        {
6977        start_partial = md->start_used_ptr;
6978        match_partial = start_match;
6979        }
6980    
6981    switch(rc)    switch(rc)
6982      {      {
# Line 6850  for(;;) Line 6989  for(;;)
6989    
6990      case MATCH_SKIP_ARG:      case MATCH_SKIP_ARG:
6991      new_start_match = start_match;      new_start_match = start_match;
6992      md->ignore_skip_arg = TRUE;      md->ignore_skip_arg = md->skip_arg_count;
6993      break;      break;
6994    
6995      /* SKIP passes back the next starting point explicitly, but if it is the      /* SKIP passes back the next starting point explicitly, but if it is no
6996      same as the match we have just done, treat it as NOMATCH. */      greater than the match we have just done, treat it as NOMATCH. */
6997    
6998      case MATCH_SKIP:      case MATCH_SKIP:
6999      if (md->start_match_ptr != start_match)      if (md->start_match_ptr > start_match)
7000        {        {
7001        new_start_match = md->start_match_ptr;        new_start_match = md->start_match_ptr;
7002        break;        break;
# Line 6865  for(;;) Line 7004  for(;;)
7004      /* Fall through */      /* Fall through */
7005    
7006      /* NOMATCH and PRUNE advance by one character. THEN at this level acts      /* NOMATCH and PRUNE advance by one character. THEN at this level acts
7007      exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */      exactly like PRUNE. Unset ignore SKIP-with-argument. */
7008    
7009      case MATCH_NOMATCH:      case MATCH_NOMATCH:
7010      case MATCH_PRUNE:      case MATCH_PRUNE:
7011      case MATCH_THEN:      case MATCH_THEN:
7012      md->ignore_skip_arg = FALSE;      md->ignore_skip_arg = 0;
7013      new_start_match = start_match + 1;      new_start_match = start_match + 1;
7014  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
7015      if (utf)      if (utf)
# Line 6971  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 7110  if (rc == MATCH_MATCH || rc == MATCH_ACC
7110    /* Set the return code to the number of captured strings, or 0 if there were    /* Set the return code to the number of captured strings, or 0 if there were
7111    too many to fit into the vector. */    too many to fit into the vector. */
7112    
7113    rc = ((md->capture_last & OVFLBIT) != 0 &&    rc = ((md->capture_last & OVFLBIT) != 0 &&
7114           md->end_offset_top >= arg_offset_max)?           md->end_offset_top >= arg_offset_max)?
7115      0 : md->end_offset_top/2;      0 : md->end_offset_top/2;
7116    
# Line 7037  if (rc != MATCH_NOMATCH && rc != PCRE_ER Line 7176  if (rc != MATCH_NOMATCH && rc != PCRE_ER
7176    
7177  /* Handle partial matches - disable any mark data */  /* Handle partial matches - disable any mark data */
7178    
7179  if (start_partial != NULL)  if (match_partial != NULL)
7180    {    {
7181    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7182    md->mark = NULL;    md->mark = NULL;
# Line 7045  if (start_partial != NULL) Line 7184  if (start_partial != NULL)
7184      {      {
7185      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7186      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7187        if (offsetcount > 2)
7188          offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
7189      }      }
7190    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
7191    }    }

Legend:
Removed from v.1248  
changed lines
  Added in v.1364

  ViewVC Help
Powered by ViewVC 1.1.5