/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 1248 by ph10, Wed Feb 13 17:36:38 2013 UTC | revision 1361 by ph10, Fri Sep 6 17:47:32 2013 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 56  possible. There are also some static sup Line 56  possible. There are also some static sup
56  #undef min  #undef min
57  #undef max  #undef max
58    
59  /* The md->capture_last field uses the lower 16 bits for the last captured  /* The md->capture_last field uses the lower 16 bits for the last captured
60  substring (which can never be greater than 65535) and a bit in the top half  substring (which can never be greater than 65535) and a bit in the top half
61  to mean "capture vector overflowed". This odd way of doing things was  to mean "capture vector overflowed". This odd way of doing things was
62  implemented when it was realized that preserving and restoring the overflow bit  implemented when it was realized that preserving and restoring the overflow bit
63  whenever the last capture number was saved/restored made for a neater  whenever the last capture number was saved/restored made for a neater
64  interface, and doing it this way saved on (a) another variable, which would  interface, and doing it this way saved on (a) another variable, which would
65  have increased the stack frame size (a big NO-NO in PCRE) and (b) another  have increased the stack frame size (a big NO-NO in PCRE) and (b) another
66  separate set of save/restore instructions. The following defines are used in  separate set of save/restore instructions. The following defines are used in
67  implementing this. */  implementing this. */
68    
69  #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */  #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
# Line 87  defined PCRE_ERROR_xxx codes, which are Line 87  defined PCRE_ERROR_xxx codes, which are
87  negative to avoid the external error codes. */  negative to avoid the external error codes. */
88    
89  #define MATCH_ACCEPT       (-999)  #define MATCH_ACCEPT       (-999)
90  #define MATCH_COMMIT       (-998)  #define MATCH_KETRPOS      (-998)
91  #define MATCH_KETRPOS      (-997)  #define MATCH_ONCE         (-997)
92  #define MATCH_ONCE         (-996)  /* The next 5 must be kept together and in sequence so that a test that checks
93    for any one of them can use a range. */
94    #define MATCH_COMMIT       (-996)
95  #define MATCH_PRUNE        (-995)  #define MATCH_PRUNE        (-995)
96  #define MATCH_SKIP         (-994)  #define MATCH_SKIP         (-994)
97  #define MATCH_SKIP_ARG     (-993)  #define MATCH_SKIP_ARG     (-993)
98  #define MATCH_THEN         (-992)  #define MATCH_THEN         (-992)
99    #define MATCH_BACKTRACK_MAX MATCH_THEN
100    #define MATCH_BACKTRACK_MIN MATCH_COMMIT
101    
102  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
103  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
# Line 233  if (caseless) Line 237  if (caseless)
237      {      {
238      while (length-- > 0)      while (length-- > 0)
239        {        {
240        pcre_uchar cc, cp;        pcre_uint32 cc, cp;
241        if (eptr >= md->end_subject) return -2;   /* Partial match */        if (eptr >= md->end_subject) return -2;   /* Partial match */
242        cc = RAWUCHARTEST(eptr);        cc = RAWUCHARTEST(eptr);
243        cp = RAWUCHARTEST(p);        cp = RAWUCHARTEST(p);
# Line 308  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 312  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
312         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
313         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
314         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
315         RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };         RM61,  RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
316    
317  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
318  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 777  for (;;) Line 781  for (;;)
781      case OP_FAIL:      case OP_FAIL:
782      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
783    
     /* COMMIT overrides PRUNE, SKIP, and THEN */  
   
784      case OP_COMMIT:      case OP_COMMIT:
785      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
786        eptrb, RM52);        eptrb, RM52);
787      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&  
         rrc != MATCH_THEN)  
       RRETURN(rrc);  
788      RRETURN(MATCH_COMMIT);      RRETURN(MATCH_COMMIT);
789    
     /* PRUNE overrides THEN */  
   
790      case OP_PRUNE:      case OP_PRUNE:
791      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
792        eptrb, RM51);        eptrb, RM51);
793      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
794      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
795    
796      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
# Line 803  for (;;) Line 800  for (;;)
800        eptrb, RM56);        eptrb, RM56);
801      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
802           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
803      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
804      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
805    
     /* SKIP overrides PRUNE and THEN */  
   
806      case OP_SKIP:      case OP_SKIP:
807      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
808        eptrb, RM53);        eptrb, RM53);
809      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       RRETURN(rrc);  
810      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
811      RRETURN(MATCH_SKIP);      RRETURN(MATCH_SKIP);
812    
813      /* Note that, for Perl compatibility, SKIP with an argument does NOT set      /* Note that, for Perl compatibility, SKIP with an argument does NOT set
814      nomatch_mark. There is a flag that disables this opcode when re-matching a      nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
815      pattern that ended with a SKIP for which there was not a matching MARK. */      not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
816        that failed and any that precede it (either they also failed, or were not
817        triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
818        SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
819        set to the count of the one that failed. */
820    
821      case OP_SKIP_ARG:      case OP_SKIP_ARG:
822      if (md->ignore_skip_arg)      md->skip_arg_count++;
823        if (md->skip_arg_count <= md->ignore_skip_arg)
824        {        {
825        ecode += PRIV(OP_lengths)[*ecode] + ecode[1];        ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
826        break;        break;
827        }        }
828      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
829        eptrb, RM57);        eptrb, RM57);
830      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       RRETURN(rrc);  
831    
832      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
833      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
834      caught by a matching MARK, or get to the top, where it causes a rematch      caught by a matching MARK, or get to the top, where it causes a rematch
835      with the md->ignore_skip_arg flag set. */      with md->ignore_skip_arg set to the value of md->skip_arg_count. */
836    
837      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
838      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
# Line 1310  for (;;) Line 1308  for (;;)
1308          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1309          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1310          cb.capture_last     = md->capture_last & CAPLMASK;          cb.capture_last     = md->capture_last & CAPLMASK;
1311          /* Internal change requires this for API compatibility. */          /* Internal change requires this for API compatibility. */
1312          if (cb.capture_last == 0) cb.capture_last = -1;          if (cb.capture_last == 0) cb.capture_last = -1;
1313          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1314          cb.mark             = md->nomatch_mark;          cb.mark             = md->nomatch_mark;
1315          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1316          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1317          }          }
1318        ecode += PRIV(OP_lengths)[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1319          codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1320        }        }
1321    
1322      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1604  for (;;) Line 1603  for (;;)
1603        }        }
1604      else condassert = FALSE;      else condassert = FALSE;
1605    
1606        /* Loop for each branch */
1607    
1608      do      do
1609        {        {
1610        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1611    
1612          /* A match means that the assertion is true; break out of the loop
1613          that matches its alternatives. */
1614    
1615        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1616          {          {
1617          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1618          break;          break;
1619          }          }
1620    
1621          /* If not matched, restore the previous mark setting. */
1622    
1623        md->mark = save_mark;        md->mark = save_mark;
1624    
1625        /* A COMMIT failure must fail the entire assertion, without trying any        /* See comment in the code for capturing groups above about handling
1626        subsequent branches. */        THEN. */
1627    
1628        if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_THEN)
1629            {
1630            next = ecode + GET(ecode,1);
1631            if (md->start_match_ptr < next &&
1632                (*ecode == OP_ALT || *next == OP_ALT))
1633              rrc = MATCH_NOMATCH;
1634            }
1635    
1636        /* PCRE does not allow THEN to escape beyond an assertion; it        /* Anything other than NOMATCH causes the entire assertion to fail,
1637        is treated as NOMATCH. */        passing back the return code. This includes COMMIT, SKIP, PRUNE and an
1638          uncaptured THEN, which means they take their normal effect. This
1639          consistent approach does not always have exactly the same effect as in
1640          Perl. */
1641    
1642        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1643        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1644        }        }
1645      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);   /* Continue for next alternative */
1646    
1647        /* If we have tried all the alternative branches, the assertion has
1648        failed. If not, we broke out after a match. */
1649    
1650      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1651    
# Line 1633  for (;;) Line 1653  for (;;)
1653    
1654      if (condassert) RRETURN(MATCH_MATCH);      if (condassert) RRETURN(MATCH_MATCH);
1655    
1656      /* Continue from after the assertion, updating the offsets high water      /* Continue from after a successful assertion, updating the offsets high
1657      mark, since extracts may have been taken during the assertion. */      water mark, since extracts may have been taken during the assertion. */
1658    
1659      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1660      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1661      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1662      continue;      continue;
1663    
1664      /* Negative assertion: all branches must fail to match. Encountering SKIP,      /* Negative assertion: all branches must fail to match for the assertion to
1665      PRUNE, or COMMIT means we must assume failure without checking subsequent      succeed. */
     branches. */  
1666    
1667      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1668      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
# Line 1655  for (;;) Line 1674  for (;;)
1674        }        }
1675      else condassert = FALSE;      else condassert = FALSE;
1676    
1677        /* Loop for each alternative branch. */
1678    
1679      do      do
1680        {        {
1681        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1682        md->mark = save_mark;        md->mark = save_mark;   /* Always restore the mark setting */
1683        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);  
1684        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        switch(rrc)
1685          {          {
1686          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          case MATCH_MATCH:            /* A successful match means */
1687            case MATCH_ACCEPT:           /* the assertion has failed. */
1688            RRETURN(MATCH_NOMATCH);
1689    
1690            case MATCH_NOMATCH:          /* Carry on with next branch */
1691          break;          break;
1692    
1693            /* See comment in the code for capturing groups above about handling
1694            THEN. */
1695    
1696            case MATCH_THEN:
1697            next = ecode + GET(ecode,1);
1698            if (md->start_match_ptr < next &&
1699                (*ecode == OP_ALT || *next == OP_ALT))
1700              {
1701              rrc = MATCH_NOMATCH;
1702              break;
1703              }
1704            /* Otherwise fall through. */
1705    
1706            /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
1707            assertion to fail to match, without considering any more alternatives.
1708            Failing to match means the assertion is true. This is a consistent
1709            approach, but does not always have the same effect as in Perl. */
1710    
1711            case MATCH_COMMIT:
1712            case MATCH_SKIP:
1713            case MATCH_SKIP_ARG:
1714            case MATCH_PRUNE:
1715            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1716            goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
1717    
1718            /* Anything else is an error */
1719    
1720            default:
1721            RRETURN(rrc);
1722          }          }
1723    
1724        /* PCRE does not allow THEN to escape beyond an assertion; it is treated        /* Continue with next branch */
       as NOMATCH. */  
1725    
       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);  
1726        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1727        }        }
1728      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1729    
1730      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */      /* All branches in the assertion failed to match. */
1731    
1732      ecode += 1 + LINK_SIZE;      NEG_ASSERT_TRUE:
1733        if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1734        ecode += 1 + LINK_SIZE;                /* Continue with current branch */
1735      continue;      continue;
1736    
1737      /* Move the subject pointer back. This occurs only at the start of      /* Move the subject pointer back. This occurs only at the start of
# Line 1737  for (;;) Line 1792  for (;;)
1792        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1793        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1794        cb.capture_last     = md->capture_last & CAPLMASK;        cb.capture_last     = md->capture_last & CAPLMASK;
1795        /* Internal change requires this for API compatibility. */        /* Internal change requires this for API compatibility. */
1796        if (cb.capture_last == 0) cb.capture_last = -1;        if (cb.capture_last == 0) cb.capture_last = -1;
1797        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1798        cb.mark             = md->nomatch_mark;        cb.mark             = md->nomatch_mark;
1799        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
# Line 1784  for (;;) Line 1839  for (;;)
1839        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1840    
1841        new_recursive.group_num = recno;        new_recursive.group_num = recno;
1842        new_recursive.saved_capture_last = md->capture_last;        new_recursive.saved_capture_last = md->capture_last;
1843        new_recursive.subject_position = eptr;        new_recursive.subject_position = eptr;
1844        new_recursive.prevrec = md->recursive;        new_recursive.prevrec = md->recursive;
1845        md->recursive = &new_recursive;        md->recursive = &new_recursive;
# Line 1821  for (;;) Line 1876  for (;;)
1876            md, eptrb, RM6);            md, eptrb, RM6);
1877          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1878              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1879          md->capture_last = new_recursive.saved_capture_last;          md->capture_last = new_recursive.saved_capture_last;
1880          md->recursive = new_recursive.prevrec;          md->recursive = new_recursive.prevrec;
1881          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1882            {            {
# Line 1838  for (;;) Line 1893  for (;;)
1893            goto RECURSION_MATCHED;        /* Exit loop; end processing */            goto RECURSION_MATCHED;        /* Exit loop; end processing */
1894            }            }
1895    
1896          /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it          /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
1897          is treated as NOMATCH. */          recursion; they cause a NOMATCH for the entire recursion. These codes
1898            are defined in a range that can be tested for. */
1899    
1900            if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
1901              RRETURN(MATCH_NOMATCH);
1902    
1903            /* Any return code other than NOMATCH is an error. */
1904    
1905          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&          if (rrc != MATCH_NOMATCH)
                  rrc != MATCH_COMMIT)  
1906            {            {
1907            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1908            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 2629  for (;;) Line 2689  for (;;)
2689            }            }
2690          break;          break;
2691    
2692            case PT_UCNC:
2693            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2694                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2695                 c >= 0xe000) == (op == OP_NOTPROP))
2696              RRETURN(MATCH_NOMATCH);
2697            break;
2698    
2699          /* This should never occur */          /* This should never occur */
2700    
2701          default:          default:
# Line 2675  for (;;) Line 2742  for (;;)
2742      similar code to character type repeats - written out again for speed.      similar code to character type repeats - written out again for speed.
2743      However, if the referenced string is the empty string, always treat      However, if the referenced string is the empty string, always treat
2744      it as matched, any number of times (otherwise there could be infinite      it as matched, any number of times (otherwise there could be infinite
2745      loops). */      loops). If the reference is unset, there are two possibilities:
   
     case OP_REF:  
     case OP_REFI:  
     caseless = op == OP_REFI;  
     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */  
     ecode += 1 + IMM2_SIZE;  
   
     /* If the reference is unset, there are two possibilities:  
2746    
2747      (a) In the default, Perl-compatible state, set the length negative;      (a) In the default, Perl-compatible state, set the length negative;
2748      this ensures that every attempt at a match fails. We can't just fail      this ensures that every attempt at a match fails. We can't just fail
# Line 2693  for (;;) Line 2752  for (;;)
2752      so that the back reference matches an empty string.      so that the back reference matches an empty string.
2753    
2754      Otherwise, set the length to the length of what was matched by the      Otherwise, set the length to the length of what was matched by the
2755      referenced subpattern. */      referenced subpattern.
2756    
2757        The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
2758        or to a non-duplicated named group. For a duplicated named group, OP_DNREF
2759        and OP_DNREFI are used. In this case we must scan the list of groups to
2760        which the name refers, and use the first one that is set. */
2761    
2762        case OP_DNREF:
2763        case OP_DNREFI:
2764        caseless = op == OP_DNREFI;
2765          {
2766          int count = GET2(ecode, 1+IMM2_SIZE);
2767          pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
2768          ecode += 1 + 2*IMM2_SIZE;
2769    
2770          while (count-- > 0)
2771            {
2772            offset = GET2(slot, 0) << 1;
2773            if (offset < offset_top && md->offset_vector[offset] >= 0) break;
2774            slot += md->name_entry_size;
2775            }
2776          if (count < 0)
2777            length = (md->jscript_compat)? 0 : -1;
2778          else
2779            length = md->offset_vector[offset+1] - md->offset_vector[offset];
2780          }
2781        goto REF_REPEAT;
2782    
2783        case OP_REF:
2784        case OP_REFI:
2785        caseless = op == OP_REFI;
2786        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2787        ecode += 1 + IMM2_SIZE;
2788    
2789    
2790      if (offset >= offset_top || md->offset_vector[offset] < 0)      if (offset >= offset_top || md->offset_vector[offset] < 0)
2791        length = (md->jscript_compat)? 0 : -1;        length = (md->jscript_compat)? 0 : -1;
# Line 2702  for (;;) Line 2794  for (;;)
2794    
2795      /* Set up for repetition, or handle the non-repeated case */      /* Set up for repetition, or handle the non-repeated case */
2796    
2797        REF_REPEAT:
2798      switch (*ecode)      switch (*ecode)
2799        {        {
2800        case OP_CRSTAR:        case OP_CRSTAR:
# Line 3215  for (;;) Line 3308  for (;;)
3308    
3309        if (fc < 128)        if (fc < 128)
3310          {          {
3311          pcre_uchar cc = RAWUCHAR(eptr);          pcre_uint32 cc = RAWUCHAR(eptr);
3312          if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);          if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3313          ecode++;          ecode++;
3314          eptr++;          eptr++;
# Line 3320  for (;;) Line 3413  for (;;)
3413      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3414      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3415    
3416      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. We first check
3417        for the minimum number of characters. If the minimum equals the maximum, we
3418        are done. Otherwise, if minimizing, check the rest of the pattern for a
3419        match; if there isn't one, advance up to the maximum, one character at a
3420        time.
3421    
3422        If maximizing, advance up to the maximum number of matching characters,
3423        until eptr is past the end of the maximum run. If possessive, we are
3424        then done (no backing up). Otherwise, match at this position; anything
3425        other than no match is immediately returned. For nomatch, back up one
3426        character, unless we are matching \R and the last thing matched was
3427        \r\n, in which case, back up two bytes. When we reach the first optional
3428        character position, we can save stack by doing a tail recurse.
3429    
3430        The various UTF/non-UTF and caseful/caseless cases are handled separately,
3431        for speed. */
3432    
3433      REPEATCHAR:      REPEATCHAR:
3434  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 3404  for (;;) Line 3512  for (;;)
3512                }                }
3513              }              }
3514    
3515            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
   
3516            for(;;)            for(;;)
3517              {              {
3518                if (eptr == pp) goto TAIL_RECURSE;
3519              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3520              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             if (eptr == pp) { RRETURN(MATCH_NOMATCH); }  
3521  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3522              eptr--;              eptr--;
3523              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 3509  for (;;) Line 3616  for (;;)
3616            if (fc != cc && foc != cc) break;            if (fc != cc && foc != cc) break;
3617            eptr++;            eptr++;
3618            }            }
3619            if (possessive) continue;       /* No backtracking */
3620          if (possessive) continue;          for (;;)
   
         while (eptr >= pp)  
3621            {            {
3622              if (eptr == pp) goto TAIL_RECURSE;
3623            RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3624            eptr--;            eptr--;
3625            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3626            }            }
3627          RRETURN(MATCH_NOMATCH);          /* Control never gets here */
3628          }          }
       /* Control never gets here */  
3629        }        }
3630    
3631      /* Caseful comparisons (includes all multi-byte characters) */      /* Caseful comparisons (includes all multi-byte characters) */
# Line 3568  for (;;) Line 3673  for (;;)
3673            if (fc != RAWUCHARTEST(eptr)) break;            if (fc != RAWUCHARTEST(eptr)) break;
3674            eptr++;            eptr++;
3675            }            }
3676          if (possessive) continue;          if (possessive) continue;    /* No backtracking */
3677            for (;;)
         while (eptr >= pp)  
3678            {            {
3679              if (eptr == pp) goto TAIL_RECURSE;
3680            RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3681            eptr--;            eptr--;
3682            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3683            }            }
3684          RRETURN(MATCH_NOMATCH);          /* Control never gets here */
3685          }          }
3686        }        }
3687      /* Control never gets here */      /* Control never gets here */
# Line 3748  for (;;) Line 3853  for (;;)
3853            }            }
3854          }          }
3855        else        else
3856  #endif  #endif  /* SUPPORT_UTF */
3857        /* Not UTF mode */        /* Not UTF mode */
3858          {          {
3859          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3786  for (;;) Line 3891  for (;;)
3891              }              }
3892            }            }
3893          else          else
3894  #endif  #endif  /*SUPPORT_UTF */
3895          /* Not UTF mode */          /* Not UTF mode */
3896            {            {
3897            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3828  for (;;) Line 3933  for (;;)
3933              if (fc == d || (unsigned int)foc == d) break;              if (fc == d || (unsigned int)foc == d) break;
3934              eptr += len;              eptr += len;
3935              }              }
3936            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
3937            for(;;)            for(;;)
3938              {              {
3939                if (eptr == pp) goto TAIL_RECURSE;
3940              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3941              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3942              if (eptr-- == pp) break;        /* Stop if tried at original pos */              eptr--;
3943              BACKCHAR(eptr);              BACKCHAR(eptr);
3944              }              }
3945            }            }
3946          else          else
3947  #endif  #endif  /* SUPPORT_UTF */
3948          /* Not UTF mode */          /* Not UTF mode */
3949            {            {
3950            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3851  for (;;) Line 3957  for (;;)
3957              if (fc == *eptr || foc == *eptr) break;              if (fc == *eptr || foc == *eptr) break;
3958              eptr++;              eptr++;
3959              }              }
3960            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
3961            while (eptr >= pp)            for (;;)
3962              {              {
3963                if (eptr == pp) goto TAIL_RECURSE;
3964              RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3965              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3966              eptr--;              eptr--;
3967              }              }
3968            }            }
3969            /* Control never gets here */
         RRETURN(MATCH_NOMATCH);  
3970          }          }
       /* Control never gets here */  
3971        }        }
3972    
3973      /* Caseful comparisons */      /* Caseful comparisons */
# Line 3963  for (;;) Line 4068  for (;;)
4068              if (fc == d) break;              if (fc == d) break;
4069              eptr += len;              eptr += len;
4070              }              }
4071            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
4072            for(;;)            for(;;)
4073              {              {
4074                if (eptr == pp) goto TAIL_RECURSE;
4075              RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
4076              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4077              if (eptr-- == pp) break;        /* Stop if tried at original pos */              eptr--;
4078              BACKCHAR(eptr);              BACKCHAR(eptr);
4079              }              }
4080            }            }
# Line 3986  for (;;) Line 4092  for (;;)
4092              if (fc == *eptr) break;              if (fc == *eptr) break;
4093              eptr++;              eptr++;
4094              }              }
4095            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
4096            while (eptr >= pp)            for (;;)
4097              {              {
4098                if (eptr == pp) goto TAIL_RECURSE;
4099              RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
4100              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4101              eptr--;              eptr--;
4102              }              }
4103            }            }
4104            /* Control never gets here */
         RRETURN(MATCH_NOMATCH);  
4105          }          }
4106        }        }
4107      /* Control never gets here */      /* Control never gets here */
# Line 4247  for (;;) Line 4353  for (;;)
4353              }              }
4354            break;            break;
4355    
4356              case PT_UCNC:
4357              for (i = 1; i <= min; i++)
4358                {
4359                if (eptr >= md->end_subject)
4360                  {
4361                  SCHECK_PARTIAL();
4362                  RRETURN(MATCH_NOMATCH);
4363                  }
4364                GETCHARINCTEST(c, eptr);
4365                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4366                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4367                     c >= 0xe000) == prop_fail_result)
4368                  RRETURN(MATCH_NOMATCH);
4369                }
4370              break;
4371    
4372            /* This should not occur */            /* This should not occur */
4373    
4374            default:            default:
# Line 4452  for (;;) Line 4574  for (;;)
4574          case OP_DIGIT:          case OP_DIGIT:
4575          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4576            {            {
4577            pcre_uchar cc;            pcre_uint32 cc;
   
4578            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4579              {              {
4580              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4470  for (;;) Line 4591  for (;;)
4591          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4592          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4593            {            {
4594            pcre_uchar cc;            pcre_uint32 cc;
   
4595            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4596              {              {
4597              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4488  for (;;) Line 4608  for (;;)
4608          case OP_WHITESPACE:          case OP_WHITESPACE:
4609          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4610            {            {
4611            pcre_uchar cc;            pcre_uint32 cc;
   
4612            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4613              {              {
4614              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4506  for (;;) Line 4625  for (;;)
4625          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4626          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4627            {            {
4628            pcre_uchar cc;            pcre_uint32 cc;
   
4629            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4630              {              {
4631              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4524  for (;;) Line 4642  for (;;)
4642          case OP_WORDCHAR:          case OP_WORDCHAR:
4643          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4644            {            {
4645            pcre_uchar cc;            pcre_uint32 cc;
   
4646            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4647              {              {
4648              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4998  for (;;) Line 5115  for (;;)
5115              }              }
5116            /* Control never gets here */            /* Control never gets here */
5117    
5118              case PT_UCNC:
5119              for (fi = min;; fi++)
5120                {
5121                RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);
5122                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5123                if (fi >= max) RRETURN(MATCH_NOMATCH);
5124                if (eptr >= md->end_subject)
5125                  {
5126                  SCHECK_PARTIAL();
5127                  RRETURN(MATCH_NOMATCH);
5128                  }
5129                GETCHARINCTEST(c, eptr);
5130                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5131                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5132                     c >= 0xe000) == prop_fail_result)
5133                  RRETURN(MATCH_NOMATCH);
5134                }
5135              /* Control never gets here */
5136    
5137            /* This should never occur */            /* This should never occur */
5138            default:            default:
5139            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
# Line 5492  for (;;) Line 5628  for (;;)
5628            GOT_MAX:            GOT_MAX:
5629            break;            break;
5630    
5631              case PT_UCNC:
5632              for (i = min; i < max; i++)
5633                {
5634                int len = 1;
5635                if (eptr >= md->end_subject)
5636                  {
5637                  SCHECK_PARTIAL();
5638                  break;
5639                  }
5640                GETCHARLENTEST(c, eptr, len);
5641                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5642                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5643                     c >= 0xe000) == prop_fail_result)
5644                  break;
5645                eptr += len;
5646                }
5647              break;
5648    
5649            default:            default:
5650            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5651            }            }
5652    
5653          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
5654    
5655          if (possessive) continue;          if (possessive) continue;    /* No backtracking */
5656          for(;;)          for(;;)
5657            {            {
5658              if (eptr == pp) goto TAIL_RECURSE;
5659            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5660            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5661            if (eptr-- == pp) break;        /* Stop if tried at original pos */            eptr--;
5662            if (utf) BACKCHAR(eptr);            if (utf) BACKCHAR(eptr);
5663            }            }
5664          }          }
5665    
5666        /* Match extended Unicode sequences. We will get here only if the        /* Match extended Unicode grapheme clusters. We will get here only if the
5667        support is in the binary; otherwise a compile-time error occurs. */        support is in the binary; otherwise a compile-time error occurs. */
5668    
5669        else if (ctype == OP_EXTUNI)        else if (ctype == OP_EXTUNI)
# Line 5540  for (;;) Line 5695  for (;;)
5695    
5696          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
5697    
5698          if (possessive) continue;          if (possessive) continue;    /* No backtracking */
5699    
5700          for(;;)          for(;;)
5701            {            {
5702              int lgb, rgb;
5703              PCRE_PUCHAR fptr;
5704    
5705              if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
5706            RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5707            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5708            if (eptr-- == pp) break;        /* Stop if tried at original pos */  
5709            for (;;)                        /* Move back over one extended */            /* Backtracking over an extended grapheme cluster involves inspecting
5710              the previous two characters (if present) to see if a break is
5711              permitted between them. */
5712    
5713              eptr--;
5714              if (!utf) c = *eptr; else
5715              {              {
5716              if (!utf) c = *eptr; else              BACKCHAR(eptr);
5717                {              GETCHAR(c, eptr);
5718                BACKCHAR(eptr);              }
5719                GETCHAR(c, eptr);            rgb = UCD_GRAPHBREAK(c);
5720                }  
5721              if (UCD_CATEGORY(c) != ucp_M) break;            for (;;)
5722              eptr--;              {
5723                if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
5724                fptr = eptr - 1;
5725                if (!utf) c = *fptr; else
5726                  {
5727                  BACKCHAR(fptr);
5728                  GETCHAR(c, fptr);
5729                  }
5730                lgb = UCD_GRAPHBREAK(c);
5731                if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5732                eptr = fptr;
5733                rgb = lgb;
5734              }              }
5735            }            }
5736          }          }
# Line 5821  for (;;) Line 5996  for (;;)
5996            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5997            }            }
5998    
5999          /* eptr is now past the end of the maximum run. If possessive, we are          if (possessive) continue;    /* No backtracking */
         done (no backing up). Otherwise, match at this position; anything other  
         than no match is immediately returned. For nomatch, back up one  
         character, unless we are matching \R and the last thing matched was  
         \r\n, in which case, back up two bytes. */  
   
         if (possessive) continue;  
6000          for(;;)          for(;;)
6001            {            {
6002              if (eptr == pp) goto TAIL_RECURSE;
6003            RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
6004            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6005            if (eptr-- == pp) break;        /* Stop if tried at original pos */            eptr--;
6006            BACKCHAR(eptr);            BACKCHAR(eptr);
6007            if (ctype == OP_ANYNL && eptr > pp  && RAWUCHAR(eptr) == CHAR_NL &&            if (ctype == OP_ANYNL && eptr > pp  && RAWUCHAR(eptr) == CHAR_NL &&
6008                RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;                RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
# Line 6070  for (;;) Line 6240  for (;;)
6240            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
6241            }            }
6242    
6243          /* eptr is now past the end of the maximum run. If possessive, we are          if (possessive) continue;    /* No backtracking */
6244          done (no backing up). Otherwise, match at this position; anything other          for (;;)
         than no match is immediately returned. For nomatch, back up one  
         character (byte), unless we are matching \R and the last thing matched  
         was \r\n, in which case, back up two bytes. */  
   
         if (possessive) continue;  
         while (eptr >= pp)  
6245            {            {
6246              if (eptr == pp) goto TAIL_RECURSE;
6247            RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6248            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6249            eptr--;            eptr--;
# Line 6086  for (;;) Line 6251  for (;;)
6251                eptr[-1] == CHAR_CR) eptr--;                eptr[-1] == CHAR_CR) eptr--;
6252            }            }
6253          }          }
6254    
6255        /* Get here if we can't make it match with any permitted repetitions */        /* Control never gets here */
   
       RRETURN(MATCH_NOMATCH);  
6256        }        }
     /* Control never gets here */  
6257    
6258      /* There's been some horrible disaster. Arrival here can only mean there is      /* There's been some horrible disaster. Arrival here can only mean there is
6259      something seriously wrong in the code above or the OP_xxx definitions. */      something seriously wrong in the code above or the OP_xxx definitions. */
# Line 6133  switch (frame->Xwhere) Line 6295  switch (frame->Xwhere)
6295    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
6296  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6297    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6298    LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)    LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)
6299  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
6300  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
6301    default:    default:
# Line 6286  const pcre_uint8 *start_bits = NULL; Line 6448  const pcre_uint8 *start_bits = NULL;
6448  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6449  PCRE_PUCHAR end_subject;  PCRE_PUCHAR end_subject;
6450  PCRE_PUCHAR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
6451    PCRE_PUCHAR match_partial = NULL;
6452  PCRE_PUCHAR req_char_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6453    
6454  const pcre_study_data *study;  const pcre_study_data *study;
# Line 6415  md->callout_data = NULL; Line 6578  md->callout_data = NULL;
6578    
6579  tables = re->tables;  tables = re->tables;
6580    
6581    /* The two limit values override the defaults, whatever their value. */
6582    
6583  if (extra_data != NULL)  if (extra_data != NULL)
6584    {    {
6585    register unsigned int flags = extra_data->flags;    register unsigned int flags = extra_data->flags;
# Line 6429  if (extra_data != NULL) Line 6594  if (extra_data != NULL)
6594    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6595    }    }
6596    
6597    /* Limits in the regex override only if they are smaller. */
6598    
6599    if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
6600      md->match_limit = re->limit_match;
6601    
6602    if ((re->flags & PCRE_RLSET) != 0 &&
6603        re->limit_recursion < md->match_limit_recursion)
6604      md->match_limit_recursion = re->limit_recursion;
6605    
6606  /* If the exec call supplied NULL for tables, use the inbuilt ones. This  /* If the exec call supplied NULL for tables, use the inbuilt ones. This
6607  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
6608  in other programs later. */  in other programs later. */
# Line 6454  end_subject = md->end_subject; Line 6628  end_subject = md->end_subject;
6628  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6629  md->use_ucp = (re->options & PCRE_UCP) != 0;  md->use_ucp = (re->options & PCRE_UCP) != 0;
6630  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6631  md->ignore_skip_arg = FALSE;  md->ignore_skip_arg = 0;
6632    
6633  /* Some options are unpacked into BOOL variables in the hope that testing  /* Some options are unpacked into BOOL variables in the hope that testing
6634  them will be faster than individual option bits. */  them will be faster than individual option bits. */
# Line 6836  for(;;) Line 7010  for(;;)
7010    md->match_call_count = 0;    md->match_call_count = 0;
7011    md->match_function_type = 0;    md->match_function_type = 0;
7012    md->end_offset_top = 0;    md->end_offset_top = 0;
7013      md->skip_arg_count = 0;
7014    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
7015    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL)
7016        {
7017        start_partial = md->start_used_ptr;
7018        match_partial = start_match;
7019        }
7020    
7021    switch(rc)    switch(rc)
7022      {      {
# Line 6850  for(;;) Line 7029  for(;;)
7029    
7030      case MATCH_SKIP_ARG:      case MATCH_SKIP_ARG:
7031      new_start_match = start_match;      new_start_match = start_match;
7032      md->ignore_skip_arg = TRUE;      md->ignore_skip_arg = md->skip_arg_count;
7033      break;      break;
7034    
7035      /* SKIP passes back the next starting point explicitly, but if it is the      /* SKIP passes back the next starting point explicitly, but if it is no
7036      same as the match we have just done, treat it as NOMATCH. */      greater than the match we have just done, treat it as NOMATCH. */
7037    
7038      case MATCH_SKIP:      case MATCH_SKIP:
7039      if (md->start_match_ptr != start_match)      if (md->start_match_ptr > start_match)
7040        {        {
7041        new_start_match = md->start_match_ptr;        new_start_match = md->start_match_ptr;
7042        break;        break;
# Line 6865  for(;;) Line 7044  for(;;)
7044      /* Fall through */      /* Fall through */
7045    
7046      /* NOMATCH and PRUNE advance by one character. THEN at this level acts      /* NOMATCH and PRUNE advance by one character. THEN at this level acts
7047      exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */      exactly like PRUNE. Unset ignore SKIP-with-argument. */
7048    
7049      case MATCH_NOMATCH:      case MATCH_NOMATCH:
7050      case MATCH_PRUNE:      case MATCH_PRUNE:
7051      case MATCH_THEN:      case MATCH_THEN:
7052      md->ignore_skip_arg = FALSE;      md->ignore_skip_arg = 0;
7053      new_start_match = start_match + 1;      new_start_match = start_match + 1;
7054  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
7055      if (utf)      if (utf)
# Line 6971  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 7150  if (rc == MATCH_MATCH || rc == MATCH_ACC
7150    /* Set the return code to the number of captured strings, or 0 if there were    /* Set the return code to the number of captured strings, or 0 if there were
7151    too many to fit into the vector. */    too many to fit into the vector. */
7152    
7153    rc = ((md->capture_last & OVFLBIT) != 0 &&    rc = ((md->capture_last & OVFLBIT) != 0 &&
7154           md->end_offset_top >= arg_offset_max)?           md->end_offset_top >= arg_offset_max)?
7155      0 : md->end_offset_top/2;      0 : md->end_offset_top/2;
7156    
# Line 7037  if (rc != MATCH_NOMATCH && rc != PCRE_ER Line 7216  if (rc != MATCH_NOMATCH && rc != PCRE_ER
7216    
7217  /* Handle partial matches - disable any mark data */  /* Handle partial matches - disable any mark data */
7218    
7219  if (start_partial != NULL)  if (match_partial != NULL)
7220    {    {
7221    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7222    md->mark = NULL;    md->mark = NULL;
# Line 7045  if (start_partial != NULL) Line 7224  if (start_partial != NULL)
7224      {      {
7225      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7226      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7227        if (offsetcount > 2)
7228          offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
7229      }      }
7230    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
7231    }    }

Legend:
Removed from v.1248  
changed lines
  Added in v.1361

  ViewVC Help
Powered by ViewVC 1.1.5