/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 1155 by ph10, Sun Oct 21 17:13:28 2012 UTC revision 1311 by ph10, Mon Apr 22 17:35:23 2013 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 56  possible. There are also some static sup Line 56  possible. There are also some static sup
56  #undef min  #undef min
57  #undef max  #undef max
58    
59    /* The md->capture_last field uses the lower 16 bits for the last captured
60    substring (which can never be greater than 65535) and a bit in the top half
61    to mean "capture vector overflowed". This odd way of doing things was
62    implemented when it was realized that preserving and restoring the overflow bit
63    whenever the last capture number was saved/restored made for a neater
64    interface, and doing it this way saved on (a) another variable, which would
65    have increased the stack frame size (a big NO-NO in PCRE) and (b) another
66    separate set of save/restore instructions. The following defines are used in
67    implementing this. */
68    
69    #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
70    #define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
71    #define OVFLBIT     0x00010000    /* The bit that is set for overflow */
72    
73  /* Values for setting in md->match_function_type to indicate two special types  /* Values for setting in md->match_function_type to indicate two special types
74  of call to match(). We do it this way to save on using another stack variable,  of call to match(). We do it this way to save on using another stack variable,
75  as stack usage is to be discouraged. */  as stack usage is to be discouraged. */
# Line 73  defined PCRE_ERROR_xxx codes, which are Line 87  defined PCRE_ERROR_xxx codes, which are
87  negative to avoid the external error codes. */  negative to avoid the external error codes. */
88    
89  #define MATCH_ACCEPT       (-999)  #define MATCH_ACCEPT       (-999)
90  #define MATCH_COMMIT       (-998)  #define MATCH_KETRPOS      (-998)
91  #define MATCH_KETRPOS      (-997)  #define MATCH_ONCE         (-997)
92  #define MATCH_ONCE         (-996)  /* The next 5 must be kept together and in sequence so that a test that checks
93    for any one of them can use a range. */
94    #define MATCH_COMMIT       (-996)
95  #define MATCH_PRUNE        (-995)  #define MATCH_PRUNE        (-995)
96  #define MATCH_SKIP         (-994)  #define MATCH_SKIP         (-994)
97  #define MATCH_SKIP_ARG     (-993)  #define MATCH_SKIP_ARG     (-993)
98  #define MATCH_THEN         (-992)  #define MATCH_THEN         (-992)
99    #define MATCH_BACKTRACK_MAX MATCH_THEN
100    #define MATCH_BACKTRACK_MIN MATCH_COMMIT
101    
102  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
103  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
# Line 199  if (caseless) Line 217  if (caseless)
217        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
218        GETCHARINC(d, p);        GETCHARINC(d, p);
219        ur = GET_UCD(d);        ur = GET_UCD(d);
220        if (c != d && c != d + ur->other_case)        if (c != d && c != d + ur->other_case)
221          {          {
222          const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;          const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
223          for (;;)          for (;;)
224            {            {
225            if (c < *pp) return -1;            if (c < *pp) return -1;
# Line 219  if (caseless) Line 237  if (caseless)
237      {      {
238      while (length-- > 0)      while (length-- > 0)
239        {        {
240        pcre_uchar cc, cp;        pcre_uint32 cc, cp;
241        if (eptr >= md->end_subject) return -2;   /* Partial match */        if (eptr >= md->end_subject) return -2;   /* Partial match */
242        cc = RAWUCHARTEST(eptr);        cc = RAWUCHARTEST(eptr);
243        cp = RAWUCHARTEST(p);        cp = RAWUCHARTEST(p);
# Line 294  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 312  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
312         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
313         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
314         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
315         RM61,  RM62, RM63, RM64, RM65, RM66 };         RM61,  RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
316    
317  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
318  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 416  typedef struct heapframe { Line 434  typedef struct heapframe {
434    int Xlength;    int Xlength;
435    int Xmax;    int Xmax;
436    int Xmin;    int Xmin;
437    int Xnumber;    unsigned int Xnumber;
438    int Xoffset;    int Xoffset;
439    int Xop;    unsigned int Xop;
440    int Xsave_capture_last;    pcre_int32 Xsave_capture_last;
441    int Xsave_offset1, Xsave_offset2, Xsave_offset3;    int Xsave_offset1, Xsave_offset2, Xsave_offset3;
442    int Xstacksave[REC_STACK_SAVE_MAX];    int Xstacksave[REC_STACK_SAVE_MAX];
443    
# Line 634  int max; Line 652  int max;
652  int min;  int min;
653  unsigned int number;  unsigned int number;
654  int offset;  int offset;
655  pcre_uchar op;  unsigned int op;
656  int save_capture_last;  pcre_int32 save_capture_last;
657  int save_offset1, save_offset2, save_offset3;  int save_offset1, save_offset2, save_offset3;
658  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
659    
# Line 763  for (;;) Line 781  for (;;)
781      case OP_FAIL:      case OP_FAIL:
782      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
783    
     /* COMMIT overrides PRUNE, SKIP, and THEN */  
   
784      case OP_COMMIT:      case OP_COMMIT:
785      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
786        eptrb, RM52);        eptrb, RM52);
787      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&  
         rrc != MATCH_THEN)  
       RRETURN(rrc);  
788      RRETURN(MATCH_COMMIT);      RRETURN(MATCH_COMMIT);
789    
     /* PRUNE overrides THEN */  
   
790      case OP_PRUNE:      case OP_PRUNE:
791      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
792        eptrb, RM51);        eptrb, RM51);
793      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
794      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
795    
796      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
# Line 789  for (;;) Line 800  for (;;)
800        eptrb, RM56);        eptrb, RM56);
801      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
802           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
803      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
804      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
805    
     /* SKIP overrides PRUNE and THEN */  
   
806      case OP_SKIP:      case OP_SKIP:
807      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
808        eptrb, RM53);        eptrb, RM53);
809      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       RRETURN(rrc);  
810      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
811      RRETURN(MATCH_SKIP);      RRETURN(MATCH_SKIP);
812    
813      /* Note that, for Perl compatibility, SKIP with an argument does NOT set      /* Note that, for Perl compatibility, SKIP with an argument does NOT set
814      nomatch_mark. There is a flag that disables this opcode when re-matching a      nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
815      pattern that ended with a SKIP for which there was not a matching MARK. */      not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
816        that failed and any that precede it (either they also failed, or were not
817        triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
818        SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
819        set to the count of the one that failed. */
820    
821      case OP_SKIP_ARG:      case OP_SKIP_ARG:
822      if (md->ignore_skip_arg)      md->skip_arg_count++;
823        if (md->skip_arg_count <= md->ignore_skip_arg)
824        {        {
825        ecode += PRIV(OP_lengths)[*ecode] + ecode[1];        ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
826        break;        break;
827        }        }
828      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
829        eptrb, RM57);        eptrb, RM57);
830      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
831        RRETURN(rrc);  
   
832      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
833      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
834      caught by a matching MARK, or get to the top, where it causes a rematch      caught by a matching MARK, or get to the top, where it causes a rematch
835      with the md->ignore_skip_arg flag set. */      with md->ignore_skip_arg set to the value of md->skip_arg_count. */
836    
837      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
838      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
# Line 1066  for (;;) Line 1078  for (;;)
1078        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1079    
1080        save_mark = md->mark;        save_mark = md->mark;
1081          save_capture_last = md->capture_last;
1082        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1083          RM2);          RM2);
1084    
# Line 1097  for (;;) Line 1110  for (;;)
1110        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1111        md->mark = save_mark;        md->mark = save_mark;
1112        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1113          md->capture_last = save_capture_last;
1114        }        }
1115    
1116      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
# Line 1218  for (;;) Line 1232  for (;;)
1232      POSSESSIVE_NON_CAPTURE:      POSSESSIVE_NON_CAPTURE:
1233      matched_once = FALSE;      matched_once = FALSE;
1234      code_offset = (int)(ecode - md->start_code);      code_offset = (int)(ecode - md->start_code);
1235        save_capture_last = md->capture_last;
1236    
1237      for (;;)      for (;;)
1238        {        {
# Line 1247  for (;;) Line 1262  for (;;)
1262        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1263        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1264        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1265          md->capture_last = save_capture_last;
1266        }        }
1267    
1268      if (matched_once || allow_zero)      if (matched_once || allow_zero)
# Line 1291  for (;;) Line 1307  for (;;)
1307          cb.pattern_position = GET(ecode, LINK_SIZE + 3);          cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1308          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1309          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1310          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last & CAPLMASK;
1311            /* Internal change requires this for API compatibility. */
1312            if (cb.capture_last == 0) cb.capture_last = -1;
1313          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1314          cb.mark             = md->nomatch_mark;          cb.mark             = md->nomatch_mark;
1315          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1316          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1317          }          }
1318        ecode += PRIV(OP_lengths)[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1319          codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1320        }        }
1321    
1322      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1513  for (;;) Line 1532  for (;;)
1532      to close any currently open capturing brackets. */      to close any currently open capturing brackets. */
1533    
1534      case OP_CLOSE:      case OP_CLOSE:
1535      number = GET2(ecode, 1);      number = GET2(ecode, 1);   /* Must be less than 65536 */
1536      offset = number << 1;      offset = number << 1;
1537    
1538  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 1521  for (;;) Line 1540  for (;;)
1540        printf("\n");        printf("\n");
1541  #endif  #endif
1542    
1543      md->capture_last = number;      md->capture_last = (md->capture_last & OVFLMASK) | number;
1544      if (offset >= md->offset_max) md->offset_overflow = TRUE; else      if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1545        {        {
1546        md->offset_vector[offset] =        md->offset_vector[offset] =
1547          md->offset_vector[md->offset_end - number];          md->offset_vector[md->offset_end - number];
# Line 1584  for (;;) Line 1603  for (;;)
1603        }        }
1604      else condassert = FALSE;      else condassert = FALSE;
1605    
1606        /* Loop for each branch */
1607    
1608      do      do
1609        {        {
1610        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1611    
1612          /* A match means that the assertion is true; break out of the loop
1613          that matches its alternatives. */
1614    
1615        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1616          {          {
1617          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1618          break;          break;
1619          }          }
1620    
1621          /* If not matched, restore the previous mark setting. */
1622    
1623        md->mark = save_mark;        md->mark = save_mark;
1624    
1625        /* A COMMIT failure must fail the entire assertion, without trying any        /* See comment in the code for capturing groups above about handling
1626        subsequent branches. */        THEN. */
   
       if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);  
1627    
1628        /* PCRE does not allow THEN to escape beyond an assertion; it        if (rrc == MATCH_THEN)
1629        is treated as NOMATCH. */          {
1630            next = ecode + GET(ecode,1);
1631            if (md->start_match_ptr < next &&
1632                (*ecode == OP_ALT || *next == OP_ALT))
1633              rrc = MATCH_NOMATCH;
1634            }
1635    
1636          /* Anything other than NOMATCH causes the entire assertion to fail,
1637          passing back the return code. This includes COMMIT, SKIP, PRUNE and an
1638          uncaptured THEN, which means they take their normal effect. This
1639          consistent approach does not always have exactly the same effect as in
1640          Perl. */
1641    
1642        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1643        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1644        }        }
1645      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);   /* Continue for next alternative */
1646    
1647        /* If we have tried all the alternative branches, the assertion has
1648        failed. If not, we broke out after a match. */
1649    
1650      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1651    
# Line 1613  for (;;) Line 1653  for (;;)
1653    
1654      if (condassert) RRETURN(MATCH_MATCH);      if (condassert) RRETURN(MATCH_MATCH);
1655    
1656      /* Continue from after the assertion, updating the offsets high water      /* Continue from after a successful assertion, updating the offsets high
1657      mark, since extracts may have been taken during the assertion. */      water mark, since extracts may have been taken during the assertion. */
1658    
1659      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1660      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1661      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1662      continue;      continue;
1663    
1664      /* Negative assertion: all branches must fail to match. Encountering SKIP,      /* Negative assertion: all branches must fail to match for the assertion to
1665      PRUNE, or COMMIT means we must assume failure without checking subsequent      succeed. */
     branches. */  
1666    
1667      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1668      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
# Line 1635  for (;;) Line 1674  for (;;)
1674        }        }
1675      else condassert = FALSE;      else condassert = FALSE;
1676    
1677        /* Loop for each alternative branch. */
1678    
1679      do      do
1680        {        {
1681        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1682        md->mark = save_mark;        md->mark = save_mark;   /* Always restore the mark setting */
1683        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);  
1684        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        switch(rrc)
1685          {          {
1686          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          case MATCH_MATCH:            /* A successful match means */
1687          break;          case MATCH_ACCEPT:           /* the assertion has failed. */
1688          }          RRETURN(MATCH_NOMATCH);
1689    
1690            case MATCH_NOMATCH:          /* Carry on with next branch */
1691            break;
1692    
1693        /* PCRE does not allow THEN to escape beyond an assertion; it is treated          /* See comment in the code for capturing groups above about handling
1694        as NOMATCH. */          THEN. */
1695    
1696        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);          case MATCH_THEN:
1697            next = ecode + GET(ecode,1);
1698            if (md->start_match_ptr < next &&
1699                (*ecode == OP_ALT || *next == OP_ALT))
1700              {
1701              rrc = MATCH_NOMATCH;
1702              break;
1703              }
1704            /* Otherwise fall through. */
1705    
1706            /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
1707            assertion to fail to match, without considering any more alternatives.
1708            Failing to match means the assertion is true. This is a consistent
1709            approach, but does not always have the same effect as in Perl. */
1710    
1711            case MATCH_COMMIT:
1712            case MATCH_SKIP:
1713            case MATCH_SKIP_ARG:
1714            case MATCH_PRUNE:
1715            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1716            goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
1717    
1718            /* Anything else is an error */
1719    
1720            default:
1721            RRETURN(rrc);
1722            }
1723    
1724          /* Continue with next branch */
1725    
1726        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1727        }        }
1728      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1729    
1730        /* All branches in the assertion failed to match. */
1731    
1732        NEG_ASSERT_TRUE:
1733      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1734        ecode += 1 + LINK_SIZE;                /* Continue with current branch */
     ecode += 1 + LINK_SIZE;  
1735      continue;      continue;
1736    
1737      /* Move the subject pointer back. This occurs only at the start of      /* Move the subject pointer back. This occurs only at the start of
# Line 1716  for (;;) Line 1791  for (;;)
1791        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1792        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1793        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1794        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last & CAPLMASK;
1795          /* Internal change requires this for API compatibility. */
1796          if (cb.capture_last == 0) cb.capture_last = -1;
1797        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1798        cb.mark             = md->nomatch_mark;        cb.mark             = md->nomatch_mark;
1799        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
# Line 1762  for (;;) Line 1839  for (;;)
1839        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1840    
1841        new_recursive.group_num = recno;        new_recursive.group_num = recno;
1842          new_recursive.saved_capture_last = md->capture_last;
1843        new_recursive.subject_position = eptr;        new_recursive.subject_position = eptr;
1844        new_recursive.prevrec = md->recursive;        new_recursive.prevrec = md->recursive;
1845        md->recursive = &new_recursive;        md->recursive = &new_recursive;
# Line 1785  for (;;) Line 1863  for (;;)
1863              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1864    
1865        /* OK, now we can do the recursion. After processing each alternative,        /* OK, now we can do the recursion. After processing each alternative,
1866        restore the offset data. If there were nested recursions, md->recursive        restore the offset data and the last captured value. If there were nested
1867        might be changed, so reset it before looping. */        recursions, md->recursive might be changed, so reset it before looping.
1868          */
1869    
1870        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1871        cbegroup = (*callpat >= OP_SBRA);        cbegroup = (*callpat >= OP_SBRA);
# Line 1797  for (;;) Line 1876  for (;;)
1876            md, eptrb, RM6);            md, eptrb, RM6);
1877          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1878              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1879            md->capture_last = new_recursive.saved_capture_last;
1880          md->recursive = new_recursive.prevrec;          md->recursive = new_recursive.prevrec;
1881          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1882            {            {
# Line 1813  for (;;) Line 1893  for (;;)
1893            goto RECURSION_MATCHED;        /* Exit loop; end processing */            goto RECURSION_MATCHED;        /* Exit loop; end processing */
1894            }            }
1895    
1896          /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it          /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
1897          is treated as NOMATCH. */          recursion; they cause a NOMATCH for the entire recursion. These codes
1898            are defined in a range that can be tested for. */
1899    
1900            if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
1901              RRETURN(MATCH_NOMATCH);
1902    
1903            /* Any return code other than NOMATCH is an error. */
1904    
1905          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&          if (rrc != MATCH_NOMATCH)
                  rrc != MATCH_COMMIT)  
1906            {            {
1907            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1908            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1947  for (;;) Line 2032  for (;;)
2032    
2033        /* Deal with capturing */        /* Deal with capturing */
2034    
2035        md->capture_last = number;        md->capture_last = (md->capture_last & OVFLMASK) | number;
2036        if (offset >= md->offset_max) md->offset_overflow = TRUE; else        if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
2037          {          {
2038          /* If offset is greater than offset_top, it means that we are          /* If offset is greater than offset_top, it means that we are
2039          "skipping" a capturing group, and that group's offsets must be marked          "skipping" a capturing group, and that group's offsets must be marked
# Line 2532  for (;;) Line 2617  for (;;)
2617        }        }
2618      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2619        {        {
2620        const pcre_uint32 *cp;        const pcre_uint32 *cp;
2621        const ucd_record *prop = GET_UCD(c);        const ucd_record *prop = GET_UCD(c);
2622    
2623        switch(ecode[1])        switch(ecode[1])
# Line 2594  for (;;) Line 2679  for (;;)
2679          break;          break;
2680    
2681          case PT_CLIST:          case PT_CLIST:
2682          cp = PRIV(ucd_caseless_sets) + prop->caseset;          cp = PRIV(ucd_caseless_sets) + ecode[2];
2683          for (;;)          for (;;)
2684            {            {
2685            if (c < *cp)            if (c < *cp)
# Line 2604  for (;;) Line 2689  for (;;)
2689            }            }
2690          break;          break;
2691    
2692            case PT_UCNC:
2693            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2694                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2695                 c >= 0xe000) == (op == OP_NOTPROP))
2696              RRETURN(MATCH_NOMATCH);
2697            break;
2698    
2699          /* This should never occur */          /* This should never occur */
2700    
2701          default:          default:
# Line 3190  for (;;) Line 3282  for (;;)
3282    
3283        if (fc < 128)        if (fc < 128)
3284          {          {
3285          pcre_uchar cc = RAWUCHAR(eptr);          pcre_uint32 cc = RAWUCHAR(eptr);
3286          if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);          if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3287          ecode++;          ecode++;
3288          eptr++;          eptr++;
# Line 3295  for (;;) Line 3387  for (;;)
3387      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3388      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3389    
3390      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. We first check
3391        for the minimum number of characters. If the minimum equals the maximum, we
3392        are done. Otherwise, if minimizing, check the rest of the pattern for a
3393        match; if there isn't one, advance up to the maximum, one character at a
3394        time.
3395    
3396        If maximizing, advance up to the maximum number of matching characters,
3397        until eptr is past the end of the maximum run. If possessive, we are
3398        then done (no backing up). Otherwise, match at this position; anything
3399        other than no match is immediately returned. For nomatch, back up one
3400        character, unless we are matching \R and the last thing matched was
3401        \r\n, in which case, back up two bytes. When we reach the first optional
3402        character position, we can save stack by doing a tail recurse.
3403    
3404        The various UTF/non-UTF and caseful/caseless cases are handled separately,
3405        for speed. */
3406    
3407      REPEATCHAR:      REPEATCHAR:
3408  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 3379  for (;;) Line 3486  for (;;)
3486                }                }
3487              }              }
3488    
3489            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
   
3490            for(;;)            for(;;)
3491              {              {
3492                if (eptr == pp) goto TAIL_RECURSE;
3493              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3494              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3495              if (eptr == pp) { RRETURN(MATCH_NOMATCH); }              /* if (eptr == pp) { RRETURN(MATCH_NOMATCH); } */
3496  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3497              eptr--;              eptr--;
3498              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 3439  for (;;) Line 3546  for (;;)
3546    
3547        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3548          {          {
3549          pcre_uchar cc;          pcre_uint32 cc;                 /* Faster than pcre_uchar */
   
3550          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3551            {            {
3552            SCHECK_PARTIAL();            SCHECK_PARTIAL();
# Line 3455  for (;;) Line 3561  for (;;)
3561          {          {
3562          for (fi = min;; fi++)          for (fi = min;; fi++)
3563            {            {
3564            pcre_uchar cc;            pcre_uint32 cc;               /* Faster than pcre_uchar */
   
3565            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3566            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3567            if (fi >= max) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
# Line 3476  for (;;) Line 3581  for (;;)
3581          pp = eptr;          pp = eptr;
3582          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3583            {            {
3584            pcre_uchar cc;            pcre_uint32 cc;               /* Faster than pcre_uchar */
   
3585            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3586              {              {
3587              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 3488  for (;;) Line 3592  for (;;)
3592            eptr++;            eptr++;
3593            }            }
3594    
3595          if (possessive) continue;          if (possessive) continue;       /* No backtracking */
3596            for (;;)
         while (eptr >= pp)  
3597            {            {
3598              if (eptr == pp) goto TAIL_RECURSE;
3599            RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3600            eptr--;            eptr--;
3601            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 3546  for (;;) Line 3650  for (;;)
3650            if (fc != RAWUCHARTEST(eptr)) break;            if (fc != RAWUCHARTEST(eptr)) break;
3651            eptr++;            eptr++;
3652            }            }
3653          if (possessive) continue;          if (possessive) continue;    /* No backtracking */
3654            for (;;)
         while (eptr >= pp)  
3655            {            {
3656              if (eptr == pp) goto TAIL_RECURSE;
3657            RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3658            eptr--;            eptr--;
3659            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 3726  for (;;) Line 3830  for (;;)
3830            }            }
3831          }          }
3832        else        else
3833  #endif  #endif  /* SUPPORT_UTF */
3834        /* Not UTF mode */        /* Not UTF mode */
3835          {          {
3836          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3764  for (;;) Line 3868  for (;;)
3868              }              }
3869            }            }
3870          else          else
3871  #endif  #endif  /*SUPPORT_UTF */
3872          /* Not UTF mode */          /* Not UTF mode */
3873            {            {
3874            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3806  for (;;) Line 3910  for (;;)
3910              if (fc == d || (unsigned int)foc == d) break;              if (fc == d || (unsigned int)foc == d) break;
3911              eptr += len;              eptr += len;
3912              }              }
3913            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
3914            for(;;)            for(;;)
3915              {              {
3916                if (eptr == pp) goto TAIL_RECURSE;
3917              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3918              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3919              if (eptr-- == pp) break;        /* Stop if tried at original pos */              eptr--;
3920              BACKCHAR(eptr);              BACKCHAR(eptr);
3921              }              }
3922            }            }
3923          else          else
3924  #endif  #endif  /* SUPPORT_UTF */
3925          /* Not UTF mode */          /* Not UTF mode */
3926            {            {
3927            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3829  for (;;) Line 3934  for (;;)
3934              if (fc == *eptr || foc == *eptr) break;              if (fc == *eptr || foc == *eptr) break;
3935              eptr++;              eptr++;
3936              }              }
3937            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
3938            while (eptr >= pp)            for (;;)
3939              {              {
3940                if (eptr == pp) goto TAIL_RECURSE;
3941              RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3942              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3943              eptr--;              eptr--;
# Line 3941  for (;;) Line 4047  for (;;)
4047              if (fc == d) break;              if (fc == d) break;
4048              eptr += len;              eptr += len;
4049              }              }
4050            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
4051            for(;;)            for(;;)
4052              {              {
4053                if (eptr == pp) goto TAIL_RECURSE;
4054              RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
4055              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4056              if (eptr-- == pp) break;        /* Stop if tried at original pos */              eptr--;
4057              BACKCHAR(eptr);              BACKCHAR(eptr);
4058              }              }
4059            }            }
# Line 3964  for (;;) Line 4071  for (;;)
4071              if (fc == *eptr) break;              if (fc == *eptr) break;
4072              eptr++;              eptr++;
4073              }              }
4074            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
4075            while (eptr >= pp)            for (;;)
4076              {              {
4077                if (eptr == pp) goto TAIL_RECURSE;
4078              RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
4079              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4080              eptr--;              eptr--;
# Line 4203  for (;;) Line 4311  for (;;)
4311                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4312              }              }
4313            break;            break;
4314    
4315            case PT_CLIST:            case PT_CLIST:
4316            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4317              {              {
4318              const pcre_uint32 *cp;              const pcre_uint32 *cp;
4319              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4320                {                {
# Line 4214  for (;;) Line 4322  for (;;)
4322                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4323                }                }
4324              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4325              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
4326              for (;;)              for (;;)
4327                {                {
4328                if (c < *cp)                if (c < *cp)
4329                  { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }                  { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4330                if (c == *cp++)                if (c == *cp++)
4331                  { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }                  { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4332                }                }
4333              }              }
4334            break;            break;
4335    
4336              case PT_UCNC:
4337              for (i = 1; i <= min; i++)
4338                {
4339                if (eptr >= md->end_subject)
4340                  {
4341                  SCHECK_PARTIAL();
4342                  RRETURN(MATCH_NOMATCH);
4343                  }
4344                GETCHARINCTEST(c, eptr);
4345                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4346                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4347                     c >= 0xe000) == prop_fail_result)
4348                  RRETURN(MATCH_NOMATCH);
4349                }
4350              break;
4351    
4352            /* This should not occur */            /* This should not occur */
4353    
4354            default:            default:
# Line 4430  for (;;) Line 4554  for (;;)
4554          case OP_DIGIT:          case OP_DIGIT:
4555          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4556            {            {
4557            pcre_uchar cc;            pcre_uint32 cc;
   
4558            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4559              {              {
4560              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4448  for (;;) Line 4571  for (;;)
4571          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4572          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4573            {            {
4574            pcre_uchar cc;            pcre_uint32 cc;
   
4575            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4576              {              {
4577              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4466  for (;;) Line 4588  for (;;)
4588          case OP_WHITESPACE:          case OP_WHITESPACE:
4589          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4590            {            {
4591            pcre_uchar cc;            pcre_uint32 cc;
   
4592            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4593              {              {
4594              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4484  for (;;) Line 4605  for (;;)
4605          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4606          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4607            {            {
4608            pcre_uchar cc;            pcre_uint32 cc;
   
4609            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4610              {              {
4611              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4502  for (;;) Line 4622  for (;;)
4622          case OP_WORDCHAR:          case OP_WORDCHAR:
4623          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4624            {            {
4625            pcre_uchar cc;            pcre_uint32 cc;
   
4626            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4627              {              {
4628              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4954  for (;;) Line 5073  for (;;)
5073    
5074            case PT_CLIST:            case PT_CLIST:
5075            for (fi = min;; fi++)            for (fi = min;; fi++)
5076              {              {
5077              const pcre_uint32 *cp;              const pcre_uint32 *cp;
5078              RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
5079              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5080              if (fi >= max) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
5081              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4965  for (;;) Line 5084  for (;;)
5084                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5085                }                }
5086              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
5087              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
5088              for (;;)              for (;;)
5089                {                {
5090                if (c < *cp)                if (c < *cp)
# Line 4976  for (;;) Line 5095  for (;;)
5095              }              }
5096            /* Control never gets here */            /* Control never gets here */
5097    
5098              case PT_UCNC:
5099              for (fi = min;; fi++)
5100                {
5101                RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);
5102                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5103                if (fi >= max) RRETURN(MATCH_NOMATCH);
5104                if (eptr >= md->end_subject)
5105                  {
5106                  SCHECK_PARTIAL();
5107                  RRETURN(MATCH_NOMATCH);
5108                  }
5109                GETCHARINCTEST(c, eptr);
5110                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5111                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5112                     c >= 0xe000) == prop_fail_result)
5113                  RRETURN(MATCH_NOMATCH);
5114                }
5115              /* Control never gets here */
5116    
5117            /* This should never occur */            /* This should never occur */
5118            default:            default:
5119            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
# Line 5445  for (;;) Line 5583  for (;;)
5583              eptr+= len;              eptr+= len;
5584              }              }
5585            break;            break;
5586    
5587            case PT_CLIST:            case PT_CLIST:
5588            for (i = min; i < max; i++)            for (i = min; i < max; i++)
5589              {              {
# Line 5457  for (;;) Line 5595  for (;;)
5595                break;                break;
5596                }                }
5597              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5598              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
5599              for (;;)              for (;;)
5600                {                {
5601                if (c < *cp)                if (c < *cp)
5602                  { if (prop_fail_result) break; else goto GOT_MAX; }                  { if (prop_fail_result) break; else goto GOT_MAX; }
5603                if (c == *cp++)                if (c == *cp++)
5604                  { if (prop_fail_result) goto GOT_MAX; else break; }                  { if (prop_fail_result) goto GOT_MAX; else break; }
5605                }                }
5606              eptr += len;              eptr += len;
5607                }
5608              GOT_MAX:
5609              break;
5610    
5611              case PT_UCNC:
5612              for (i = min; i < max; i++)
5613                {
5614                int len = 1;
5615                if (eptr >= md->end_subject)
5616                  {
5617                  SCHECK_PARTIAL();
5618                  break;
5619                  }
5620                GETCHARLENTEST(c, eptr, len);
5621                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5622                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5623                     c >= 0xe000) == prop_fail_result)
5624                  break;
5625                eptr += len;
5626              }              }
           GOT_MAX:  
5627            break;            break;
5628    
5629            default:            default:
# Line 5476  for (;;) Line 5632  for (;;)
5632    
5633          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
5634    
5635          if (possessive) continue;          if (possessive) continue;    /* No backtracking */
5636          for(;;)          for(;;)
5637            {            {
5638              if (eptr == pp) goto TAIL_RECURSE;
5639            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5640            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5641            if (eptr-- == pp) break;        /* Stop if tried at original pos */            eptr--;
5642            if (utf) BACKCHAR(eptr);            if (utf) BACKCHAR(eptr);
5643            }            }
5644          }          }
# Line 5518  for (;;) Line 5675  for (;;)
5675    
5676          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
5677    
5678          if (possessive) continue;          if (possessive) continue;    /* No backtracking */
   
5679          for(;;)          for(;;)
5680            {            {
5681              if (eptr == pp) goto TAIL_RECURSE;
5682            RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5683            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5684            if (eptr-- == pp) break;        /* Stop if tried at original pos */            eptr--;
5685            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
5686              {              {
5687              if (!utf) c = *eptr; else              if (!utf) c = *eptr; else
# Line 5799  for (;;) Line 5956  for (;;)
5956            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5957            }            }
5958    
5959          /* eptr is now past the end of the maximum run. If possessive, we are          if (possessive) continue;    /* No backtracking */
         done (no backing up). Otherwise, match at this position; anything other  
         than no match is immediately returned. For nomatch, back up one  
         character, unless we are matching \R and the last thing matched was  
         \r\n, in which case, back up two bytes. */  
   
         if (possessive) continue;  
5960          for(;;)          for(;;)
5961            {            {
5962              if (eptr == pp) goto TAIL_RECURSE;
5963            RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5964            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5965            if (eptr-- == pp) break;        /* Stop if tried at original pos */            eptr--;
5966            BACKCHAR(eptr);            BACKCHAR(eptr);
5967            if (ctype == OP_ANYNL && eptr > pp  && RAWUCHAR(eptr) == CHAR_NL &&            if (ctype == OP_ANYNL && eptr > pp  && RAWUCHAR(eptr) == CHAR_NL &&
5968                RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;                RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
# Line 6048  for (;;) Line 6200  for (;;)
6200            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
6201            }            }
6202    
6203          /* eptr is now past the end of the maximum run. If possessive, we are          if (possessive) continue;    /* No backtracking */
6204          done (no backing up). Otherwise, match at this position; anything other          for (;;)
         than no match is immediately returned. For nomatch, back up one  
         character (byte), unless we are matching \R and the last thing matched  
         was \r\n, in which case, back up two bytes. */  
   
         if (possessive) continue;  
         while (eptr >= pp)  
6205            {            {
6206              if (eptr == pp) goto TAIL_RECURSE;
6207            RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6208            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6209            eptr--;            eptr--;
# Line 6111  switch (frame->Xwhere) Line 6258  switch (frame->Xwhere)
6258    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
6259  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6260    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6261    LBL(59) LBL(60) LBL(61) LBL(62)    LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)
6262  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
6263  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
6264    default:    default:
6265    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
   
 printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);  
   
6266    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
6267    }    }
6268  #undef LBL  #undef LBL
# Line 6267  const pcre_uint8 *start_bits = NULL; Line 6411  const pcre_uint8 *start_bits = NULL;
6411  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6412  PCRE_PUCHAR end_subject;  PCRE_PUCHAR end_subject;
6413  PCRE_PUCHAR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
6414    PCRE_PUCHAR match_partial;
6415  PCRE_PUCHAR req_char_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6416    
6417  const pcre_study_data *study;  const pcre_study_data *study;
# Line 6298  if ((options & ~PUBLIC_EXEC_OPTIONS) != Line 6443  if ((options & ~PUBLIC_EXEC_OPTIONS) !=
6443  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6444    return PCRE_ERROR_NULL;    return PCRE_ERROR_NULL;
6445  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6446    if (length < 0) return PCRE_ERROR_BADLENGTH;
6447  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6448    
6449  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
# Line 6364  if (extra_data != NULL Line 6510  if (extra_data != NULL
6510      && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |      && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6511                               PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT                               PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6512      && extra_data->executable_jit != NULL      && extra_data->executable_jit != NULL
6513      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |      && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
                     PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |  
                     PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)  
6514    {    {
6515    rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,    rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
6516         start_offset, options, offsets, offsetcount);         start_offset, options, offsets, offsetcount);
6517    
6518    /* PCRE_ERROR_NULL means that the selected normal or partial matching    /* PCRE_ERROR_NULL means that the selected normal or partial matching
6519    mode is not compiled. In this case we simply fallback to interpreter. */    mode is not compiled. In this case we simply fallback to interpreter. */
6520    
6521    if (rc != PCRE_ERROR_NULL) return rc;    if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
6522    }    }
6523  #endif  #endif
6524    
# Line 6436  end_subject = md->end_subject; Line 6580  end_subject = md->end_subject;
6580  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6581  md->use_ucp = (re->options & PCRE_UCP) != 0;  md->use_ucp = (re->options & PCRE_UCP) != 0;
6582  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6583  md->ignore_skip_arg = FALSE;  md->ignore_skip_arg = 0;
6584    
6585  /* Some options are unpacked into BOOL variables in the hope that testing  /* Some options are unpacked into BOOL variables in the hope that testing
6586  them will be faster than individual option bits. */  them will be faster than individual option bits. */
# Line 6546  if (re->top_backref > 0 && re->top_backr Line 6690  if (re->top_backref > 0 && re->top_backr
6690    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
6691    }    }
6692  else md->offset_vector = offsets;  else md->offset_vector = offsets;
   
6693  md->offset_end = ocount;  md->offset_end = ocount;
6694  md->offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
6695  md->offset_overflow = FALSE;  md->capture_last = 0;
 md->capture_last = -1;  
6696    
6697  /* Reset the working variable associated with each extraction. These should  /* Reset the working variable associated with each extraction. These should
6698  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
# Line 6820  for(;;) Line 6962  for(;;)
6962    md->match_call_count = 0;    md->match_call_count = 0;
6963    md->match_function_type = 0;    md->match_function_type = 0;
6964    md->end_offset_top = 0;    md->end_offset_top = 0;
6965      md->skip_arg_count = 0;
6966    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6967    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL)
6968        {
6969        start_partial = md->start_used_ptr;
6970        match_partial = start_match;
6971        }
6972    
6973    switch(rc)    switch(rc)
6974      {      {
# Line 6834  for(;;) Line 6981  for(;;)
6981    
6982      case MATCH_SKIP_ARG:      case MATCH_SKIP_ARG:
6983      new_start_match = start_match;      new_start_match = start_match;
6984      md->ignore_skip_arg = TRUE;      md->ignore_skip_arg = md->skip_arg_count;
6985      break;      break;
6986    
6987      /* SKIP passes back the next starting point explicitly, but if it is the      /* SKIP passes back the next starting point explicitly, but if it is no
6988      same as the match we have just done, treat it as NOMATCH. */      greater than the match we have just done, treat it as NOMATCH. */
6989    
6990      case MATCH_SKIP:      case MATCH_SKIP:
6991      if (md->start_match_ptr != start_match)      if (md->start_match_ptr > start_match)
6992        {        {
6993        new_start_match = md->start_match_ptr;        new_start_match = md->start_match_ptr;
6994        break;        break;
# Line 6849  for(;;) Line 6996  for(;;)
6996      /* Fall through */      /* Fall through */
6997    
6998      /* NOMATCH and PRUNE advance by one character. THEN at this level acts      /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6999      exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */      exactly like PRUNE. Unset ignore SKIP-with-argument. */
7000    
7001      case MATCH_NOMATCH:      case MATCH_NOMATCH:
7002      case MATCH_PRUNE:      case MATCH_PRUNE:
7003      case MATCH_THEN:      case MATCH_THEN:
7004      md->ignore_skip_arg = FALSE;      md->ignore_skip_arg = 0;
7005      new_start_match = start_match + 1;      new_start_match = start_match + 1;
7006  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
7007      if (utf)      if (utf)
# Line 6947  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 7094  if (rc == MATCH_MATCH || rc == MATCH_ACC
7094          (arg_offset_max - 2) * sizeof(int));          (arg_offset_max - 2) * sizeof(int));
7095        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
7096        }        }
7097      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
7098      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
7099      (PUBL(free))(md->offset_vector);      (PUBL(free))(md->offset_vector);
7100      }      }
# Line 6955  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 7102  if (rc == MATCH_MATCH || rc == MATCH_ACC
7102    /* Set the return code to the number of captured strings, or 0 if there were    /* Set the return code to the number of captured strings, or 0 if there were
7103    too many to fit into the vector. */    too many to fit into the vector. */
7104    
7105    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?    rc = ((md->capture_last & OVFLBIT) != 0 &&
7106             md->end_offset_top >= arg_offset_max)?
7107      0 : md->end_offset_top/2;      0 : md->end_offset_top/2;
7108    
7109    /* If there is space in the offset vector, set any unused pairs at the end of    /* If there is space in the offset vector, set any unused pairs at the end of
# Line 7028  if (start_partial != NULL) Line 7176  if (start_partial != NULL)
7176      {      {
7177      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7178      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7179        if (offsetcount > 2)
7180          offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
7181      }      }
7182    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
7183    }    }

Legend:
Removed from v.1155  
changed lines
  Added in v.1311

  ViewVC Help
Powered by ViewVC 1.1.5