/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 609 by ph10, Wed Jun 15 18:09:23 2011 UTC revision 742 by zherczeg, Sun Nov 6 08:05:33 2011 UTC
# Line 57  possible. There are also some static sup Line 57  possible. There are also some static sup
57  #undef min  #undef min
58  #undef max  #undef max
59    
60  /* Values for setting in md->match_function_type to indicate two special types  /* Values for setting in md->match_function_type to indicate two special types
61  of call to match(). We do it this way to save on using another stack variable,  of call to match(). We do it this way to save on using another stack variable,
62  as stack usage is to be discouraged. */  as stack usage is to be discouraged. */
63    
64  #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */  #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
# Line 76  negative to avoid the external error cod Line 76  negative to avoid the external error cod
76  #define MATCH_ACCEPT       (-999)  #define MATCH_ACCEPT       (-999)
77  #define MATCH_COMMIT       (-998)  #define MATCH_COMMIT       (-998)
78  #define MATCH_KETRPOS      (-997)  #define MATCH_KETRPOS      (-997)
79  #define MATCH_PRUNE        (-996)  #define MATCH_ONCE         (-996)
80  #define MATCH_SKIP         (-995)  #define MATCH_PRUNE        (-995)
81  #define MATCH_SKIP_ARG     (-994)  #define MATCH_SKIP         (-994)
82  #define MATCH_THEN         (-993)  #define MATCH_SKIP_ARG     (-993)
83    #define MATCH_THEN         (-992)
84    
85  /* This is a convenience macro for code that occurs many times. */  /* This is a convenience macro for code that occurs many times. */
86    
# Line 137  while (length-- > 0) Line 138  while (length-- > 0)
138    
139  /* Normally, if a back reference hasn't been set, the length that is passed is  /* Normally, if a back reference hasn't been set, the length that is passed is
140  negative, so the match always fails. However, in JavaScript compatibility mode,  negative, so the match always fails. However, in JavaScript compatibility mode,
141  the length passed is zero. Note that in caseless UTF-8 mode, the number of  the length passed is zero. Note that in caseless UTF-8 mode, the number of
142  subject bytes matched may be different to the number of reference bytes.  subject bytes matched may be different to the number of reference bytes.
143    
144  Arguments:  Arguments:
# Line 184  if (caseless) Line 185  if (caseless)
185  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
186    if (md->utf8)    if (md->utf8)
187      {      {
188      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
189      bytes matched may differ, because there are some characters whose upper and      bytes matched may differ, because there are some characters whose upper and
190      lower case versions code as different numbers of bytes. For example, U+023A      lower case versions code as different numbers of bytes. For example, U+023A
191      (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);      (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
192      a sequence of 3 of the former uses 6 bytes, as does a sequence of two of      a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
193      the latter. It is important, therefore, to check the length along the      the latter. It is important, therefore, to check the length along the
194      reference, not along the subject (earlier code did this wrong). */      reference, not along the subject (earlier code did this wrong). */
195    
196      USPTR endptr = p + length;      USPTR endptr = p + length;
197      while (p < endptr)      while (p < endptr)
198        {        {
# Line 209  if (caseless) Line 210  if (caseless)
210    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
211    is no UCP support. */    is no UCP support. */
212      {      {
213      if (eptr + length > md->end_subject) return -1;      if (eptr + length > md->end_subject) return -1;
214      while (length-- > 0)      while (length-- > 0)
215        { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }        { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
216      }      }
217    }    }
218    
219  /* In the caseful case, we can just compare the bytes, whether or not we  /* In the caseful case, we can just compare the bytes, whether or not we
220  are in UTF-8 mode. */  are in UTF-8 mode. */
221    
222  else  else
223    {    {
224    if (eptr + length > md->end_subject) return -1;    if (eptr + length > md->end_subject) return -1;
225    while (length-- > 0) if (*p++ != *eptr++) return -1;    while (length-- > 0) if (*p++ != *eptr++) return -1;
226    }    }
227    
228  return eptr - eptr_start;  return eptr - eptr_start;
# Line 276  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 277  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
277         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
278         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
279         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
280         RM61,  RM62, RM63};         RM61,  RM62, RM63, RM64, RM65, RM66 };
281    
282  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
283  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 383  typedef struct heapframe { Line 384  typedef struct heapframe {
384    int Xprop_type;    int Xprop_type;
385    int Xprop_value;    int Xprop_value;
386    int Xprop_fail_result;    int Xprop_fail_result;
   int Xprop_category;  
   int Xprop_chartype;  
   int Xprop_script;  
387    int Xoclength;    int Xoclength;
388    uschar Xocchars[8];    uschar Xocchars[8];
389  #endif  #endif
# Line 477  Returns:       MATCH_MATCH if matched Line 475  Returns:       MATCH_MATCH if matched
475    
476  static int  static int
477  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
478    const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb,
479    unsigned int rdepth)    unsigned int rdepth)
480  {  {
481  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
# Line 550  HEAP_RECURSE: Line 548  HEAP_RECURSE:
548  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
549  #define prop_value         frame->Xprop_value  #define prop_value         frame->Xprop_value
550  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
 #define prop_category      frame->Xprop_category  
 #define prop_chartype      frame->Xprop_chartype  
 #define prop_script        frame->Xprop_script  
551  #define oclength           frame->Xoclength  #define oclength           frame->Xoclength
552  #define occhars            frame->Xocchars  #define occhars            frame->Xocchars
553  #endif  #endif
# Line 590  declarations can be cut out in a block. Line 585  declarations can be cut out in a block.
585  below are for variables that do not have to be preserved over a recursive call  below are for variables that do not have to be preserved over a recursive call
586  to RMATCH(). */  to RMATCH(). */
587    
588  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
589  const uschar *charptr;  const uschar *charptr;
590  #endif  #endif
591  const uschar *callpat;  const uschar *callpat;
592  const uschar *data;  const uschar *data;
593  const uschar *next;  const uschar *next;
594  USPTR         pp;  USPTR         pp;
595  const uschar *prev;  const uschar *prev;
596  USPTR         saved_eptr;  USPTR         saved_eptr;
597    
598  recursion_info new_recursive;  recursion_info new_recursive;
599    
600  BOOL cur_is_word;  BOOL cur_is_word;
601  BOOL condition;  BOOL condition;
602  BOOL prev_is_word;  BOOL prev_is_word;
603    
# Line 610  BOOL prev_is_word; Line 605  BOOL prev_is_word;
605  int prop_type;  int prop_type;
606  int prop_value;  int prop_value;
607  int prop_fail_result;  int prop_fail_result;
 int prop_category;  
 int prop_chartype;  
 int prop_script;  
608  int oclength;  int oclength;
609  uschar occhars[8];  uschar occhars[8];
610  #endif  #endif
# Line 632  int stacksave[REC_STACK_SAVE_MAX]; Line 624  int stacksave[REC_STACK_SAVE_MAX];
624  eptrblock newptrb;  eptrblock newptrb;
625  #endif     /* NO_RECURSE */  #endif     /* NO_RECURSE */
626    
627  /* To save space on the stack and in the heap frame, I have doubled up on some  /* To save space on the stack and in the heap frame, I have doubled up on some
628  of the local variables that are used only in localised parts of the code, but  of the local variables that are used only in localised parts of the code, but
629  still need to be preserved over recursive calls of match(). These macros define  still need to be preserved over recursive calls of match(). These macros define
630  the alternative names that are used. */  the alternative names that are used. */
631    
632  #define allow_zero    cur_is_word  #define allow_zero    cur_is_word
# Line 680  if (md->match_call_count++ >= md->match_ Line 672  if (md->match_call_count++ >= md->match_
672  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
673    
674  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
675  string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is  string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
676  done this way to save having to use another function argument, which would take  done this way to save having to use another function argument, which would take
677  up space on the stack. See also MATCH_CONDASSERT below.  up space on the stack. See also MATCH_CONDASSERT below.
678    
679  When MATCH_CBEGROUP is set, add the current subject pointer to the chain of  When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
# Line 705  for (;;) Line 697  for (;;)
697    {    {
698    minimize = possessive = FALSE;    minimize = possessive = FALSE;
699    op = *ecode;    op = *ecode;
700    
701    switch(op)    switch(op)
702      {      {
703      case OP_MARK:      case OP_MARK:
# Line 783  for (;;) Line 775  for (;;)
775      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
776      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
777    
778      /* For THEN (and THEN_ARG) we pass back the address of the bracket or      /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
779      the alt that is at the start of the current branch. This makes it possible      the branch in which it occurs can be determined. Overload the start of
780      to skip back past alternatives that precede the THEN within the current      match pointer to do this. */
     branch. */  
781    
782      case OP_THEN:      case OP_THEN:
783      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
784        eptrb, RM54);        eptrb, RM54);
785      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
786      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode;
787      MRRETURN(MATCH_THEN);      MRRETURN(MATCH_THEN);
788    
789      case OP_THEN_ARG:      case OP_THEN_ARG:
790      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top,
791        offset_top, md, eptrb, RM58);        md, eptrb, RM58);
792      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
793      md->start_match_ptr = ecode - GET(ecode, 1);      md->start_match_ptr = ecode;
794      md->mark = ecode + LINK_SIZE + 2;      md->mark = ecode + 2;
795      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
796    
797        /* Handle an atomic group that does not contain any capturing parentheses.
798        This can be handled like an assertion. Prior to 8.13, all atomic groups
799        were handled this way. In 8.13, the code was changed as below for ONCE, so
800        that backups pass through the group and thereby reset captured values.
801        However, this uses a lot more stack, so in 8.20, atomic groups that do not
802        contain any captures generate OP_ONCE_NC, which can be handled in the old,
803        less stack intensive way.
804    
805        Check the alternative branches in turn - the matching won't pass the KET
806        for this kind of subpattern. If any one branch matches, we carry on as at
807        the end of a normal bracket, leaving the subject pointer, but resetting
808        the start-of-match value in case it was changed by \K. */
809    
810        case OP_ONCE_NC:
811        prev = ecode;
812        saved_eptr = eptr;
813        do
814          {
815          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
816          if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
817            {
818            mstart = md->start_match_ptr;
819            break;
820            }
821          if (rrc == MATCH_THEN)
822            {
823            next = ecode + GET(ecode,1);
824            if (md->start_match_ptr < next &&
825                (*ecode == OP_ALT || *next == OP_ALT))
826              rrc = MATCH_NOMATCH;
827            }
828    
829          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
830          ecode += GET(ecode,1);
831          }
832        while (*ecode == OP_ALT);
833    
834        /* If we hit the end of the group (which could be repeated), fail */
835    
836        if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
837    
838        /* Continue as from after the group, updating the offsets high water
839        mark, since extracts may have been taken. */
840    
841        do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
842    
843        offset_top = md->end_offset_top;
844        eptr = md->end_match_ptr;
845    
846        /* For a non-repeating ket, just continue at this level. This also
847        happens for a repeating ket if no characters were matched in the group.
848        This is the forcible breaking of infinite loops as implemented in Perl
849        5.005. */
850    
851        if (*ecode == OP_KET || eptr == saved_eptr)
852          {
853          ecode += 1+LINK_SIZE;
854          break;
855          }
856    
857        /* The repeating kets try the rest of the pattern or restart from the
858        preceding bracket, in the appropriate order. The second "call" of match()
859        uses tail recursion, to avoid using another stack frame. */
860    
861        if (*ecode == OP_KETRMIN)
862          {
863          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
864          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
865          ecode = prev;
866          goto TAIL_RECURSE;
867          }
868        else  /* OP_KETRMAX */
869          {
870          md->match_function_type = MATCH_CBEGROUP;
871          RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
872          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
873          ecode += 1 + LINK_SIZE;
874          goto TAIL_RECURSE;
875          }
876        /* Control never gets here */
877    
878      /* Handle a capturing bracket, other than those that are possessive with an      /* Handle a capturing bracket, other than those that are possessive with an
879      unlimited repeat. If there is space in the offset vector, save the current      unlimited repeat. If there is space in the offset vector, save the current
880      subject position in the working slot at the top of the vector. We mustn't      subject position in the working slot at the top of the vector. We mustn't
881      change the current values of the data slot, because they may be set from a      change the current values of the data slot, because they may be set from a
882      previous iteration of this group, and be referred to by a reference inside      previous iteration of this group, and be referred to by a reference inside
883      the group. If we fail to match, we need to restore this value and also the      the group. A failure to match might occur after the group has succeeded,
884      values of the final offsets, in case they were set by a previous iteration      if something later on doesn't match. For this reason, we need to restore
885      of the same bracket.      the working value and also the values of the final offsets, in case they
886        were set by a previous iteration of the same bracket.
887    
888      If there isn't enough space in the offset vector, treat this as if it were      If there isn't enough space in the offset vector, treat this as if it were
889      a non-capturing bracket. Don't worry about setting the flag for the error      a non-capturing bracket. Don't worry about setting the flag for the error
# Line 820  for (;;) Line 893  for (;;)
893      case OP_SCBRA:      case OP_SCBRA:
894      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
895      offset = number << 1;      offset = number << 1;
896    
897  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
898      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
899      printf("subject=");      printf("subject=");
# Line 841  for (;;) Line 914  for (;;)
914    
915        for (;;)        for (;;)
916          {          {
917          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
918          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
919            eptrb, RM1);            eptrb, RM1);
920          if (rrc != MATCH_NOMATCH &&          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
921              (rrc != MATCH_THEN || md->start_match_ptr != ecode))  
922            RRETURN(rrc);          /* If we backed up to a THEN, check whether it is within the current
923            branch by comparing the address of the THEN that is passed back with
924            the end of the branch. If it is within the current branch, and the
925            branch is one of two or more alternatives (it either starts or ends
926            with OP_ALT), we have reached the limit of THEN's action, so convert
927            the return code to NOMATCH, which will cause normal backtracking to
928            happen from now on. Otherwise, THEN is passed back to an outer
929            alternative. This implements Perl's treatment of parenthesized groups,
930            where a group not containing | does not affect the current alternative,
931            that is, (X) is NOT the same as (X|(*F)). */
932    
933            if (rrc == MATCH_THEN)
934              {
935              next = ecode + GET(ecode,1);
936              if (md->start_match_ptr < next &&
937                  (*ecode == OP_ALT || *next == OP_ALT))
938                rrc = MATCH_NOMATCH;
939              }
940    
941            /* Anything other than NOMATCH is passed back. */
942    
943            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
944          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
945          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
946          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
947          }          }
948    
949        DPRINTF(("bracket %d failed\n", number));        DPRINTF(("bracket %d failed\n", number));
   
950        md->offset_vector[offset] = save_offset1;        md->offset_vector[offset] = save_offset1;
951        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
952        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
953    
954        if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;        /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
955        RRETURN(MATCH_NOMATCH);  
956          if (md->mark == NULL) md->mark = markptr;
957          RRETURN(rrc);
958        }        }
959    
960      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
# Line 873  for (;;) Line 968  for (;;)
968      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
969      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
970    
971      /* Non-capturing bracket, except for possessive with unlimited repeat. Loop      /* Non-capturing or atomic group, except for possessive with unlimited
972      for all the alternatives. When we get to the final alternative within the      repeat and ONCE group with no captures. Loop for all the alternatives.
973      brackets, we used to return the result of a recursive call to match()  
974      whatever happened so it was possible to reduce stack usage by turning this      When we get to the final alternative within the brackets, we used to return
975      into a tail recursion, except in the case of a possibly empty group.      the result of a recursive call to match() whatever happened so it was
976      However, now that there is the possibility of (*THEN) occurring in the final      possible to reduce stack usage by turning this into a tail recursion,
977      alternative, this optimization is no longer possible. */      except in the case of a possibly empty group. However, now that there is
978        the possibility of (*THEN) occurring in the final alternative, this
979        optimization is no longer always possible.
980    
981        We can optimize if we know there are no (*THEN)s in the pattern; at present
982        this is the best that can be done.
983    
984        MATCH_ONCE is returned when the end of an atomic group is successfully
985        reached, but subsequent matching fails. It passes back up the tree (causing
986        captured values to be reset) until the original atomic group level is
987        reached. This is tested by comparing md->once_target with the start of the
988        group. At this point, the return is converted into MATCH_NOMATCH so that
989        previous backup points can be taken. */
990    
991        case OP_ONCE:
992      case OP_BRA:      case OP_BRA:
993      case OP_SBRA:      case OP_SBRA:
994      DPRINTF(("start non-capturing bracket\n"));      DPRINTF(("start non-capturing bracket\n"));
995    
996      for (;;)      for (;;)
997        {        {
998        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
999        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,  
1000          /* If this is not a possibly empty group, and there are no (*THEN)s in
1001          the pattern, and this is the final alternative, optimize as described
1002          above. */
1003    
1004          else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1005            {
1006            ecode += _pcre_OP_lengths[*ecode];
1007            goto TAIL_RECURSE;
1008            }
1009    
1010          /* In all other cases, we have to make another call to match(). */
1011    
1012          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
1013          RM2);          RM2);
1014        if (rrc != MATCH_NOMATCH &&  
1015            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* See comment in the code for capturing groups above about handling
1016          THEN. */
1017    
1018          if (rrc == MATCH_THEN)
1019            {
1020            next = ecode + GET(ecode,1);
1021            if (md->start_match_ptr < next &&
1022                (*ecode == OP_ALT || *next == OP_ALT))
1023              rrc = MATCH_NOMATCH;
1024            }
1025    
1026          if (rrc != MATCH_NOMATCH)
1027            {
1028            if (rrc == MATCH_ONCE)
1029              {
1030              const uschar *scode = ecode;
1031              if (*scode != OP_ONCE)           /* If not at start, find it */
1032                {
1033                while (*scode == OP_ALT) scode += GET(scode, 1);
1034                scode -= GET(scode, 1);
1035                }
1036              if (md->once_target == scode) rrc = MATCH_NOMATCH;
1037              }
1038          RRETURN(rrc);          RRETURN(rrc);
1039            }
1040        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1041        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1042        }        }
1043    
1044      if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;      if (md->mark == NULL) md->mark = markptr;
1045      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
1046    
1047      /* Handle possessive capturing brackets with an unlimited repeat. We come      /* Handle possessive capturing brackets with an unlimited repeat. We come
1048      here from BRAZERO with allow_zero set TRUE. The offset_vector values are      here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1049      handled similarly to the normal case above. However, the matching is      handled similarly to the normal case above. However, the matching is
1050      different. The end of these brackets will always be OP_KETRPOS, which      different. The end of these brackets will always be OP_KETRPOS, which
1051      returns MATCH_KETRPOS without going further in the pattern. By this means      returns MATCH_KETRPOS without going further in the pattern. By this means
1052      we can handle the group by iteration rather than recursion, thereby      we can handle the group by iteration rather than recursion, thereby
1053      reducing the amount of stack needed. */      reducing the amount of stack needed. */
1054    
1055      case OP_CBRAPOS:      case OP_CBRAPOS:
1056      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1057      allow_zero = FALSE;      allow_zero = FALSE;
1058    
1059      POSSESSIVE_CAPTURE:      POSSESSIVE_CAPTURE:
1060      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
1061      offset = number << 1;      offset = number << 1;
# Line 925  for (;;) Line 1070  for (;;)
1070      if (offset < md->offset_max)      if (offset < md->offset_max)
1071        {        {
1072        matched_once = FALSE;        matched_once = FALSE;
1073        code_offset = ecode - md->start_code;        code_offset = ecode - md->start_code;
1074    
1075        save_offset1 = md->offset_vector[offset];        save_offset1 = md->offset_vector[offset];
1076        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
# Line 933  for (;;) Line 1078  for (;;)
1078        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
1079    
1080        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1081    
1082        /* Each time round the loop, save the current subject position for use        /* Each time round the loop, save the current subject position for use
1083        when the group matches. For MATCH_MATCH, the group has matched, so we        when the group matches. For MATCH_MATCH, the group has matched, so we
1084        restart it with a new subject starting position, remembering that we had        restart it with a new subject starting position, remembering that we had
1085        at least one match. For MATCH_NOMATCH, carry on with the alternatives, as        at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1086        usual. If we haven't matched any alternatives in any iteration, check to        usual. If we haven't matched any alternatives in any iteration, check to
1087        see if a previous iteration matched. If so, the group has matched;        see if a previous iteration matched. If so, the group has matched;
1088        continue from afterwards. Otherwise it has failed; restore the previous        continue from afterwards. Otherwise it has failed; restore the previous
1089        capture values before returning NOMATCH. */        capture values before returning NOMATCH. */
1090    
1091        for (;;)        for (;;)
1092          {          {
1093          md->offset_vector[md->offset_end - number] =          md->offset_vector[md->offset_end - number] =
1094            (int)(eptr - md->start_subject);            (int)(eptr - md->start_subject);
1095          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1096          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
1097            eptrb, RM63);            eptrb, RM63);
1098          if (rrc == MATCH_KETRPOS)          if (rrc == MATCH_KETRPOS)
1099            {            {
1100            offset_top = md->end_offset_top;            offset_top = md->end_offset_top;
1101            eptr = md->end_match_ptr;            eptr = md->end_match_ptr;
1102            ecode = md->start_code + code_offset;            ecode = md->start_code + code_offset;
1103            save_capture_last = md->capture_last;            save_capture_last = md->capture_last;
1104            matched_once = TRUE;            matched_once = TRUE;
1105            continue;            continue;
1106            }            }
1107          if (rrc != MATCH_NOMATCH &&  
1108              (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* See comment in the code for capturing groups above about handling
1109            RRETURN(rrc);          THEN. */
1110    
1111            if (rrc == MATCH_THEN)
1112              {
1113              next = ecode + GET(ecode,1);
1114              if (md->start_match_ptr < next &&
1115                  (*ecode == OP_ALT || *next == OP_ALT))
1116                rrc = MATCH_NOMATCH;
1117              }
1118    
1119            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1120          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
1121          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
1122          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
1123          }          }
1124    
1125        if (!matched_once)        if (!matched_once)
1126          {          {
1127          md->offset_vector[offset] = save_offset1;          md->offset_vector[offset] = save_offset1;
1128          md->offset_vector[offset+1] = save_offset2;          md->offset_vector[offset+1] = save_offset2;
1129          md->offset_vector[md->offset_end - number] = save_offset3;          md->offset_vector[md->offset_end - number] = save_offset3;
1130          }          }
1131    
1132        if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;        if (md->mark == NULL) md->mark = markptr;
1133        if (allow_zero || matched_once)        if (allow_zero || matched_once)
1134          {          {
1135          ecode += 1 + LINK_SIZE;          ecode += 1 + LINK_SIZE;
1136          break;          break;
1137          }          }
1138    
1139        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1140        }        }
1141    
1142      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1143      as a non-capturing bracket. */      as a non-capturing bracket. */
1144    
# Line 995  for (;;) Line 1150  for (;;)
1150      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1151      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1152    
1153      /* Non-capturing possessive bracket with unlimited repeat. We come here      /* Non-capturing possessive bracket with unlimited repeat. We come here
1154      from BRAZERO with allow_zero = TRUE. The code is similar to the above,      from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1155      without the capturing complication. It is written out separately for speed      without the capturing complication. It is written out separately for speed
1156      and cleanliness. */      and cleanliness. */
1157    
1158      case OP_BRAPOS:      case OP_BRAPOS:
1159      case OP_SBRAPOS:      case OP_SBRAPOS:
1160      allow_zero = FALSE;      allow_zero = FALSE;
1161    
1162      POSSESSIVE_NON_CAPTURE:      POSSESSIVE_NON_CAPTURE:
1163      matched_once = FALSE;      matched_once = FALSE;
1164      code_offset = ecode - md->start_code;      code_offset = ecode - md->start_code;
1165    
1166      for (;;)      for (;;)
1167        {        {
1168        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1169        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
1170          eptrb, RM48);          eptrb, RM48);
1171        if (rrc == MATCH_KETRPOS)        if (rrc == MATCH_KETRPOS)
1172          {          {
1173            offset_top = md->end_offset_top;
1174          eptr = md->end_match_ptr;          eptr = md->end_match_ptr;
1175          ecode = md->start_code + code_offset;          ecode = md->start_code + code_offset;
1176          matched_once = TRUE;          matched_once = TRUE;
1177          continue;          continue;
1178          }          }
1179        if (rrc != MATCH_NOMATCH &&  
1180            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* See comment in the code for capturing groups above about handling
1181          RRETURN(rrc);        THEN. */
1182    
1183          if (rrc == MATCH_THEN)
1184            {
1185            next = ecode + GET(ecode,1);
1186            if (md->start_match_ptr < next &&
1187                (*ecode == OP_ALT || *next == OP_ALT))
1188              rrc = MATCH_NOMATCH;
1189            }
1190    
1191          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1192        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1193        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1194        }        }
1195    
1196      if (matched_once || allow_zero)      if (matched_once || allow_zero)
1197        {        {
1198        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1199        break;        break;
1200        }        }
1201      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
1202    
1203      /* Control never reaches here. */      /* Control never reaches here. */
# Line 1053  for (;;) Line 1219  for (;;)
1219        if (pcre_callout != NULL)        if (pcre_callout != NULL)
1220          {          {
1221          pcre_callout_block cb;          pcre_callout_block cb;
1222          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 2;   /* Version 2 of the callout block */
1223          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1224          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1225          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
# Line 1065  for (;;) Line 1231  for (;;)
1231          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1232          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1233          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1234            cb.mark             = markptr;
1235          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1236          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1237          }          }
# Line 1223  for (;;) Line 1390  for (;;)
1390    
1391      else      else
1392        {        {
1393        md->match_function_type = MATCH_CONDASSERT;        md->match_function_type = MATCH_CONDASSERT;
1394        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1395        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1396          {          {
1397            if (md->end_offset_top > offset_top)
1398              offset_top = md->end_offset_top;  /* Captures may have happened */
1399          condition = TRUE;          condition = TRUE;
1400          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1401          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1402          }          }
1403        else if (rrc != MATCH_NOMATCH &&  
1404                (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1405          assertion; it is therefore treated as NOMATCH. */
1406    
1407          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1408          {          {
1409          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1410          }          }
# Line 1243  for (;;) Line 1415  for (;;)
1415          }          }
1416        }        }
1417    
1418      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one, we can
1419      we used to use tail recursion to avoid using another stack frame, except      use tail recursion to avoid using another stack frame, except when there is
1420      when there was unlimited repeat of a possibly empty group. However, that      unlimited repeat of a possibly empty group. In the latter case, a recursive
1421      strategy no longer works because of the possibility of (*THEN) being      call to match() is always required, unless the second alternative doesn't
1422      encountered in the branch. A recursive call to match() is always required,      exist, in which case we can just plough on. Note that, for compatibility
1423      unless the second alternative doesn't exist, in which case we can just      with Perl, the | in a conditional group is NOT treated as creating two
1424      plough on. */      alternatives. If a THEN is encountered in the branch, it propagates out to
1425        the enclosing alternative (unless nested in a deeper set of alternatives,
1426        of course). */
1427    
1428      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1429        {        {
1430        if (op == OP_SCOND) md->match_function_type = MATCH_CBEGROUP;        if (op != OP_SCOND)
1431            {
1432            ecode += 1 + LINK_SIZE;
1433            goto TAIL_RECURSE;
1434            }
1435    
1436          md->match_function_type = MATCH_CBEGROUP;
1437        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
       if (rrc == MATCH_THEN && md->start_match_ptr == ecode)  
         rrc = MATCH_NOMATCH;  
1438        RRETURN(rrc);        RRETURN(rrc);
1439        }        }
1440      else                         /* Condition false & no alternative */  
1441         /* Condition false & no alternative; continue after the group. */
1442    
1443        else
1444        {        {
1445        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1446        }        }
# Line 1290  for (;;) Line 1471  for (;;)
1471      break;      break;
1472    
1473    
1474      /* End of the pattern, either real or forced. If we are in a recursion, we      /* End of the pattern, either real or forced. */
     should restore the offsets appropriately, and if it's a top-level  
     recursion, continue from after the call. */  
1475    
     case OP_ACCEPT:  
1476      case OP_END:      case OP_END:
1477      if (md->recursive != NULL)      case OP_ACCEPT:
1478        {      case OP_ASSERT_ACCEPT:
       recursion_info *rec = md->recursive;  
       md->recursive = rec->prevrec;  
       memmove(md->offset_vector, rec->offset_save,  
         rec->saved_max * sizeof(int));  
       offset_top = rec->save_offset_top;  
       if (rec->group_num == 0)  
         {  
         ecode = rec->after_call;  
         break;  
         }  
       }  
   
     /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is  
     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of  
     the subject. In both cases, backtracking will then try other alternatives,  
     if any. */  
1479    
1480      else if (eptr == mstart &&      /* If we have matched an empty string, fail if not in an assertion and not
1481          (md->notempty ||      in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1482            (md->notempty_atstart &&      is set and we have matched at the start of the subject. In both cases,
1483              mstart == md->start_subject + md->start_offset)))      backtracking will then try other alternatives, if any. */
1484    
1485        if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1486             md->recursive == NULL &&
1487             (md->notempty ||
1488               (md->notempty_atstart &&
1489                 mstart == md->start_subject + md->start_offset)))
1490        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1491    
1492      /* Otherwise, we have a match. */      /* Otherwise, we have a match. */
1493    
1494      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
1495      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
1496      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
# Line 1337  for (;;) Line 1505  for (;;)
1505      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
1506      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1507      start of each branch to move the current point backwards, so the code at      start of each branch to move the current point backwards, so the code at
1508      this level is identical to the lookahead case. When the assertion is part      this level is identical to the lookahead case. When the assertion is part
1509      of a condition, we want to return immediately afterwards. The caller of      of a condition, we want to return immediately afterwards. The caller of
1510      this incarnation of the match() function will have set MATCH_CONDASSERT in      this incarnation of the match() function will have set MATCH_CONDASSERT in
1511      md->match_function_type, and one of these opcodes will be the first opcode      md->match_function_type, and one of these opcodes will be the first opcode
1512      that is processed. We use a local variable that is preserved over calls to      that is processed. We use a local variable that is preserved over calls to
1513      match() to remember this case. */      match() to remember this case. */
1514    
1515      case OP_ASSERT:      case OP_ASSERT:
# Line 1351  for (;;) Line 1519  for (;;)
1519        condassert = TRUE;        condassert = TRUE;
1520        md->match_function_type = 0;        md->match_function_type = 0;
1521        }        }
1522      else condassert = FALSE;      else condassert = FALSE;
1523    
1524      do      do
1525        {        {
1526        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1527        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1528          {          {
1529          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1530            markptr = md->mark;
1531          break;          break;
1532          }          }
1533        if (rrc != MATCH_NOMATCH &&  
1534            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1535          RRETURN(rrc);        as NOMATCH. */
1536    
1537          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1538        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1539        }        }
1540      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1541    
1542      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1543    
1544      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
# Line 1393  for (;;) Line 1564  for (;;)
1564        condassert = TRUE;        condassert = TRUE;
1565        md->match_function_type = 0;        md->match_function_type = 0;
1566        }        }
1567      else condassert = FALSE;      else condassert = FALSE;
1568    
1569      do      do
1570        {        {
# Line 1404  for (;;) Line 1575  for (;;)
1575          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1576          break;          break;
1577          }          }
1578        if (rrc != MATCH_NOMATCH &&  
1579            (rrc != MATCH_THEN || md->start_match_ptr != ecode))        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1580          RRETURN(rrc);        as NOMATCH. */
1581    
1582          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1583        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1584        }        }
1585      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1586    
1587      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1588    
1589      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1590      continue;      continue;
1591    
# Line 1457  for (;;) Line 1630  for (;;)
1630      if (pcre_callout != NULL)      if (pcre_callout != NULL)
1631        {        {
1632        pcre_callout_block cb;        pcre_callout_block cb;
1633        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 2;   /* Version 2 of the callout block */
1634        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1635        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1636        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
# Line 1469  for (;;) Line 1642  for (;;)
1642        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1643        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1644        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1645          cb.mark             = markptr;
1646        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1647        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1648        }        }
# Line 1479  for (;;) Line 1653  for (;;)
1653      offset data is the offset to the starting bracket from the start of the      offset data is the offset to the starting bracket from the start of the
1654      whole pattern. (This is so that it works from duplicated subpatterns.)      whole pattern. (This is so that it works from duplicated subpatterns.)
1655    
1656      If there are any capturing brackets started but not finished, we have to      The state of the capturing groups is preserved over recursion, and
1657      save their starting points and reinstate them after the recursion. However,      re-instated afterwards. We don't know how many are started and not yet
1658      we don't know how many such there are (offset_top records the completed      finished (offset_top records the completed total) so we just have to save
1659      total) so we just have to save all the potential data. There may be up to      all the potential data. There may be up to 65535 such values, which is too
1660      65535 such values, which is too large to put on the stack, but using malloc      large to put on the stack, but using malloc for small numbers seems
1661      for small numbers seems expensive. As a compromise, the stack is used when      expensive. As a compromise, the stack is used when there are no more than
1662      there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc      REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
     is used. A problem is what to do if the malloc fails ... there is no way of  
     returning to the top level with an error. Save the top REC_STACK_SAVE_MAX  
     values on the stack, and accept that the rest may be wrong.  
1663    
1664      There are also other values that have to be saved. We use a chained      There are also other values that have to be saved. We use a chained
1665      sequence of blocks that actually live on the stack. Thanks to Robin Houston      sequence of blocks that actually live on the stack. Thanks to Robin Houston
1666      for the original version of this logic. */      for the original version of this logic. It has, however, been hacked around
1667        a lot, so he is not to blame for the current way it works. */
1668    
1669      case OP_RECURSE:      case OP_RECURSE:
1670        {        {
1671          recursion_info *ri;
1672          int recno;
1673    
1674        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1675        new_recursive.group_num = (callpat == md->start_code)? 0 :        recno = (callpat == md->start_code)? 0 :
1676          GET2(callpat, 1 + LINK_SIZE);          GET2(callpat, 1 + LINK_SIZE);
1677    
1678          /* Check for repeating a recursion without advancing the subject pointer.
1679          This should catch convoluted mutual recursions. (Some simple cases are
1680          caught at compile time.) */
1681    
1682          for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1683            if (recno == ri->group_num && eptr == ri->subject_position)
1684              RRETURN(PCRE_ERROR_RECURSELOOP);
1685    
1686        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1687    
1688          new_recursive.group_num = recno;
1689          new_recursive.subject_position = eptr;
1690        new_recursive.prevrec = md->recursive;        new_recursive.prevrec = md->recursive;
1691        md->recursive = &new_recursive;        md->recursive = &new_recursive;
1692    
1693        /* Find where to continue from afterwards */        /* Where to continue from afterwards */
1694    
1695        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
       new_recursive.after_call = ecode;  
1696    
1697        /* Now save the offset data. */        /* Now save the offset data */
1698    
1699        new_recursive.saved_max = md->offset_end;        new_recursive.saved_max = md->offset_end;
1700        if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)        if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
# Line 1521  for (;;) Line 1705  for (;;)
1705            (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));            (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1706          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1707          }          }
   
1708        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1709              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1710        new_recursive.save_offset_top = offset_top;  
1711          /* OK, now we can do the recursion. After processing each alternative,
1712        /* OK, now we can do the recursion. For each top-level alternative we        restore the offset data. If there were nested recursions, md->recursive
1713        restore the offset and recursion data. */        might be changed, so reset it before looping. */
1714    
1715        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1716        cbegroup = (*callpat >= OP_SBRA);        cbegroup = (*callpat >= OP_SBRA);
# Line 1536  for (;;) Line 1719  for (;;)
1719          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1720          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1721            md, eptrb, RM6);            md, eptrb, RM6);
1722            memcpy(md->offset_vector, new_recursive.offset_save,
1723                new_recursive.saved_max * sizeof(int));
1724            md->recursive = new_recursive.prevrec;
1725          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1726            {            {
1727            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
           md->recursive = new_recursive.prevrec;  
1728            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1729              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1730            MRRETURN(MATCH_MATCH);  
1731              /* Set where we got to in the subject, and reset the start in case
1732              it was changed by \K. This *is* propagated back out of a recursion,
1733              for Perl compatibility. */
1734    
1735              eptr = md->end_match_ptr;
1736              mstart = md->start_match_ptr;
1737              goto RECURSION_MATCHED;        /* Exit loop; end processing */
1738            }            }
1739          else if (rrc != MATCH_NOMATCH &&  
1740                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))          /* PCRE does not allow THEN to escape beyond a recursion; it is treated
1741            as NOMATCH. */
1742    
1743            else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1744            {            {
1745            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1746            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1554  for (;;) Line 1749  for (;;)
1749            }            }
1750    
1751          md->recursive = &new_recursive;          md->recursive = &new_recursive;
         memcpy(md->offset_vector, new_recursive.offset_save,  
             new_recursive.saved_max * sizeof(int));  
1752          callpat += GET(callpat, 1);          callpat += GET(callpat, 1);
1753          }          }
1754        while (*callpat == OP_ALT);        while (*callpat == OP_ALT);
# Line 1566  for (;;) Line 1759  for (;;)
1759          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1760        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1761        }        }
     /* Control never reaches here */  
   
     /* "Once" brackets are like assertion brackets except that after a match,  
     the point in the subject string is not moved back. Thus there can never be  
     a move back into the brackets. Friedl calls these "atomic" subpatterns.  
     Check the alternative branches in turn - the matching won't pass the KET  
     for this kind of subpattern. If any one branch matches, we carry on as at  
     the end of a normal bracket, leaving the subject pointer, but resetting  
     the start-of-match value in case it was changed by \K. */  
   
     case OP_ONCE:  
     prev = ecode;  
     saved_eptr = eptr;  
   
     do  
       {  
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);  
       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */  
         {  
         mstart = md->start_match_ptr;  
         break;  
         }  
       if (rrc != MATCH_NOMATCH &&  
           (rrc != MATCH_THEN || md->start_match_ptr != ecode))  
         RRETURN(rrc);  
       ecode += GET(ecode,1);  
       }  
     while (*ecode == OP_ALT);  
   
     /* If hit the end of the group (which could be repeated), fail */  
   
     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);  
1762    
1763      /* Continue as from after the assertion, updating the offsets high water      RECURSION_MATCHED:
1764      mark, since extracts may have been taken. */      break;
   
     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);  
   
     offset_top = md->end_offset_top;  
     eptr = md->end_match_ptr;  
   
     /* For a non-repeating ket, just continue at this level. This also  
     happens for a repeating ket if no characters were matched in the group.  
     This is the forcible breaking of infinite loops as implemented in Perl  
     5.005. If there is an options reset, it will get obeyed in the normal  
     course of events. */  
   
     if (*ecode == OP_KET || eptr == saved_eptr)  
       {  
       ecode += 1+LINK_SIZE;  
       break;  
       }  
   
     /* The repeating kets try the rest of the pattern or restart from the  
     preceding bracket, in the appropriate order. The second "call" of match()  
     uses tail recursion, to avoid using another stack frame. */  
   
     if (*ecode == OP_KETRMIN)  
       {  
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM8);  
       if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
       ecode = prev;  
       goto TAIL_RECURSE;  
       }  
     else  /* OP_KETRMAX */  
       {  
       md->match_function_type = MATCH_CBEGROUP;  
       RMATCH(eptr, prev, offset_top, md, eptrb, RM9);  
       if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
       ecode += 1 + LINK_SIZE;  
       goto TAIL_RECURSE;  
       }  
     /* Control never gets here */  
1765    
1766      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1767      bracketed group and go to there. */      bracketed group and go to there. */
# Line 1652  for (;;) Line 1775  for (;;)
1775      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1776      with fixed upper repeat limits are compiled as a number of copies, with the      with fixed upper repeat limits are compiled as a number of copies, with the
1777      optional ones preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1778    
1779      case OP_BRAZERO:      case OP_BRAZERO:
1780      next = ecode + 1;      next = ecode + 1;
1781      RMATCH(eptr, next, offset_top, md, eptrb, RM10);      RMATCH(eptr, next, offset_top, md, eptrb, RM10);
# Line 1660  for (;;) Line 1783  for (;;)
1783      do next += GET(next, 1); while (*next == OP_ALT);      do next += GET(next, 1); while (*next == OP_ALT);
1784      ecode = next + 1 + LINK_SIZE;      ecode = next + 1 + LINK_SIZE;
1785      break;      break;
1786    
1787      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1788      next = ecode + 1;      next = ecode + 1;
1789      do next += GET(next, 1); while (*next == OP_ALT);      do next += GET(next, 1); while (*next == OP_ALT);
# Line 1674  for (;;) Line 1797  for (;;)
1797      do next += GET(next,1); while (*next == OP_ALT);      do next += GET(next,1); while (*next == OP_ALT);
1798      ecode = next + 1 + LINK_SIZE;      ecode = next + 1 + LINK_SIZE;
1799      break;      break;
1800    
1801      /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything      /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1802      here; just jump to the group, with allow_zero set TRUE. */      here; just jump to the group, with allow_zero set TRUE. */
1803    
1804      case OP_BRAPOSZERO:      case OP_BRAPOSZERO:
1805      op = *(++ecode);      op = *(++ecode);
1806      allow_zero = TRUE;      allow_zero = TRUE;
1807      if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;      if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1808        goto POSSESSIVE_NON_CAPTURE;        goto POSSESSIVE_NON_CAPTURE;
# Line 1689  for (;;) Line 1812  for (;;)
1812      case OP_KET:      case OP_KET:
1813      case OP_KETRMIN:      case OP_KETRMIN:
1814      case OP_KETRMAX:      case OP_KETRMAX:
1815      case OP_KETRPOS:      case OP_KETRPOS:
1816      prev = ecode - GET(ecode, 1);      prev = ecode - GET(ecode, 1);
1817    
1818      /* If this was a group that remembered the subject start, in order to break      /* If this was a group that remembered the subject start, in order to break
1819      infinite repeats of empty string matches, retrieve the subject start from      infinite repeats of empty string matches, retrieve the subject start from
1820      the chain. Otherwise, set it NULL. */      the chain. Otherwise, set it NULL. */
1821    
1822      if (*prev >= OP_SBRA)      if (*prev >= OP_SBRA || *prev == OP_ONCE)
1823        {        {
1824        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1825        eptrb = eptrb->epb_prev;              /* Backup to previous group */        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1826        }        }
1827      else saved_eptr = NULL;      else saved_eptr = NULL;
1828    
1829      /* If we are at the end of an assertion group or an atomic group, stop      /* If we are at the end of an assertion group or a non-capturing atomic
1830      matching and return MATCH_MATCH, but record the current high water mark for      group, stop matching and return MATCH_MATCH, but record the current high
1831      use by positive assertions. We also need to record the match start in case      water mark for use by positive assertions. We also need to record the match
1832      it was changed by \K. */      start in case it was changed by \K. */
1833    
1834      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1835          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||           *prev == OP_ONCE_NC)
         *prev == OP_ONCE)  
1836        {        {
1837        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE_NC */
1838        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1839        md->start_match_ptr = mstart;        md->start_match_ptr = mstart;
1840        MRRETURN(MATCH_MATCH);        MRRETURN(MATCH_MATCH);         /* Sets md->mark */
1841        }        }
1842    
1843      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
1844      and if necessary complete handling an extraction by setting the offsets and      and if necessary complete handling an extraction by setting the offsets and
1845      bumping the high water mark. Note that whole-pattern recursion is coded as      bumping the high water mark. Whole-pattern recursion is coded as a recurse
1846      a recurse into group 0, so it won't be picked up here. Instead, we catch it      into group 0, so it won't be picked up here. Instead, we catch it when the
1847      when the OP_END is reached. Other recursion is handled here. */      OP_END is reached. Other recursion is handled here. We just have to record
1848        the current subject position and start match pointer and give a MATCH
1849        return. */
1850    
1851      if (*prev == OP_CBRA || *prev == OP_SCBRA ||      if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1852          *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)          *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
# Line 1735  for (;;) Line 1859  for (;;)
1859        printf("\n");        printf("\n");
1860  #endif  #endif
1861    
1862          /* Handle a recursively called group. */
1863    
1864          if (md->recursive != NULL && md->recursive->group_num == number)
1865            {
1866            md->end_match_ptr = eptr;
1867            md->start_match_ptr = mstart;
1868            RRETURN(MATCH_MATCH);
1869            }
1870    
1871          /* Deal with capturing */
1872    
1873        md->capture_last = number;        md->capture_last = number;
1874        if (offset >= md->offset_max) md->offset_overflow = TRUE; else        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1875          {          {
1876            /* If offset is greater than offset_top, it means that we are
1877            "skipping" a capturing group, and that group's offsets must be marked
1878            unset. In earlier versions of PCRE, all the offsets were unset at the
1879            start of matching, but this doesn't work because atomic groups and
1880            assertions can cause a value to be set that should later be unset.
1881            Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1882            part of the atomic group, but this is not on the final matching path,
1883            so must be unset when 2 is set. (If there is no group 2, there is no
1884            problem, because offset_top will then be 2, indicating no capture.) */
1885    
1886            if (offset > offset_top)
1887              {
1888              register int *iptr = md->offset_vector + offset_top;
1889              register int *iend = md->offset_vector + offset;
1890              while (iptr < iend) *iptr++ = -1;
1891              }
1892    
1893            /* Now make the extraction */
1894    
1895          md->offset_vector[offset] =          md->offset_vector[offset] =
1896            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1897          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1898          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1899          }          }
   
       /* Handle a recursively called group. Restore the offsets  
       appropriately and continue from after the call. */  
   
       if (md->recursive != NULL && md->recursive->group_num == number)  
         {  
         recursion_info *rec = md->recursive;  
         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));  
         md->recursive = rec->prevrec;  
         memcpy(md->offset_vector, rec->offset_save,  
           rec->saved_max * sizeof(int));  
         offset_top = rec->save_offset_top;  
         ecode = rec->after_call;  
         break;  
         }  
1900        }        }
1901    
1902      /* For a non-repeating ket, just continue at this level. This also      /* For an ordinary non-repeating ket, just continue at this level. This
1903      happens for a repeating ket if no characters were matched in the group.      also happens for a repeating ket if no characters were matched in the
1904      This is the forcible breaking of infinite loops as implemented in Perl      group. This is the forcible breaking of infinite loops as implemented in
1905      5.005. If there is an options reset, it will get obeyed in the normal      Perl 5.005. For a non-repeating atomic group that includes captures,
1906      course of events. */      establish a backup point by processing the rest of the pattern at a lower
1907        level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1908        original OP_ONCE level, thereby bypassing intermediate backup points, but
1909        resetting any captures that happened along the way. */
1910    
1911      if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1912        {        {
1913        ecode += 1 + LINK_SIZE;        if (*prev == OP_ONCE)
1914            {
1915            RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
1916            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1917            md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
1918            RRETURN(MATCH_ONCE);
1919            }
1920          ecode += 1 + LINK_SIZE;    /* Carry on at this level */
1921        break;        break;
1922        }        }
1923    
1924      /* OP_KETRPOS is a possessive repeating ket. Remember the current position,      /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
1925      and return the MATCH_KETRPOS. This makes it possible to do the repeats one      and return the MATCH_KETRPOS. This makes it possible to do the repeats one
1926      at a time from the outer level, thus saving stack. */      at a time from the outer level, thus saving stack. */
1927    
1928      if (*ecode == OP_KETRPOS)      if (*ecode == OP_KETRPOS)
1929        {        {
1930        md->end_match_ptr = eptr;        md->end_match_ptr = eptr;
1931        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1932        RRETURN(MATCH_KETRPOS);        RRETURN(MATCH_KETRPOS);
1933        }        }
1934    
1935      /* The normal repeating kets try the rest of the pattern or restart from      /* The normal repeating kets try the rest of the pattern or restart from
1936      the preceding bracket, in the appropriate order. In the second case, we can      the preceding bracket, in the appropriate order. In the second case, we can
1937      use tail recursion to avoid using another stack frame, unless we have an      use tail recursion to avoid using another stack frame, unless we have an
1938      unlimited repeat of a group that can match an empty string. */      atomic group or an unlimited repeat of a group that can match an empty
1939        string. */
1940    
1941      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1942        {        {
1943        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
1944        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1945          if (*prev == OP_ONCE)
1946            {
1947            RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
1948            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1949            md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
1950            RRETURN(MATCH_ONCE);
1951            }
1952        if (*prev >= OP_SBRA)    /* Could match an empty string */        if (*prev >= OP_SBRA)    /* Could match an empty string */
1953          {          {
1954          md->match_function_type = MATCH_CBEGROUP;          md->match_function_type = MATCH_CBEGROUP;
1955          RMATCH(eptr, prev, offset_top, md, eptrb, RM50);          RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
1956          RRETURN(rrc);          RRETURN(rrc);
1957          }          }
# Line 1803  for (;;) Line 1960  for (;;)
1960        }        }
1961      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1962        {        {
1963        if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1964        RMATCH(eptr, prev, offset_top, md, eptrb, RM13);        RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
1965          if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
1966        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1967          if (*prev == OP_ONCE)
1968            {
1969            RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
1970            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1971            md->once_target = prev;
1972            RRETURN(MATCH_ONCE);
1973            }
1974        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1975        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1976        }        }
# Line 1815  for (;;) Line 1980  for (;;)
1980    
1981      case OP_CIRC:      case OP_CIRC:
1982      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1983    
1984      /* Start of subject assertion */      /* Start of subject assertion */
1985    
1986      case OP_SOD:      case OP_SOD:
1987      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1988      ecode++;      ecode++;
1989      break;      break;
1990    
1991      /* Multiline mode: start of subject unless notbol, or after any newline. */      /* Multiline mode: start of subject unless notbol, or after any newline. */
1992    
1993      case OP_CIRCM:      case OP_CIRCM:
# Line 1861  for (;;) Line 2026  for (;;)
2026      ecode++;      ecode++;
2027      break;      break;
2028    
2029      /* Not multiline mode: assert before a terminating newline or before end of      /* Not multiline mode: assert before a terminating newline or before end of
2030      subject unless noteol is set. */      subject unless noteol is set. */
2031    
2032      case OP_DOLL:      case OP_DOLL:
# Line 2017  for (;;) Line 2182  for (;;)
2182      /* Fall through */      /* Fall through */
2183    
2184      case OP_ALLANY:      case OP_ALLANY:
2185      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2186        {        {                            /* not be updated before SCHECK_PARTIAL. */
2187        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2188        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2189        }        }
2190        eptr++;
2191      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2192      ecode++;      ecode++;
2193      break;      break;
# Line 2030  for (;;) Line 2196  for (;;)
2196      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
2197    
2198      case OP_ANYBYTE:      case OP_ANYBYTE:
2199      if (eptr++ >= md->end_subject)      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2200        {        {                            /* not be updated before SCHECK_PARTIAL. */
2201        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2202        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2203        }        }
2204        eptr++;
2205      ecode++;      ecode++;
2206      break;      break;
2207    
# Line 2150  for (;;) Line 2317  for (;;)
2317      switch(c)      switch(c)
2318        {        {
2319        default: MRRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
2320    
2321        case 0x000d:        case 0x000d:
2322        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2323        break;        break;
# Line 2374  for (;;) Line 2541  for (;;)
2541        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2542        }        }
2543      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2544        if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH);
2545        while (eptr < md->end_subject)
2546        {        {
2547        int category = UCD_CATEGORY(c);        int len = 1;
2548        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);        if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2549        while (eptr < md->end_subject)        if (UCD_CATEGORY(c) != ucp_M) break;
2550          {        eptr += len;
         int len = 1;  
         if (!utf8) c = *eptr; else  
           {  
           GETCHARLEN(c, eptr, len);  
           }  
         category = UCD_CATEGORY(c);  
         if (category != ucp_M) break;  
         eptr += len;  
         }  
2551        }        }
2552      ecode++;      ecode++;
2553      break;      break;
# Line 2403  for (;;) Line 2563  for (;;)
2563      loops). */      loops). */
2564    
2565      case OP_REF:      case OP_REF:
2566      case OP_REFI:      case OP_REFI:
2567      caseless = op == OP_REFI;      caseless = op == OP_REFI;
2568      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2569      ecode += 3;      ecode += 3;
2570    
# Line 2472  for (;;) Line 2632  for (;;)
2632    
2633      for (i = 1; i <= min; i++)      for (i = 1; i <= min; i++)
2634        {        {
2635        int slength;        int slength;
2636        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2637          {          {
2638          CHECK_PARTIAL();          CHECK_PARTIAL();
# Line 2492  for (;;) Line 2652  for (;;)
2652        {        {
2653        for (fi = min;; fi++)        for (fi = min;; fi++)
2654          {          {
2655          int slength;          int slength;
2656          RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2657          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2658          if (fi >= max) MRRETURN(MATCH_NOMATCH);          if (fi >= max) MRRETURN(MATCH_NOMATCH);
# Line 2513  for (;;) Line 2673  for (;;)
2673        pp = eptr;        pp = eptr;
2674        for (i = min; i < max; i++)        for (i = min; i < max; i++)
2675          {          {
2676          int slength;          int slength;
2677          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2678            {            {
2679            CHECK_PARTIAL();            CHECK_PARTIAL();
# Line 3246  for (;;) Line 3406  for (;;)
3406      checking can be multibyte. */      checking can be multibyte. */
3407    
3408      case OP_NOT:      case OP_NOT:
3409      case OP_NOTI:      case OP_NOTI:
3410      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
3411        {        {
3412        SCHECK_PARTIAL();        SCHECK_PARTIAL();
# Line 3741  for (;;) Line 3901  for (;;)
3901            case PT_LAMP:            case PT_LAMP:
3902            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3903              {              {
3904                int chartype;
3905              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3906                {                {
3907                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3908                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3909                }                }
3910              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3911              prop_chartype = UCD_CHARTYPE(c);              chartype = UCD_CHARTYPE(c);
3912              if ((prop_chartype == ucp_Lu ||              if ((chartype == ucp_Lu ||
3913                   prop_chartype == ucp_Ll ||                   chartype == ucp_Ll ||
3914                   prop_chartype == ucp_Lt) == prop_fail_result)                   chartype == ucp_Lt) == prop_fail_result)
3915                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3916              }              }
3917            break;            break;
# Line 3764  for (;;) Line 3925  for (;;)
3925                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3926                }                }
3927              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3928              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
             if ((prop_category == prop_value) == prop_fail_result)  
3929                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3930              }              }
3931            break;            break;
# Line 3779  for (;;) Line 3939  for (;;)
3939                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3940                }                }
3941              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3942              prop_chartype = UCD_CHARTYPE(c);              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
             if ((prop_chartype == prop_value) == prop_fail_result)  
3943                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3944              }              }
3945            break;            break;
# Line 3794  for (;;) Line 3953  for (;;)
3953                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3954                }                }
3955              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3956              prop_script = UCD_SCRIPT(c);              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
             if ((prop_script == prop_value) == prop_fail_result)  
3957                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3958              }              }
3959            break;            break;
# Line 3803  for (;;) Line 3961  for (;;)
3961            case PT_ALNUM:            case PT_ALNUM:
3962            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3963              {              {
3964                int category;
3965              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
3966                {                {
3967                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3968                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3969                }                }
3970              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3971              prop_category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
3972              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                    == prop_fail_result)  
3973                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3974              }              }
3975            break;            break;
# Line 3825  for (;;) Line 3983  for (;;)
3983                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3984                }                }
3985              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3986              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
3987                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
3988                     == prop_fail_result)                     == prop_fail_result)
3989                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 3842  for (;;) Line 3999  for (;;)
3999                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4000                }                }
4001              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4002              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
4003                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4004                     == prop_fail_result)                     == prop_fail_result)
4005                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 3853  for (;;) Line 4009  for (;;)
4009            case PT_WORD:            case PT_WORD:
4010            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4011              {              {
4012                int category;
4013              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4014                {                {
4015                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4016                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4017                }                }
4018              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4019              prop_category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
4020              if ((prop_category == ucp_L || prop_category == ucp_N ||              if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
                  c == CHAR_UNDERSCORE)  
4021                     == prop_fail_result)                     == prop_fail_result)
4022                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4023              }              }
# Line 3887  for (;;) Line 4043  for (;;)
4043              MRRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4044              }              }
4045            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4046            prop_category = UCD_CATEGORY(c);            if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH);
           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);  
4047            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4048              {              {
4049              int len = 1;              int len = 1;
4050              if (!utf8) c = *eptr;              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4051                else { GETCHARLEN(c, eptr, len); }              if (UCD_CATEGORY(c) != ucp_M) break;
             prop_category = UCD_CATEGORY(c);  
             if (prop_category != ucp_M) break;  
4052              eptr += len;              eptr += len;
4053              }              }
4054            }            }
# Line 3953  for (;;) Line 4106  for (;;)
4106            switch(c)            switch(c)
4107              {              {
4108              default: MRRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4109    
4110              case 0x000d:              case 0x000d:
4111              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4112              break;              break;
# Line 4230  for (;;) Line 4383  for (;;)
4383            switch(*eptr++)            switch(*eptr++)
4384              {              {
4385              default: MRRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4386    
4387              case 0x000d:              case 0x000d:
4388              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4389              break;              break;
4390    
4391              case 0x000a:              case 0x000a:
4392              break;              break;
4393    
# Line 4440  for (;;) Line 4593  for (;;)
4593            case PT_LAMP:            case PT_LAMP:
4594            for (fi = min;; fi++)            for (fi = min;; fi++)
4595              {              {
4596                int chartype;
4597              RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4598              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4599              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
# Line 4449  for (;;) Line 4603  for (;;)
4603                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4604                }                }
4605              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4606              prop_chartype = UCD_CHARTYPE(c);              chartype = UCD_CHARTYPE(c);
4607              if ((prop_chartype == ucp_Lu ||              if ((chartype == ucp_Lu ||
4608                   prop_chartype == ucp_Ll ||                   chartype == ucp_Ll ||
4609                   prop_chartype == ucp_Lt) == prop_fail_result)                   chartype == ucp_Lt) == prop_fail_result)
4610                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4611              }              }
4612            /* Control never gets here */            /* Control never gets here */
# Line 4469  for (;;) Line 4623  for (;;)
4623                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4624                }                }
4625              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4626              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
             if ((prop_category == prop_value) == prop_fail_result)  
4627                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4628              }              }
4629            /* Control never gets here */            /* Control never gets here */
# Line 4487  for (;;) Line 4640  for (;;)
4640                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4641                }                }
4642              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4643              prop_chartype = UCD_CHARTYPE(c);              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
             if ((prop_chartype == prop_value) == prop_fail_result)  
4644                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4645              }              }
4646            /* Control never gets here */            /* Control never gets here */
# Line 4505  for (;;) Line 4657  for (;;)
4657                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4658                }                }
4659              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4660              prop_script = UCD_SCRIPT(c);              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
             if ((prop_script == prop_value) == prop_fail_result)  
4661                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4662              }              }
4663            /* Control never gets here */            /* Control never gets here */
# Line 4514  for (;;) Line 4665  for (;;)
4665            case PT_ALNUM:            case PT_ALNUM:
4666            for (fi = min;; fi++)            for (fi = min;; fi++)
4667              {              {
4668                int category;
4669              RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4670              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4671              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
# Line 4523  for (;;) Line 4675  for (;;)
4675                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4676                }                }
4677              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4678              prop_category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
4679              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                    == prop_fail_result)  
4680                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4681              }              }
4682            /* Control never gets here */            /* Control never gets here */
# Line 4542  for (;;) Line 4693  for (;;)
4693                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4694                }                }
4695              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4696              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
4697                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
4698                     == prop_fail_result)                     == prop_fail_result)
4699                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4562  for (;;) Line 4712  for (;;)
4712                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4713                }                }
4714              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4715              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
4716                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4717                     == prop_fail_result)                     == prop_fail_result)
4718                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4573  for (;;) Line 4722  for (;;)
4722            case PT_WORD:            case PT_WORD:
4723            for (fi = min;; fi++)            for (fi = min;; fi++)
4724              {              {
4725                int category;
4726              RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
4727              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4728              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
# Line 4582  for (;;) Line 4732  for (;;)
4732                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4733                }                }
4734              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4735              prop_category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
4736              if ((prop_category == ucp_L ||              if ((category == ucp_L ||
4737                   prop_category == ucp_N ||                   category == ucp_N ||
4738                   c == CHAR_UNDERSCORE)                   c == CHAR_UNDERSCORE)
4739                     == prop_fail_result)                     == prop_fail_result)
4740                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4614  for (;;) Line 4764  for (;;)
4764              MRRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4765              }              }
4766            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4767            prop_category = UCD_CATEGORY(c);            if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH);
           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);  
4768            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4769              {              {
4770              int len = 1;              int len = 1;
4771              if (!utf8) c = *eptr;              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4772                else { GETCHARLEN(c, eptr, len); }              if (UCD_CATEGORY(c) != ucp_M) break;
             prop_category = UCD_CATEGORY(c);  
             if (prop_category != ucp_M) break;  
4773              eptr += len;              eptr += len;
4774              }              }
4775            }            }
4776          }          }
   
4777        else        else
4778  #endif     /* SUPPORT_UCP */  #endif     /* SUPPORT_UCP */
4779    
# Line 4948  for (;;) Line 5094  for (;;)
5094            case PT_LAMP:            case PT_LAMP:
5095            for (i = min; i < max; i++)            for (i = min; i < max; i++)
5096              {              {
5097                int chartype;
5098              int len = 1;              int len = 1;
5099              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
5100                {                {
# Line 4955  for (;;) Line 5102  for (;;)
5102                break;                break;
5103                }                }
5104              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5105              prop_chartype = UCD_CHARTYPE(c);              chartype = UCD_CHARTYPE(c);
5106              if ((prop_chartype == ucp_Lu ||              if ((chartype == ucp_Lu ||
5107                   prop_chartype == ucp_Ll ||                   chartype == ucp_Ll ||
5108                   prop_chartype == ucp_Lt) == prop_fail_result)                   chartype == ucp_Lt) == prop_fail_result)
5109                break;                break;
5110              eptr+= len;              eptr+= len;
5111              }              }
# Line 4974  for (;;) Line 5121  for (;;)
5121                break;                break;
5122                }                }
5123              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5124              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
             if ((prop_category == prop_value) == prop_fail_result)  
               break;  
5125              eptr+= len;              eptr+= len;
5126              }              }
5127            break;            break;
# Line 4991  for (;;) Line 5136  for (;;)
5136                break;                break;
5137                }                }
5138              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5139              prop_chartype = UCD_CHARTYPE(c);              if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
             if ((prop_chartype == prop_value) == prop_fail_result)  
               break;  
5140              eptr+= len;              eptr+= len;
5141              }              }
5142            break;            break;
# Line 5008  for (;;) Line 5151  for (;;)
5151                break;                break;
5152                }                }
5153              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5154              prop_script = UCD_SCRIPT(c);              if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
             if ((prop_script == prop_value) == prop_fail_result)  
               break;  
5155              eptr+= len;              eptr+= len;
5156              }              }
5157            break;            break;
# Line 5018  for (;;) Line 5159  for (;;)
5159            case PT_ALNUM:            case PT_ALNUM:
5160            for (i = min; i < max; i++)            for (i = min; i < max; i++)
5161              {              {
5162                int category;
5163              int len = 1;              int len = 1;
5164              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
5165                {                {
# Line 5025  for (;;) Line 5167  for (;;)
5167                break;                break;
5168                }                }
5169              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5170              prop_category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
5171              if ((prop_category == ucp_L || prop_category == ucp_N)              if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                  == prop_fail_result)  
5172                break;                break;
5173              eptr+= len;              eptr+= len;
5174              }              }
# Line 5043  for (;;) Line 5184  for (;;)
5184                break;                break;
5185                }                }
5186              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5187              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
5188                   c == CHAR_FF || c == CHAR_CR)                   c == CHAR_FF || c == CHAR_CR)
5189                   == prop_fail_result)                   == prop_fail_result)
5190                break;                break;
# Line 5062  for (;;) Line 5202  for (;;)
5202                break;                break;
5203                }                }
5204              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5205              prop_category = UCD_CATEGORY(c);              if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
5206                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)                   c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
5207                   == prop_fail_result)                   == prop_fail_result)
5208                break;                break;
# Line 5074  for (;;) Line 5213  for (;;)
5213            case PT_WORD:            case PT_WORD:
5214            for (i = min; i < max; i++)            for (i = min; i < max; i++)
5215              {              {
5216                int category;
5217              int len = 1;              int len = 1;
5218              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
5219                {                {
# Line 5081  for (;;) Line 5221  for (;;)
5221                break;                break;
5222                }                }
5223              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5224              prop_category = UCD_CATEGORY(c);              category = UCD_CATEGORY(c);
5225              if ((prop_category == ucp_L || prop_category == ucp_N ||              if ((category == ucp_L || category == ucp_N ||
5226                   c == CHAR_UNDERSCORE) == prop_fail_result)                   c == CHAR_UNDERSCORE) == prop_fail_result)
5227                break;                break;
5228              eptr+= len;              eptr+= len;
# Line 5112  for (;;) Line 5252  for (;;)
5252          {          {
5253          for (i = min; i < max; i++)          for (i = min; i < max; i++)
5254            {            {
5255              int len = 1;
5256            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
5257              {              {
5258              SCHECK_PARTIAL();              SCHECK_PARTIAL();
5259              break;              break;
5260              }              }
5261            GETCHARINCTEST(c, eptr);            if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5262            prop_category = UCD_CATEGORY(c);            if (UCD_CATEGORY(c) == ucp_M) break;
5263            if (prop_category == ucp_M) break;            eptr += len;
5264            while (eptr < md->end_subject)            while (eptr < md->end_subject)
5265              {              {
5266              int len = 1;              len = 1;
5267              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5268                {              if (UCD_CATEGORY(c) != ucp_M) break;
               GETCHARLEN(c, eptr, len);  
               }  
             prop_category = UCD_CATEGORY(c);  
             if (prop_category != ucp_M) break;  
5269              eptr += len;              eptr += len;
5270              }              }
5271            }            }
# Line 5144  for (;;) Line 5281  for (;;)
5281            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5282            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
5283              {              {
             int len = 1;  
5284              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr; else
5285                {                {
5286                BACKCHAR(eptr);                BACKCHAR(eptr);
5287                GETCHARLEN(c, eptr, len);                GETCHAR(c, eptr);
5288                }                }
5289              prop_category = UCD_CATEGORY(c);              if (UCD_CATEGORY(c) != ucp_M) break;
             if (prop_category != ucp_M) break;  
5290              eptr--;              eptr--;
5291              }              }
5292            }            }
# Line 5215  for (;;) Line 5350  for (;;)
5350                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5351                }                }
5352              }              }
5353            else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */            else
5354                {
5355                eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5356                SCHECK_PARTIAL();
5357                }
5358            break;            break;
5359    
5360            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 5436  for (;;) Line 5575  for (;;)
5575            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5576            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5577            BACKCHAR(eptr);            BACKCHAR(eptr);
5578            if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&            if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
5579                eptr[-1] == '\r') eptr--;                eptr[-1] == '\r') eptr--;
5580            }            }
5581          }          }
# Line 5648  for (;;) Line 5787  for (;;)
5787            RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
5788            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5789            eptr--;            eptr--;
5790            if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&            if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
5791                eptr[-1] == '\r') eptr--;                eptr[-1] == '\r') eptr--;
5792            }            }
5793          }          }
# Line 5688  switch (frame->Xwhere) Line 5827  switch (frame->Xwhere)
5827    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5828    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5829    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5830    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
5831      LBL(65) LBL(66)
5832  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5833    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5834    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
# Line 5790  pcre_exec(const pcre *argument_re, const Line 5930  pcre_exec(const pcre *argument_re, const
5930    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5931    int offsetcount)    int offsetcount)
5932  {  {
5933  int rc, resetcount, ocount;  int rc, ocount, arg_offset_max;
5934  int first_byte = -1;  int first_byte = -1;
5935  int req_byte = -1;  int req_byte = -1;
5936  int req_byte2 = -1;  int req_byte2 = -1;
# Line 5826  if (re == NULL || subject == NULL || Line 5966  if (re == NULL || subject == NULL ||
5966  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5967  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5968    
5969  /* This information is for finding all the numbers associated with a given  /* These two settings are used in the code for checking a UTF-8 string that
5970  name, for condition testing. */  follows immediately afterwards. Other values in the md block are used only
5971    during "normal" pcre_exec() processing, not when the JIT support is in use,
5972    so they are set up later. */
5973    
5974    utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5975    md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5976                  ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5977    
5978    /* Check a UTF-8 string if required. Pass back the character offset and error
5979    code for an invalid string if a results vector is available. */
5980    
5981    #ifdef SUPPORT_UTF8
5982    if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5983      {
5984      int erroroffset;
5985      int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
5986      if (errorcode != 0)
5987        {
5988        if (offsetcount >= 2)
5989          {
5990          offsets[0] = erroroffset;
5991          offsets[1] = errorcode;
5992          }
5993        return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
5994          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5995        }
5996    
5997      /* Check that a start_offset points to the start of a UTF-8 character. */
5998      if (start_offset > 0 && start_offset < length &&
5999          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
6000        return PCRE_ERROR_BADUTF8_OFFSET;
6001      }
6002    #endif
6003    
6004    /* If the pattern was successfully studied with JIT support, run the JIT
6005    executable instead of the rest of this function. Most options must be set at
6006    compile time for the JIT code to be usable. Fall back to the normal code path if
6007    an unsupported flag is set. In particular, JIT does not support partial
6008    matching. */
6009    
6010    #ifdef SUPPORT_JIT
6011    if (extra_data != NULL
6012        && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
6013        && extra_data->executable_jit != NULL
6014        && (extra_data->flags & PCRE_EXTRA_TABLES) == 0
6015        && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6016                        PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
6017      return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,
6018        start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
6019        ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
6020    #endif
6021    
6022    /* Carry on with non-JIT matching. This information is for finding all the
6023    numbers associated with a given name, for condition testing. */
6024    
6025  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (uschar *)re + re->name_table_offset;
6026  md->name_count = re->name_count;  md->name_count = re->name_count;
# Line 5894  md->end_subject = md->start_subject + le Line 6087  md->end_subject = md->start_subject + le
6087  end_subject = md->end_subject;  end_subject = md->end_subject;
6088    
6089  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  
6090  md->use_ucp = (re->options & PCRE_UCP) != 0;  md->use_ucp = (re->options & PCRE_UCP) != 0;
6091  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6092    
6093    /* Some options are unpacked into BOOL variables in the hope that testing
6094    them will be faster than individual option bits. */
6095    
6096  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
6097  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
6098  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
6099  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6100  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  
               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;  
6101  md->hitend = FALSE;  md->hitend = FALSE;
6102  md->mark = NULL;                        /* In case never set */  md->mark = NULL;                        /* In case never set */
6103    
6104  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
6105    md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6106    
6107  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
6108  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
# Line 5985  defined (though never set). So there's n Line 6180  defined (though never set). So there's n
6180  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6181    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
6182    
 /* Check a UTF-8 string if required. Pass back the character offset and error  
 code for an invalid string if a results vector is available. */  
   
 #ifdef SUPPORT_UTF8  
 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  
   {  
   int erroroffset;  
   int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);  
   if (errorcode != 0)  
     {  
     if (offsetcount >= 2)  
       {  
       offsets[0] = erroroffset;  
       offsets[1] = errorcode;  
       }  
     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?  
       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;  
     }  
   
   /* Check that a start_offset points to the start of a UTF-8 character. */  
   
   if (start_offset > 0 && start_offset < length &&  
       (((USPTR)subject)[start_offset] & 0xc0) == 0x80)  
     return PCRE_ERROR_BADUTF8_OFFSET;  
   }  
 #endif  
   
6183  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
6184  hold, we get a temporary chunk of working store to use during the matching.  hold, we get a temporary chunk of working store to use during the matching.
6185  Otherwise, we can use the vector supplied, rounding down its size to a multiple  Otherwise, we can use the vector supplied, rounding down its size to a multiple
6186  of 3. */  of 3. */
6187    
6188  ocount = offsetcount - (offsetcount % 3);  ocount = offsetcount - (offsetcount % 3);
6189    arg_offset_max = (2*ocount)/3;
6190    
6191  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
6192    {    {
# Line 6034  md->offset_max = (2*ocount)/3; Line 6203  md->offset_max = (2*ocount)/3;
6203  md->offset_overflow = FALSE;  md->offset_overflow = FALSE;
6204  md->capture_last = -1;  md->capture_last = -1;
6205    
 /* Compute the minimum number of offsets that we need to reset each time. Doing  
 this makes a huge difference to execution time when there aren't many brackets  
 in the pattern. */  
   
 resetcount = 2 + re->top_bracket * 2;  
 if (resetcount > offsetcount) resetcount = ocount;  
   
6206  /* Reset the working variable associated with each extraction. These should  /* Reset the working variable associated with each extraction. These should
6207  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
6208  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. Also, unset the
6209    offsets for the matched string. This is really just for tidiness with callouts,
6210    in case they inspect these fields. */
6211    
6212  if (md->offset_vector != NULL)  if (md->offset_vector != NULL)
6213    {    {
6214    register int *iptr = md->offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
6215    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - re->top_bracket;
6216      if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6217    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
6218      md->offset_vector[0] = md->offset_vector[1] = -1;
6219    }    }
6220    
6221  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 6083  if ((re->flags & PCRE_REQCHSET) != 0) Line 6249  if ((re->flags & PCRE_REQCHSET) != 0)
6249    }    }
6250    
6251    
6252    
6253    
6254  /* ==========================================================================*/  /* ==========================================================================*/
6255    
6256  /* Loop for handling unanchored repeated matching attempts; for anchored regexes  /* Loop for handling unanchored repeated matching attempts; for anchored regexes
# Line 6093  for(;;) Line 6261  for(;;)
6261    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
6262    USPTR new_start_match;    USPTR new_start_match;
6263    
   /* Reset the maximum number of extractions we might see. */  
   
   if (md->offset_vector != NULL)  
     {  
     register int *iptr = md->offset_vector;  
     register int *iend = iptr + resetcount;  
     while (iptr < iend) *iptr++ = -1;  
     }  
   
6264    /* If firstline is TRUE, the start of the match is constrained to the first    /* If firstline is TRUE, the start of the match is constrained to the first
6265    line of a multiline string. That is, the match must be before or at the first    line of a multiline string. That is, the match must be before or at the first
6266    newline. Implement this by temporarily adjusting end_subject so that we stop    newline. Implement this by temporarily adjusting end_subject so that we stop
# Line 6207  for(;;) Line 6366  for(;;)
6366    /* The following two optimizations are disabled for partial matching or if    /* The following two optimizations are disabled for partial matching or if
6367    disabling is explicitly requested. */    disabling is explicitly requested. */
6368    
6369    if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6370      {      {
6371      /* If the pattern was studied, a minimum subject length may be set. This is      /* If the pattern was studied, a minimum subject length may be set. This is
6372      a lower bound; no actual string of that length may actually match the      a lower bound; no actual string of that length may actually match the
# Line 6290  for(;;) Line 6449  for(;;)
6449    md->start_match_ptr = start_match;    md->start_match_ptr = start_match;
6450    md->start_used_ptr = start_match;    md->start_used_ptr = start_match;
6451    md->match_call_count = 0;    md->match_call_count = 0;
6452    md->match_function_type = 0;    md->match_function_type = 0;
6453      md->end_offset_top = 0;
6454    rc = match(start_match, md->start_code, start_match, NULL, 2, md, NULL, 0);    rc = match(start_match, md->start_code, start_match, NULL, 2, md, NULL, 0);
6455    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6456    
# Line 6401  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6561  if (rc == MATCH_MATCH || rc == MATCH_ACC
6561    {    {
6562    if (using_temporary_offsets)    if (using_temporary_offsets)
6563      {      {
6564      if (offsetcount >= 4)      if (arg_offset_max >= 4)
6565        {        {
6566        memcpy(offsets + 2, md->offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
6567          (offsetcount - 2) * sizeof(int));          (arg_offset_max - 2) * sizeof(int));
6568        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
6569        }        }
6570      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
6571      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
6572      (pcre_free)(md->offset_vector);      (pcre_free)(md->offset_vector);
6573      }      }
6574    
6575    /* Set the return code to the number of captured strings, or 0 if there are    /* Set the return code to the number of captured strings, or 0 if there were
6576    too many to fit into the vector. */    too many to fit into the vector. */
6577    
6578    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
6579        0 : md->end_offset_top/2;
6580    
6581      /* If there is space in the offset vector, set any unused pairs at the end of
6582      the pattern to -1 for backwards compatibility. It is documented that this
6583      happens. In earlier versions, the whole set of potential capturing offsets
6584      was set to -1 each time round the loop, but this is handled differently now.
6585      "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
6586      those at the end that need unsetting here. We can't just unset them all at
6587      the start of the whole thing because they may get set in one branch that is
6588      not the final matching branch. */
6589    
6590      if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
6591        {
6592        register int *iptr, *iend;
6593        int resetcount = 2 + re->top_bracket * 2;
6594        if (resetcount > offsetcount) resetcount = ocount;
6595        iptr = offsets + md->end_offset_top;
6596        iend = offsets + resetcount;
6597        while (iptr < iend) *iptr++ = -1;
6598        }
6599    
6600    /* If there is space, set up the whole thing as substring 0. The value of    /* If there is space, set up the whole thing as substring 0. The value of
6601    md->start_match_ptr might be modified if \K was encountered on the success    md->start_match_ptr might be modified if \K was encountered on the success

Legend:
Removed from v.609  
changed lines
  Added in v.742

  ViewVC Help
Powered by ViewVC 1.1.5