/[pcre]/code/branches/pcre16/pcre_exec.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 178 by ph10, Wed Jun 13 08:44:34 2007 UTC revision 211 by ph10, Thu Aug 9 09:52:43 2007 UTC
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include <config.h>
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 53  possible. There are also some static sup Line 57  possible. There are also some static sup
57  #undef min  #undef min
58  #undef max  #undef max
59    
 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
   
 #define EPTR_WORK_SIZE (1000)  
   
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 70  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_THEN         (-996)
78    
79  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
80  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 212  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 218  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
218         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
219         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
220         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
221         RM41,  RM42, RM43, RM44, RM45, RM46, RM47 };         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
222           RM51,  RM52, RM53 };
223    
224    
225  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
# Line 384  Arguments: Line 391  Arguments:
391                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
392                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
393                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
394     rdepth      the recursion depth     rdepth      the recursion depth
395    
396  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 586  original_ims = ims;    /* Save for reset Line 592  original_ims = ims;    /* Save for reset
592  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
593  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
594  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
595  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
596  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
597  already used. */  block that is used is on the stack, so a new one may be required for each
598    match(). */
599    
600  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
601    {    {
602    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
603    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
604      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
605    }    }
606    
607  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 621  for (;;) Line 621  for (;;)
621    
622    switch(op)    switch(op)
623      {      {
624        case OP_FAIL:
625        return MATCH_NOMATCH;
626    
627        case OP_PRUNE:
628        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
629          ims, eptrb, flags, RM51);
630        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
631        return MATCH_PRUNE;
632    
633        case OP_COMMIT:
634        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
635          ims, eptrb, flags, RM52);
636        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
637        return MATCH_COMMIT;
638    
639        case OP_SKIP:
640        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
641          ims, eptrb, flags, RM53);
642        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
643        md->start_match_ptr = eptr;   /* Pass back current position */
644        return MATCH_SKIP;
645    
646        case OP_THEN:
647        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
648          ims, eptrb, flags, RM53);
649        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
650        return MATCH_THEN;
651    
652      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
653      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
654      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 662  for (;;) Line 690  for (;;)
690          {          {
691          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
692            ims, eptrb, flags, RM1);            ims, eptrb, flags, RM1);
693          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
694          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
695          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
696          }          }
# Line 677  for (;;) Line 705  for (;;)
705        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
706        }        }
707    
708      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
709      bracket. */      as a non-capturing bracket. */
710    
711        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
712        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
713    
714      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
715    
716        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
717        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
718    
719      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
720      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
721      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
722      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
723        is set. */
724    
725      case OP_BRA:      case OP_BRA:
726      case OP_SBRA:      case OP_SBRA:
# Line 693  for (;;) Line 728  for (;;)
728      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
729      for (;;)      for (;;)
730        {        {
731        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
732          {          {
733          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
734          flags |= match_tail_recursed;            {
735          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
736          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
737              goto TAIL_RECURSE;
738              }
739    
740            /* Possibly empty group; can't use tail recursion. */
741    
742            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
743              eptrb, flags, RM48);
744            RRETURN(rrc);
745          }          }
746    
747        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
# Line 706  for (;;) Line 749  for (;;)
749    
750        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
751          eptrb, flags, RM2);          eptrb, flags, RM2);
752        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
753        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
754        }        }
755      /* Control never reaches here. */      /* Control never reaches here. */
# Line 754  for (;;) Line 797  for (;;)
797          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
798          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
799          }          }
800        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
801          {          {
802          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
803          }          }
# Line 766  for (;;) Line 809  for (;;)
809        }        }
810    
811      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
812      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
813      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
814        group. If the second alternative doesn't exist, we can just plough on. */
815    
816      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
817        {        {
818        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
819        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
820        goto TAIL_RECURSE;          {
821            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
822            RRETURN(rrc);
823            }
824          else                       /* Group must match something */
825            {
826            flags = 0;
827            goto TAIL_RECURSE;
828            }
829        }        }
830      else      else                         /* Condition false & no 2nd alternative */
831        {        {
832        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
833        }        }
834      break;      break;
835    
836    
837      /* End of the pattern. If we are in a top-level recursion, we should      /* End of the pattern, either real or forced. If we are in a top-level
838      restore the offsets appropriately and continue from after the call. */      recursion, we should restore the offsets appropriately and continue from
839        after the call. */
840    
841        case OP_ACCEPT:
842      case OP_END:      case OP_END:
843      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
844        {        {
# Line 805  for (;;) Line 859  for (;;)
859      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
860      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
861      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
862      md->start_match_ptr = mstart;  /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
863      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
864    
865      /* Change option settings */      /* Change option settings */
# Line 829  for (;;) Line 883  for (;;)
883        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
884          RM4);          RM4);
885        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
886        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
887        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
888        }        }
889      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 856  for (;;) Line 910  for (;;)
910        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
911          RM5);          RM5);
912        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
913        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
914        ecode += GET(ecode,1);        ecode += GET(ecode,1);
915        }        }
916      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 880  for (;;) Line 934  for (;;)
934          {          {
935          eptr--;          eptr--;
936          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
937          BACKCHAR(eptr)          BACKCHAR(eptr);
938          }          }
939        }        }
940      else      else
# Line 993  for (;;) Line 1047  for (;;)
1047              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1048            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
1049            }            }
1050          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1051            {            {
1052            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1053            RRETURN(rrc);            RRETURN(rrc);
# Line 1027  for (;;) Line 1081  for (;;)
1081    
1082      do      do
1083        {        {
1084        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
         eptrb, 0, RM7);  
1085        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1086        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1087        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1088        }        }
1089      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1073  for (;;) Line 1126  for (;;)
1126    
1127      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1128        {        {
1129        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
         RM8);  
1130        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1131        ecode = prev;        ecode = prev;
1132        flags = match_tail_recursed;        flags = 0;
1133        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1134        }        }
1135      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
# Line 1085  for (;;) Line 1137  for (;;)
1137        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1138        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1139        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1140        flags = match_tail_recursed;        flags = 0;
1141        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1142        }        }
1143      /* Control never gets here */      /* Control never gets here */
# Line 1216  for (;;) Line 1268  for (;;)
1268    
1269      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1270      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1271      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1272        unlimited repeat of a group that can match an empty string. */
1273    
1274      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1275    
1276      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1277        {        {
1278        RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
         RM12);  
1279        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1280          if (flags != 0)    /* Could match an empty string */
1281            {
1282            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1283            RRETURN(rrc);
1284            }
1285        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1286        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1287        }        }
1288      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
# Line 1234  for (;;) Line 1290  for (;;)
1290        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1291        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1292        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1293        flags = match_tail_recursed;        flags = 0;
1294        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1295        }        }
1296      /* Control never gets here */      /* Control never gets here */
# Line 2033  for (;;) Line 2089  for (;;)
2089            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2090            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2091            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2092            BACKCHAR(eptr)            BACKCHAR(eptr);
2093            }            }
2094          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2095          }          }
# Line 2786  for (;;) Line 2842  for (;;)
2842            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2843              {              {
2844              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2845              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2846              }              }
2847            break;            break;
2848    
# Line 2794  for (;;) Line 2850  for (;;)
2850            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2851              {              {
2852              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2853              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2854              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2855              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2856                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 2807  for (;;) Line 2863  for (;;)
2863            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2864              {              {
2865              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2866              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2867              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2868              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2869                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2818  for (;;) Line 2874  for (;;)
2874            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2875              {              {
2876              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2877              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2878              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2879              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2880                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2829  for (;;) Line 2885  for (;;)
2885            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2886              {              {
2887              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2888              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2889              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2890              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2891                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2941  for (;;) Line 2997  for (;;)
2997              }              }
2998            }            }
2999          break;          break;
3000    
3001          case OP_HSPACE:          case OP_HSPACE:
3002          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3003            {            {
# Line 2973  for (;;) Line 3029  for (;;)
3029              }              }
3030            }            }
3031          break;          break;
3032    
3033          case OP_NOT_VSPACE:          case OP_NOT_VSPACE:
3034          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3035            {            {
# Line 2993  for (;;) Line 3049  for (;;)
3049              }              }
3050            }            }
3051          break;          break;
3052    
3053          case OP_VSPACE:          case OP_VSPACE:
3054          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3055            {            {
# Line 3009  for (;;) Line 3065  for (;;)
3065              case 0x85:      /* NEL */              case 0x85:      /* NEL */
3066              case 0x2028:    /* LINE SEPARATOR */              case 0x2028:    /* LINE SEPARATOR */
3067              case 0x2029:    /* PARAGRAPH SEPARATOR */              case 0x2029:    /* PARAGRAPH SEPARATOR */
3068              break;              break;
3069              }              }
3070            }            }
3071          break;          break;
# Line 3150  for (;;) Line 3206  for (;;)
3206              case 0x09:      /* HT */              case 0x09:      /* HT */
3207              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
3208              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
3209              break;              break;
3210              }              }
3211            }            }
3212          break;          break;
# Line 3184  for (;;) Line 3240  for (;;)
3240              case 0x0c:      /* FF */              case 0x0c:      /* FF */
3241              case 0x0d:      /* CR */              case 0x0d:      /* CR */
3242              case 0x85:      /* NEL */              case 0x85:      /* NEL */
3243              break;              break;
3244              }              }
3245            }            }
3246          break;          break;
# Line 3702  for (;;) Line 3758  for (;;)
3758            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3759            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3760            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3761            BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
3762            }            }
3763          }          }
3764    
# Line 3741  for (;;) Line 3797  for (;;)
3797            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
3798              {              {
3799              int len = 1;              int len = 1;
             BACKCHAR(eptr);  
3800              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr; else
3801                {                {
3802                  BACKCHAR(eptr);
3803                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3804                }                }
3805              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
# Line 3764  for (;;) Line 3820  for (;;)
3820          switch(ctype)          switch(ctype)
3821            {            {
3822            case OP_ANY:            case OP_ANY:
   
           /* Special code is required for UTF8, but when the maximum is  
           unlimited we don't need it, so we repeat the non-UTF8 code. This is  
           probably worth it, because .* is quite a common idiom. */  
   
3823            if (max < INT_MAX)            if (max < INT_MAX)
3824              {              {
3825              if ((ims & PCRE_DOTALL) == 0)              if ((ims & PCRE_DOTALL) == 0)
# Line 3801  for (;;) Line 3852  for (;;)
3852                  {                  {
3853                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3854                  eptr++;                  eptr++;
3855                    while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3856                  }                  }
               break;  
3857                }                }
3858              else              else
3859                {                {
3860                c = max - min;                eptr = md->end_subject;
               if (c > (unsigned int)(md->end_subject - eptr))  
                 c = md->end_subject - eptr;  
               eptr += c;  
3861                }                }
3862              }              }
3863            break;            break;
# Line 3845  for (;;) Line 3893  for (;;)
3893            break;            break;
3894    
3895            case OP_NOT_HSPACE:            case OP_NOT_HSPACE:
3896            case OP_HSPACE:            case OP_HSPACE:
3897            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3898              {              {
3899              BOOL gotspace;              BOOL gotspace;
3900              int len = 1;              int len = 1;
3901              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3902              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3903              switch(c)              switch(c)
3904                {                {
3905                default: gotspace = FALSE; break;                default: gotspace = FALSE; break;
3906                case 0x09:      /* HT */                case 0x09:      /* HT */
3907                case 0x20:      /* SPACE */                case 0x20:      /* SPACE */
3908                case 0xa0:      /* NBSP */                case 0xa0:      /* NBSP */
# Line 3875  for (;;) Line 3923  for (;;)
3923                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3924                case 0x3000:    /* IDEOGRAPHIC SPACE */                case 0x3000:    /* IDEOGRAPHIC SPACE */
3925                gotspace = TRUE;                gotspace = TRUE;
3926                break;                break;
3927                }                }
3928              if (gotspace == (ctype == OP_NOT_HSPACE)) break;              if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3929              eptr += len;              eptr += len;
# Line 3883  for (;;) Line 3931  for (;;)
3931            break;            break;
3932    
3933            case OP_NOT_VSPACE:            case OP_NOT_VSPACE:
3934            case OP_VSPACE:            case OP_VSPACE:
3935            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3936              {              {
3937              BOOL gotspace;              BOOL gotspace;
3938              int len = 1;              int len = 1;
3939              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3940              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3941              switch(c)              switch(c)
3942                {                {
3943                default: gotspace = FALSE; break;                default: gotspace = FALSE; break;
3944                case 0x0a:      /* LF */                case 0x0a:      /* LF */
3945                case 0x0b:      /* VT */                case 0x0b:      /* VT */
3946                case 0x0c:      /* FF */                case 0x0c:      /* FF */
# Line 3903  for (;;) Line 3951  for (;;)
3951                gotspace = TRUE;                gotspace = TRUE;
3952                break;                break;
3953                }                }
3954              if (gotspace == (ctype == OP_NOT_VSPACE)) break;              if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3955              eptr += len;              eptr += len;
3956              }              }
3957            break;            break;
# Line 3990  for (;;) Line 4038  for (;;)
4038            }            }
4039          }          }
4040        else        else
4041  #endif  #endif  /* SUPPORT_UTF8 */
4042    
4043        /* Not UTF-8 mode */        /* Not UTF-8 mode */
4044          {          {
# Line 4040  for (;;) Line 4088  for (;;)
4088              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
4089              c = *eptr;              c = *eptr;
4090              if (c == 0x09 || c == 0x20 || c == 0xa0) break;              if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4091              eptr++;              eptr++;
4092              }              }
4093            break;            break;
4094    
# Line 4050  for (;;) Line 4098  for (;;)
4098              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
4099              c = *eptr;              c = *eptr;
4100              if (c != 0x09 && c != 0x20 && c != 0xa0) break;              if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4101              eptr++;              eptr++;
4102              }              }
4103            break;            break;
4104    
# Line 4061  for (;;) Line 4109  for (;;)
4109              c = *eptr;              c = *eptr;
4110              if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)              if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4111                break;                break;
4112              eptr++;              eptr++;
4113              }              }
4114            break;            break;
4115    
# Line 4298  const uschar *start_bits = NULL; Line 4346  const uschar *start_bits = NULL;
4346  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
4347  USPTR end_subject;  USPTR end_subject;
4348  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
 eptrblock eptrchain[EPTR_WORK_SIZE];  
4349    
4350  pcre_study_data internal_study;  pcre_study_data internal_study;
4351  const pcre_study_data *study;  const pcre_study_data *study;
# Line 4384  md->partial = (options & PCRE_PARTIAL) ! Line 4431  md->partial = (options & PCRE_PARTIAL) !
4431  md->hitend = FALSE;  md->hitend = FALSE;
4432    
4433  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
 md->eptrchain = eptrchain;              /* Make workspace generally available */  
4434    
4435  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
4436  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
# Line 4540  the loop runs just once. */ Line 4586  the loop runs just once. */
4586  for(;;)  for(;;)
4587    {    {
4588    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
4589      USPTR new_start_match;
4590    
4591    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
4592    
# Line 4680  for(;;) Line 4727  for(;;)
4727    
4728    /* OK, we can now run the match. */    /* OK, we can now run the match. */
4729    
4730    md->start_match_ptr = start_match;      /* Insurance */    md->start_match_ptr = start_match;
4731    md->match_call_count = 0;    md->match_call_count = 0;
4732    md->eptrn = 0;                          /* Next free eptrchain slot */    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
   rc = match(start_match, md->start_code, start_match, 2, md,  
     ims, NULL, 0, 0);  
4733    
4734    /* Any return other than MATCH_NOMATCH breaks the loop. */    switch(rc)
4735        {
4736        /* NOMATCH and PRUNE advance by one character. THEN at this level acts
4737        exactly like PRUNE. */
4738    
4739        case MATCH_NOMATCH:
4740        case MATCH_PRUNE:
4741        case MATCH_THEN:
4742        new_start_match = start_match + 1;
4743    #ifdef SUPPORT_UTF8
4744        if (utf8)
4745          while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
4746            new_start_match++;
4747    #endif
4748        break;
4749    
4750        /* SKIP passes back the next starting point explicitly. */
4751    
4752        case MATCH_SKIP:
4753        new_start_match = md->start_match_ptr;
4754        break;
4755    
4756        /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
4757    
4758    if (rc != MATCH_NOMATCH) break;      case MATCH_COMMIT:
4759        rc = MATCH_NOMATCH;
4760        goto ENDLOOP;
4761    
4762        /* Any other return is some kind of error. */
4763    
4764        default:
4765        goto ENDLOOP;
4766        }
4767    
4768      /* Control reaches here for the various types of "no match at this point"
4769      result. Reset the code to MATCH_NOMATCH for subsequent checking. */
4770    
4771      rc = MATCH_NOMATCH;
4772    
4773    /* If PCRE_FIRSTLINE is set, the match must happen before or at the first    /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4774    newline in the subject (though it may continue over the newline). Therefore,    newline in the subject (though it may continue over the newline). Therefore,
# Line 4696  for(;;) Line 4776  for(;;)
4776    
4777    if (firstline && IS_NEWLINE(start_match)) break;    if (firstline && IS_NEWLINE(start_match)) break;
4778    
4779    /* Advance the match position by one character. */    /* Advance to new matching position */
4780    
4781    start_match++;    start_match = new_start_match;
 #ifdef SUPPORT_UTF8  
   if (utf8)  
     while(start_match < end_subject && (*start_match & 0xc0) == 0x80)  
       start_match++;  
 #endif  
4782    
4783    /* Break the loop if the pattern is anchored or if we have passed the end of    /* Break the loop if the pattern is anchored or if we have passed the end of
4784    the subject. */    the subject. */
# Line 4729  for(;;) Line 4804  for(;;)
4804  /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping  /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4805  conditions is true:  conditions is true:
4806    
4807  (1) The pattern is anchored;  (1) The pattern is anchored or the match was failed by (*COMMIT);
4808    
4809  (2) We are past the end of the subject;  (2) We are past the end of the subject;
4810    
# Line 4744  processing, copy those that we can. In t Line 4819  processing, copy those that we can. In t
4819  certain parts of the pattern were not used, even though there are more  certain parts of the pattern were not used, even though there are more
4820  capturing parentheses than vector slots. */  capturing parentheses than vector slots. */
4821    
4822    ENDLOOP:
4823    
4824  if (rc == MATCH_MATCH)  if (rc == MATCH_MATCH)
4825    {    {
4826    if (using_temporary_offsets)    if (using_temporary_offsets)

Legend:
Removed from v.178  
changed lines
  Added in v.211

  ViewVC Help
Powered by ViewVC 1.1.5