/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 208 by ph10, Mon Aug 6 15:23:29 2007 UTC | revision 210 by ph10, Wed Aug 8 14:24:50 2007 UTC
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_THEN         (-996)
78    
79  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
80  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 210  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 218  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
218         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
219         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
220         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
221         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50 };         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
222           RM51,  RM52, RM53 };
223    
224    
225  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
# Line 612  for (;;) Line 621  for (;;)
621    
622    switch(op)    switch(op)
623      {      {
624        case OP_FAIL:
625        return MATCH_NOMATCH;
626    
627        case OP_PRUNE:
628        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
629          ims, eptrb, flags, RM51);
630        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
631        return MATCH_PRUNE;
632    
633        case OP_COMMIT:
634        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
635          ims, eptrb, flags, RM52);
636        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
637        return MATCH_COMMIT;
638    
639        case OP_SKIP:
640        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
641          ims, eptrb, flags, RM53);
642        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
643        md->start_match_ptr = eptr;   /* Pass back current position */
644        return MATCH_SKIP;
645    
646        case OP_THEN:
647        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
648          ims, eptrb, flags, RM53);
649        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
650        return MATCH_THEN;
651    
652      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
653      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
654      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 653  for (;;) Line 690  for (;;)
690          {          {
691          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
692            ims, eptrb, flags, RM1);            ims, eptrb, flags, RM1);
693          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
694          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
695          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
696          }          }
# Line 712  for (;;) Line 749  for (;;)
749    
750        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
751          eptrb, flags, RM2);          eptrb, flags, RM2);
752        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
753        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
754        }        }
755      /* Control never reaches here. */      /* Control never reaches here. */
# Line 760  for (;;) Line 797  for (;;)
797          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
798          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
799          }          }
800        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
801          {          {
802          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
803          }          }
# Line 797  for (;;) Line 834  for (;;)
834      break;      break;
835    
836    
837      /* End of the pattern. If we are in a top-level recursion, we should      /* End of the pattern, either real or forced. If we are in a top-level
838      restore the offsets appropriately and continue from after the call. */      recursion, we should restore the offsets appropriately and continue from
839        after the call. */
840    
841        case OP_ACCEPT:
842      case OP_END:      case OP_END:
843      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
844        {        {
# Line 820  for (;;) Line 859  for (;;)
859      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
860      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
861      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
862      md->start_match_ptr = mstart;  /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
863      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
864    
865      /* Change option settings */      /* Change option settings */
# Line 844  for (;;) Line 883  for (;;)
883        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
884          RM4);          RM4);
885        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
886        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
887        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
888        }        }
889      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 871  for (;;) Line 910  for (;;)
910        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
911          RM5);          RM5);
912        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
913        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
914        ecode += GET(ecode,1);        ecode += GET(ecode,1);
915        }        }
916      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1008  for (;;) Line 1047  for (;;)
1047              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1048            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
1049            }            }
1050          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1051            {            {
1052            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1053            RRETURN(rrc);            RRETURN(rrc);
# Line 1044  for (;;) Line 1083  for (;;)
1083        {        {
1084        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1085        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1086        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1087        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1088        }        }
1089      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 4547  the loop runs just once. */ Line 4586  the loop runs just once. */
4586  for(;;)  for(;;)
4587    {    {
4588    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
4589      USPTR new_start_match;
4590    
4591    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
4592    
# Line 4687  for(;;) Line 4727  for(;;)
4727    
4728    /* OK, we can now run the match. */    /* OK, we can now run the match. */
4729    
4730    md->start_match_ptr = start_match;      /* Insurance */    md->start_match_ptr = start_match;
4731    md->match_call_count = 0;    md->match_call_count = 0;
4732    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
4733    
4734    /* Any return other than MATCH_NOMATCH breaks the loop. */    switch(rc)
4735        {
4736    if (rc != MATCH_NOMATCH) break;      /* NOMATCH and PRUNE advance by one character. THEN at this level acts
4737        exactly like PRUNE. */
4738    
4739        case MATCH_NOMATCH:
4740        case MATCH_PRUNE:
4741        case MATCH_THEN:
4742        new_start_match = start_match + 1;
4743    #ifdef SUPPORT_UTF8
4744        if (utf8)
4745          while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
4746            new_start_match++;
4747    #endif
4748        break;
4749    
4750        /* SKIP passes back the next starting point explicitly. */
4751    
4752        case MATCH_SKIP:
4753        new_start_match = md->start_match_ptr;
4754        break;
4755    
4756        /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
4757    
4758        case MATCH_COMMIT:
4759        rc = MATCH_NOMATCH;
4760        goto ENDLOOP;
4761    
4762        /* Any other return is some kind of error. */
4763    
4764        default:
4765        goto ENDLOOP;
4766        }
4767    
4768      /* Control reaches here for the various types of "no match at this point"
4769      result. Reset the code to MATCH_NOMATCH for subsequent checking. */
4770    
4771      rc = MATCH_NOMATCH;
4772    
4773    /* If PCRE_FIRSTLINE is set, the match must happen before or at the first    /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4774    newline in the subject (though it may continue over the newline). Therefore,    newline in the subject (though it may continue over the newline). Therefore,
# Line 4701  for(;;) Line 4776  for(;;)
4776    
4777    if (firstline && IS_NEWLINE(start_match)) break;    if (firstline && IS_NEWLINE(start_match)) break;
4778    
4779    /* Advance the match position by one character. */    /* Advance to new matching position */
4780    
4781    start_match++;    start_match = new_start_match;
 #ifdef SUPPORT_UTF8  
   if (utf8)  
     while(start_match < end_subject && (*start_match & 0xc0) == 0x80)  
       start_match++;  
 #endif  
4782    
4783    /* Break the loop if the pattern is anchored or if we have passed the end of    /* Break the loop if the pattern is anchored or if we have passed the end of
4784    the subject. */    the subject. */
# Line 4734  for(;;) Line 4804  for(;;)
4804  /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping  /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4805  conditions is true:  conditions is true:
4806    
4807  (1) The pattern is anchored;  (1) The pattern is anchored or the match was failed by (*COMMIT);
4808    
4809  (2) We are past the end of the subject;  (2) We are past the end of the subject;
4810    
# Line 4749  processing, copy those that we can. In t Line 4819  processing, copy those that we can. In t
4819  certain parts of the pattern were not used, even though there are more  certain parts of the pattern were not used, even though there are more
4820  capturing parentheses than vector slots. */  capturing parentheses than vector slots. */
4821    
4822    ENDLOOP:
4823    
4824  if (rc == MATCH_MATCH)  if (rc == MATCH_MATCH)
4825    {    {
4826    if (using_temporary_offsets)    if (using_temporary_offsets)

Legend:
Removed from v.208  
changed lines
  Added in v.210

  ViewVC Help
Powered by ViewVC 1.1.5