/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 604 by ph10, Thu Jun 2 19:04:54 2011 UTC revision 609 by ph10, Wed Jun 15 18:09:23 2011 UTC
# Line 276  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 276  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
276         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
277         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
278         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
279         RM61,  RM62, RM63, RM64 };         RM61,  RM62, RM63};
280    
281  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
282  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 858  for (;;) Line 858  for (;;)
858        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
859        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
860    
861        if (rrc != MATCH_THEN) md->mark = markptr;        if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;
862        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
863        }        }
864    
# Line 875  for (;;) Line 875  for (;;)
875    
876      /* Non-capturing bracket, except for possessive with unlimited repeat. Loop      /* Non-capturing bracket, except for possessive with unlimited repeat. Loop
877      for all the alternatives. When we get to the final alternative within the      for all the alternatives. When we get to the final alternative within the
878      brackets, we would return the result of a recursive call to match()      brackets, we used to return the result of a recursive call to match()
879      whatever happened. We can reduce stack usage by turning this into a tail      whatever happened so it was possible to reduce stack usage by turning this
880      recursion, except in the case of a possibly empty group.*/      into a tail recursion, except in the case of a possibly empty group.
881        However, now that there is the possibility of (*THEN) occurring in the final
882        alternative, this optimization is no longer possible. */
883    
884      case OP_BRA:      case OP_BRA:
885      case OP_SBRA:      case OP_SBRA:
886      DPRINTF(("start non-capturing bracket\n"));      DPRINTF(("start non-capturing bracket\n"));
887      for (;;)      for (;;)
888        {        {
       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */  
         {  
         if (op >= OP_SBRA)   /* Possibly empty group */  
           {  
           md->match_function_type = MATCH_CBEGROUP;  
           RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,  
             RM48);  
           if (rrc == MATCH_NOMATCH) md->mark = markptr;  
           RRETURN(rrc);  
           }  
         /* Not a possibly empty group; use tail recursion */  
         ecode += _pcre_OP_lengths[*ecode];  
         DPRINTF(("bracket 0 tail recursion\n"));  
         goto TAIL_RECURSE;  
         }  
   
       /* For non-final alternatives, continue the loop for a NOMATCH result;  
       otherwise return. */  
   
889        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
890        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
891          RM2);          RM2);
# Line 910  for (;;) Line 893  for (;;)
893            (rrc != MATCH_THEN || md->start_match_ptr != ecode))            (rrc != MATCH_THEN || md->start_match_ptr != ecode))
894          RRETURN(rrc);          RRETURN(rrc);
895        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
896          if (*ecode != OP_ALT) break;
897        }        }
898      /* Control never reaches here. */  
899        if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;
900        RRETURN(MATCH_NOMATCH);
901    
902      /* Handle possessive capturing brackets with an unlimited repeat. We come      /* Handle possessive capturing brackets with an unlimited repeat. We come
903      here from BRAZERO with allow_zero set TRUE. The offset_vector values are      here from BRAZERO with allow_zero set TRUE. The offset_vector values are
# Line 988  for (;;) Line 974  for (;;)
974          md->offset_vector[md->offset_end - number] = save_offset3;          md->offset_vector[md->offset_end - number] = save_offset3;
975          }          }
976    
977        if (rrc != MATCH_THEN) md->mark = markptr;        if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;
978        if (allow_zero || matched_once)        if (allow_zero || matched_once)
979          {          {
980          ecode += 1 + LINK_SIZE;          ecode += 1 + LINK_SIZE;
# Line 1026  for (;;) Line 1012  for (;;)
1012        {        {
1013        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1014        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
1015          eptrb, RM64);          eptrb, RM48);
1016        if (rrc == MATCH_KETRPOS)        if (rrc == MATCH_KETRPOS)
1017          {          {
1018          eptr = md->end_match_ptr;          eptr = md->end_match_ptr;
# Line 1053  for (;;) Line 1039  for (;;)
1039      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
1040      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
1041      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
1042      exactly what going to the ket would do. As there is only one branch to be      exactly what going to the ket would do. */
     obeyed, we can use tail recursion to avoid using another stack frame. */  
1043    
1044      case OP_COND:      case OP_COND:
1045      case OP_SCOND:      case OP_SCOND:
# Line 1259  for (;;) Line 1244  for (;;)
1244        }        }
1245    
1246      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1247      we can use tail recursion to avoid using another stack frame, except when      we used to use tail recursion to avoid using another stack frame, except
1248      we have an unlimited repeat of a possibly empty group. If the second      when there was unlimited repeat of a possibly empty group. However, that
1249      alternative doesn't exist, we can just plough on. */      strategy no longer works because of the possibility of (*THEN) being
1250        encountered in the branch. A recursive call to match() is always required,
1251        unless the second alternative doesn't exist, in which case we can just
1252        plough on. */
1253    
1254      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1255        {        {
1256        ecode += 1 + LINK_SIZE;        if (op == OP_SCOND) md->match_function_type = MATCH_CBEGROUP;
1257        if (op == OP_SCOND)        /* Possibly empty group */        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1258          {        if (rrc == MATCH_THEN && md->start_match_ptr == ecode)
1259          md->match_function_type = MATCH_CBEGROUP;          rrc = MATCH_NOMATCH;
1260          RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);        RRETURN(rrc);
         RRETURN(rrc);  
         }  
       else goto TAIL_RECURSE;  
1261        }        }
1262      else                         /* Condition false & no alternative */      else                         /* Condition false & no alternative */
1263        {        {
# Line 1305  for (;;) Line 1290  for (;;)
1290      break;      break;
1291    
1292    
1293      /* End of the pattern, either real or forced. If we are in a top-level      /* End of the pattern, either real or forced. If we are in a recursion, we
1294      recursion, we should restore the offsets appropriately and continue from      should restore the offsets appropriately, and if it's a top-level
1295      after the call. */      recursion, continue from after the call. */
1296    
1297      case OP_ACCEPT:      case OP_ACCEPT:
1298      case OP_END:      case OP_END:
1299      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL)
1300        {        {
1301        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
       DPRINTF(("End of pattern in a (?0) recursion\n"));  
1302        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1303        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1304          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1305        offset_top = rec->save_offset_top;        offset_top = rec->save_offset_top;
1306        ecode = rec->after_call;        if (rec->group_num == 0)
1307        break;          {
1308            ecode = rec->after_call;
1309            break;
1310            }
1311        }        }
1312    
1313      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
# Line 1328  for (;;) Line 1315  for (;;)
1315      the subject. In both cases, backtracking will then try other alternatives,      the subject. In both cases, backtracking will then try other alternatives,
1316      if any. */      if any. */
1317    
1318      if (eptr == mstart &&      else if (eptr == mstart &&
1319          (md->notempty ||          (md->notempty ||
1320            (md->notempty_atstart &&            (md->notempty_atstart &&
1321              mstart == md->start_subject + md->start_offset)))              mstart == md->start_subject + md->start_offset)))
1322        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1323    
1324      /* Otherwise, we have a match. */      /* Otherwise, we have a match. */
1325    
1326      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
1327      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
1328      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
# Line 1538  for (;;) Line 1525  for (;;)
1525        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1526              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1527        new_recursive.save_offset_top = offset_top;        new_recursive.save_offset_top = offset_top;
1528    
1529        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1530        restore the offset and recursion data. */        restore the offset and recursion data. */
1531    
# Line 5701  switch (frame->Xwhere) Line 5688  switch (frame->Xwhere)
5688    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5689    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5690    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5691    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63)
5692  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5693    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5694    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
# Line 5999  if (md->partial && (re->flags & PCRE_NOP Line 5986  if (md->partial && (re->flags & PCRE_NOP
5986    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
5987    
5988  /* Check a UTF-8 string if required. Pass back the character offset and error  /* Check a UTF-8 string if required. Pass back the character offset and error
5989  code if a results vector is available. */  code for an invalid string if a results vector is available. */
5990    
5991  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5992  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5993    {    {
5994    int errorcode;    int erroroffset;
5995    int tb = _pcre_valid_utf8((USPTR)subject, length, &errorcode);    int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
5996    if (tb >= 0)    if (errorcode != 0)
5997      {      {
5998      if (offsetcount >= 2)      if (offsetcount >= 2)
5999        {        {
6000        offsets[0] = tb;        offsets[0] = erroroffset;
6001        offsets[1] = errorcode;        offsets[1] = errorcode;
6002        }        }
6003      return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?      return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6004        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6005      }      }
6006    if (start_offset > 0 && start_offset < length)  
6007      {    /* Check that a start_offset points to the start of a UTF-8 character. */
6008      tb = ((USPTR)subject)[start_offset] & 0xc0;  
6009      if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;    if (start_offset > 0 && start_offset < length &&
6010      }        (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
6011        return PCRE_ERROR_BADUTF8_OFFSET;
6012    }    }
6013  #endif  #endif
6014    

Legend:
Removed from v.604  
changed lines
  Added in v.609

  ViewVC Help
Powered by ViewVC 1.1.5