/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 443 by ph10, Sun Sep 13 16:00:08 2009 UTC | revision 455 by ph10, Sat Sep 26 19:12:32 2009 UTC
# Line 909  for (;;) Line 909  for (;;)
909        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
910        }        }
911      break;      break;
912    
913    
914        /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
915        to close any currently open capturing brackets. */
916    
917        case OP_CLOSE:
918        number = GET2(ecode, 1);
919        offset = number << 1;
920    
921    #ifdef DEBUG
922          printf("end bracket %d at *ACCEPT", number);
923          printf("\n");
924    #endif
925    
926        md->capture_last = number;
927        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
928          {
929          md->offset_vector[offset] =
930            md->offset_vector[md->offset_end - number];
931          md->offset_vector[offset+1] = eptr - md->start_subject;
932          if (offset_top <= offset) offset_top = offset + 2;
933          }
934        ecode += 3;
935        break;
936    
937    
938      /* End of the pattern, either real or forced. If we are in a top-level      /* End of the pattern, either real or forced. If we are in a top-level
# Line 924  for (;;) Line 948  for (;;)
948        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
949        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
950          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
951          offset_top = rec->offset_top;
952        mstart = rec->save_start;        mstart = rec->save_start;
953        ims = original_ims;        ims = original_ims;
954        ecode = rec->after_call;        ecode = rec->after_call;
# Line 1115  for (;;) Line 1140  for (;;)
1140        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1141              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1142        new_recursive.save_start = mstart;        new_recursive.save_start = mstart;
1143          new_recursive.offset_top = offset_top;
1144        mstart = eptr;        mstart = eptr;
1145    
1146        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
# Line 1313  for (;;) Line 1339  for (;;)
1339        {        {
1340        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1341        offset = number << 1;        offset = number << 1;
1342    
1343  #ifdef DEBUG  #ifdef DEBUG
1344        printf("end bracket %d", number);        printf("end bracket %d", number);
1345        printf("\n");        printf("\n");
# Line 1339  for (;;) Line 1365  for (;;)
1365          mstart = rec->save_start;          mstart = rec->save_start;
1366          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1367            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1368            offset_top = rec->offset_top;
1369          ecode = rec->after_call;          ecode = rec->after_call;
1370          ims = original_ims;          ims = original_ims;
1371          break;          break;
# Line 5094  if (!anchored) Line 5121  if (!anchored)
5121      }      }
5122    else    else
5123      if (!startline && study != NULL &&      if (!startline && study != NULL &&
5124        (study->options & PCRE_STUDY_MAPPED) != 0)        (study->flags & PCRE_STUDY_MAPPED) != 0)
5125          start_bits = study->start_bits;          start_bits = study->start_bits;
5126    }    }
5127    
# Line 5220  for(;;) Line 5247  for(;;)
5247    /* Restore fudged end_subject */    /* Restore fudged end_subject */
5248    
5249    end_subject = save_end_subject;    end_subject = save_end_subject;
5250    
5251  #ifdef DEBUG  /* Sigh. Some compilers never learn. */    /* The following two optimizations are disabled for partial matching or if
   printf(">>>> Match against: ");  
   pchars(start_match, end_subject - start_match, TRUE, md);  
   printf("\n");  
 #endif  
   
   /* If req_byte is set, we know that that character must appear in the  
   subject for the match to succeed. If the first character is set, req_byte  
   must be later in the subject; otherwise the test starts at the match point.  
   This optimization can save a huge amount of backtracking in patterns with  
   nested unlimited repeats that aren't going to match. Writing separate code  
   for cased/caseless versions makes it go faster, as does using an  
   autoincrement and backing off on a match.  
   
   HOWEVER: when the subject string is very, very long, searching to its end  
   can take a long time, and give bad performance on quite ordinary patterns.  
   This showed up when somebody was matching something like /^\d+C/ on a  
   32-megabyte string... so we don't do this when the string is sufficiently  
   long.  
   
   ALSO: this processing is disabled when partial matching is requested, or if  
5252    disabling is explicitly requested. */    disabling is explicitly requested. */
5253    
5254    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
5255        req_byte >= 0 &&      {
5256        end_subject - start_match < REQ_BYTE_MAX &&      /* If the pattern was studied, a minimum subject length may be set. This is
5257        !md->partial)      a lower bound; no actual string of that length may actually match the
5258      {      pattern. Although the value is, strictly, in characters, we treat it as
5259      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);      bytes to avoid spending too much time in this optimization. */
5260    
5261      /* We don't need to repeat the search if we haven't yet reached the      if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
5262      place we found it at last time. */          end_subject - start_match < study->minlength)
5263          {
5264      if (p > req_byte_ptr)        rc = MATCH_NOMATCH;
5265        {        break;
5266        if (req_byte_caseless)        }
5267    
5268        /* If req_byte is set, we know that that character must appear in the
5269        subject for the match to succeed. If the first character is set, req_byte
5270        must be later in the subject; otherwise the test starts at the match point.
5271        This optimization can save a huge amount of backtracking in patterns with
5272        nested unlimited repeats that aren't going to match. Writing separate code
5273        for cased/caseless versions makes it go faster, as does using an
5274        autoincrement and backing off on a match.
5275    
5276        HOWEVER: when the subject string is very, very long, searching to its end
5277        can take a long time, and give bad performance on quite ordinary patterns.
5278        This showed up when somebody was matching something like /^\d+C/ on a
5279        32-megabyte string... so we don't do this when the string is sufficiently
5280        long. */
5281    
5282        if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
5283          {
5284          register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
5285    
5286          /* We don't need to repeat the search if we haven't yet reached the
5287          place we found it at last time. */
5288    
5289          if (p > req_byte_ptr)
5290          {          {
5291          while (p < end_subject)          if (req_byte_caseless)
5292              {
5293              while (p < end_subject)
5294                {
5295                register int pp = *p++;
5296                if (pp == req_byte || pp == req_byte2) { p--; break; }
5297                }
5298              }
5299            else
5300            {            {
5301            register int pp = *p++;            while (p < end_subject)
5302            if (pp == req_byte || pp == req_byte2) { p--; break; }              {
5303                if (*p++ == req_byte) { p--; break; }
5304                }
5305            }            }
5306          }  
5307        else          /* If we can't find the required character, break the matching loop,
5308          {          forcing a match failure. */
5309          while (p < end_subject)  
5310            if (p >= end_subject)
5311            {            {
5312            if (*p++ == req_byte) { p--; break; }            rc = MATCH_NOMATCH;
5313              break;
5314            }            }
5315    
5316            /* If we have found the required character, save the point where we
5317            found it, so that we don't search again next time round the loop if
5318            the start hasn't passed this character yet. */
5319    
5320            req_byte_ptr = p;
5321          }          }
   
       /* If we can't find the required character, break the matching loop,  
       forcing a match failure. */  
   
       if (p >= end_subject)  
         {  
         rc = MATCH_NOMATCH;  
         break;  
         }  
   
       /* If we have found the required character, save the point where we  
       found it, so that we don't search again next time round the loop if  
       the start hasn't passed this character yet. */  
   
       req_byte_ptr = p;  
5322        }        }
5323      }      }
5324    
5325    #ifdef DEBUG  /* Sigh. Some compilers never learn. */
5326      printf(">>>> Match against: ");
5327      pchars(start_match, end_subject - start_match, TRUE, md);
5328      printf("\n");
5329    #endif
5330    
5331    /* OK, we can now run the match. If "hitend" is set afterwards, remember the    /* OK, we can now run the match. If "hitend" is set afterwards, remember the
5332    first starting point for which a partial match was found. */    first starting point for which a partial match was found. */
# Line 5326  for(;;) Line 5365  for(;;)
5365      rc = MATCH_NOMATCH;      rc = MATCH_NOMATCH;
5366      goto ENDLOOP;      goto ENDLOOP;
5367    
5368      /* Any other return is some kind of error. */      /* Any other return is either a match, or some kind of error. */
5369    
5370      default:      default:
5371      goto ENDLOOP;      goto ENDLOOP;
# Line 5408  if (rc == MATCH_MATCH) Line 5447  if (rc == MATCH_MATCH)
5447    too many to fit into the vector. */    too many to fit into the vector. */
5448    
5449    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = md->offset_overflow? 0 : md->end_offset_top/2;
5450    
5451    /* If there is space, set up the whole thing as substring 0. The value of    /* If there is space, set up the whole thing as substring 0. The value of
5452    md->start_match_ptr might be modified if \K was encountered on the success    md->start_match_ptr might be modified if \K was encountered on the success
5453    matching path. */    matching path. */

Legend:
Removed from v.443  
changed lines
  Added in v.455

  ViewVC Help
Powered by ViewVC 1.1.5