/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 459 by ph10, Sun Oct 4 09:21:39 2009 UTC | revision 461 by ph10, Mon Oct 5 10:59:35 2009 UTC
# Line 843  for (;;) Line 843  for (;;)
843        {        {
844        if (md->recursive == NULL)                /* Not recursing => FALSE */        if (md->recursive == NULL)                /* Not recursing => FALSE */
845          {          {
846          condition = FALSE;          condition = FALSE;
847          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
848          }          }
849        else        else
850          {          {
851          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
852          condition =  (recno == RREF_ANY || recno == md->recursive->group_num);          condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
853    
854          /* If the test is for recursion into a specific subpattern, and it is          /* If the test is for recursion into a specific subpattern, and it is
855          false, but the test was set up by name, scan the table to see if the          false, but the test was set up by name, scan the table to see if the
856          name refers to any other numbers, and test them. The condition is true          name refers to any other numbers, and test them. The condition is true
857          if any one is set. */          if any one is set. */
858    
859          if (!condition && condcode == OP_NRREF && recno != RREF_ANY)          if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
860            {            {
861            uschar *slotA = md->name_table;            uschar *slotA = md->name_table;
862            for (i = 0; i < md->name_count; i++)            for (i = 0; i < md->name_count; i++)
863              {              {
864              if (GET2(slotA, 0) == recno) break;              if (GET2(slotA, 0) == recno) break;
865              slotA += md->name_entry_size;              slotA += md->name_entry_size;
866              }              }
867    
868            /* Found a name for the number - there can be only one; duplicate            /* Found a name for the number - there can be only one; duplicate
869            names for different numbers are allowed, but not vice versa. First            names for different numbers are allowed, but not vice versa. First
870            scan down for duplicates. */            scan down for duplicates. */
871    
872            if (i < md->name_count)            if (i < md->name_count)
873              {              {
874              uschar *slotB = slotA;              uschar *slotB = slotA;
875              while (slotB > md->name_table)              while (slotB > md->name_table)
876                {                {
# Line 878  for (;;) Line 878  for (;;)
878                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
879                  {                  {
880                  condition = GET2(slotB, 0) == md->recursive->group_num;                  condition = GET2(slotB, 0) == md->recursive->group_num;
881                  if (condition) break;                  if (condition) break;
882                  }                  }
883                else break;                else break;
884                }                }
885    
886              /* Scan up for duplicates */              /* Scan up for duplicates */
887    
888              if (!condition)              if (!condition)
889                {                {
890                slotB = slotA;                slotB = slotA;
891                for (i++; i < md->name_count; i++)                for (i++; i < md->name_count; i++)
892                  {                  {
# Line 895  for (;;) Line 895  for (;;)
895                    {                    {
896                    condition = GET2(slotB, 0) == md->recursive->group_num;                    condition = GET2(slotB, 0) == md->recursive->group_num;
897                    if (condition) break;                    if (condition) break;
898                    }                    }
899                  else break;                  else break;
900                  }                  }
901                }                }
902              }              }
903            }            }
904    
905          /* Choose branch according to the condition */          /* Choose branch according to the condition */
906    
907          ecode += condition? 3 : GET(ecode, 1);          ecode += condition? 3 : GET(ecode, 1);
908          }          }
909        }        }
910    
911      else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */      else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
912        {        {
913        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
914        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
915    
916        /* If the numbered capture is unset, but the reference was by name,        /* If the numbered capture is unset, but the reference was by name,
917        scan the table to see if the name refers to any other numbers, and test        scan the table to see if the name refers to any other numbers, and test
918        them. The condition is true if any one is set. This is tediously similar        them. The condition is true if any one is set. This is tediously similar
919        to the code above, but not close enough to try to amalgamate. */        to the code above, but not close enough to try to amalgamate. */
920    
921        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
922          {          {
923          int refno = offset >> 1;          int refno = offset >> 1;
924          uschar *slotA = md->name_table;          uschar *slotA = md->name_table;
925    
926          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
927            {            {
928            if (GET2(slotA, 0) == refno) break;            if (GET2(slotA, 0) == refno) break;
929            slotA += md->name_entry_size;            slotA += md->name_entry_size;
930            }            }
931    
932          /* Found a name for the number - there can be only one; duplicate names          /* Found a name for the number - there can be only one; duplicate names
933          for different numbers are allowed, but not vice versa. First scan down          for different numbers are allowed, but not vice versa. First scan down
934          for duplicates. */          for duplicates. */
935    
936          if (i < md->name_count)          if (i < md->name_count)
937            {            {
938            uschar *slotB = slotA;            uschar *slotB = slotA;
939            while (slotB > md->name_table)            while (slotB > md->name_table)
940              {              {
# Line 942  for (;;) Line 942  for (;;)
942              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
943                {                {
944                offset = GET2(slotB, 0) << 1;                offset = GET2(slotB, 0) << 1;
945                condition = offset < offset_top &&                condition = offset < offset_top &&
946                  md->offset_vector[offset] >= 0;                  md->offset_vector[offset] >= 0;
947                if (condition) break;                if (condition) break;
948                }                }
949              else break;              else break;
950              }              }
951    
952            /* Scan up for duplicates */            /* Scan up for duplicates */
953    
954            if (!condition)            if (!condition)
955              {              {
956              slotB = slotA;              slotB = slotA;
957              for (i++; i < md->name_count; i++)              for (i++; i < md->name_count; i++)
958                {                {
# Line 960  for (;;) Line 960  for (;;)
960                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
961                  {                  {
962                  offset = GET2(slotB, 0) << 1;                  offset = GET2(slotB, 0) << 1;
963                  condition = offset < offset_top &&                  condition = offset < offset_top &&
964                    md->offset_vector[offset] >= 0;                    md->offset_vector[offset] >= 0;
965                  if (condition) break;                  if (condition) break;
966                  }                  }
967                else break;                else break;
968                }                }
969              }              }
970            }            }
971          }          }
972    
973        /* Choose branch according to the condition */        /* Choose branch according to the condition */
974    
975        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
# Line 1030  for (;;) Line 1030  for (;;)
1030        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1031        }        }
1032      break;      break;
1033    
1034    
1035      /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,      /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1036      to close any currently open capturing brackets. */      to close any currently open capturing brackets. */
1037    
1038      case OP_CLOSE:      case OP_CLOSE:
1039      number = GET2(ecode, 1);      number = GET2(ecode, 1);
1040      offset = number << 1;      offset = number << 1;
1041    
1042  #ifdef DEBUG  #ifdef DEBUG
1043        printf("end bracket %d at *ACCEPT", number);        printf("end bracket %d at *ACCEPT", number);
1044        printf("\n");        printf("\n");
# Line 1053  for (;;) Line 1053  for (;;)
1053        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1054        }        }
1055      ecode += 3;      ecode += 3;
1056      break;      break;
1057    
1058    
1059      /* End of the pattern, either real or forced. If we are in a top-level      /* End of the pattern, either real or forced. If we are in a top-level
# Line 1069  for (;;) Line 1069  for (;;)
1069        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1070        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1071          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1072        offset_top = rec->offset_top;        offset_top = rec->save_offset_top;
1073        mstart = rec->save_start;        mstart = rec->save_start;
1074        ims = original_ims;        ims = original_ims;
1075        ecode = rec->after_call;        ecode = rec->after_call;
# Line 1261  for (;;) Line 1261  for (;;)
1261        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1262              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1263        new_recursive.save_start = mstart;        new_recursive.save_start = mstart;
1264        new_recursive.offset_top = offset_top;        new_recursive.save_offset_top = offset_top;
1265        mstart = eptr;        mstart = eptr;
1266    
1267        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
# Line 1460  for (;;) Line 1460  for (;;)
1460        {        {
1461        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1462        offset = number << 1;        offset = number << 1;
1463    
1464  #ifdef DEBUG  #ifdef DEBUG
1465        printf("end bracket %d", number);        printf("end bracket %d", number);
1466        printf("\n");        printf("\n");
# Line 1486  for (;;) Line 1486  for (;;)
1486          mstart = rec->save_start;          mstart = rec->save_start;
1487          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1488            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1489          offset_top = rec->offset_top;          offset_top = rec->save_offset_top;
1490          ecode = rec->after_call;          ecode = rec->after_call;
1491          ims = original_ims;          ims = original_ims;
1492          break;          break;
# Line 5010  if (re == NULL || subject == NULL || Line 5010  if (re == NULL || subject == NULL ||
5010     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5011  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5012    
5013  /* This information is for finding all the numbers associated with a given  /* This information is for finding all the numbers associated with a given
5014  name, for condition testing. */  name, for condition testing. */
5015    
5016  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (uschar *)re + re->name_table_offset;
# Line 5375  for(;;) Line 5375  for(;;)
5375    /* Restore fudged end_subject */    /* Restore fudged end_subject */
5376    
5377    end_subject = save_end_subject;    end_subject = save_end_subject;
5378    
5379    /* The following two optimizations are disabled for partial matching or if    /* The following two optimizations are disabled for partial matching or if
5380    disabling is explicitly requested. */    disabling is explicitly requested. */
5381    
5382    if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)    if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
5383      {      {
5384      /* If the pattern was studied, a minimum subject length may be set. This is      /* If the pattern was studied, a minimum subject length may be set. This is
5385      a lower bound; no actual string of that length may actually match the      a lower bound; no actual string of that length may actually match the
5386      pattern. Although the value is, strictly, in characters, we treat it as      pattern. Although the value is, strictly, in characters, we treat it as
5387      bytes to avoid spending too much time in this optimization. */      bytes to avoid spending too much time in this optimization. */
5388    
5389      if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&      if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
5390          end_subject - start_match < study->minlength)          end_subject - start_match < study->minlength)
5391        {        {
5392        rc = MATCH_NOMATCH;        rc = MATCH_NOMATCH;
5393        break;        break;
5394        }        }
5395    
5396      /* If req_byte is set, we know that that character must appear in the      /* If req_byte is set, we know that that character must appear in the
5397      subject for the match to succeed. If the first character is set, req_byte      subject for the match to succeed. If the first character is set, req_byte
5398      must be later in the subject; otherwise the test starts at the match point.      must be later in the subject; otherwise the test starts at the match point.
# Line 5400  for(;;) Line 5400  for(;;)
5400      nested unlimited repeats that aren't going to match. Writing separate code      nested unlimited repeats that aren't going to match. Writing separate code
5401      for cased/caseless versions makes it go faster, as does using an      for cased/caseless versions makes it go faster, as does using an
5402      autoincrement and backing off on a match.      autoincrement and backing off on a match.
5403    
5404      HOWEVER: when the subject string is very, very long, searching to its end      HOWEVER: when the subject string is very, very long, searching to its end
5405      can take a long time, and give bad performance on quite ordinary patterns.      can take a long time, and give bad performance on quite ordinary patterns.
5406      This showed up when somebody was matching something like /^\d+C/ on a      This showed up when somebody was matching something like /^\d+C/ on a
5407      32-megabyte string... so we don't do this when the string is sufficiently      32-megabyte string... so we don't do this when the string is sufficiently
5408      long. */      long. */
5409    
5410      if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)      if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
5411        {        {
5412        register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);        register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
5413    
5414        /* We don't need to repeat the search if we haven't yet reached the        /* We don't need to repeat the search if we haven't yet reached the
5415        place we found it at last time. */        place we found it at last time. */
5416    
5417        if (p > req_byte_ptr)        if (p > req_byte_ptr)
5418          {          {
5419          if (req_byte_caseless)          if (req_byte_caseless)
# Line 5431  for(;;) Line 5431  for(;;)
5431              if (*p++ == req_byte) { p--; break; }              if (*p++ == req_byte) { p--; break; }
5432              }              }
5433            }            }
5434    
5435          /* If we can't find the required character, break the matching loop,          /* If we can't find the required character, break the matching loop,
5436          forcing a match failure. */          forcing a match failure. */
5437    
5438          if (p >= end_subject)          if (p >= end_subject)
5439            {            {
5440            rc = MATCH_NOMATCH;            rc = MATCH_NOMATCH;
5441            break;            break;
5442            }            }
5443    
5444          /* If we have found the required character, save the point where we          /* If we have found the required character, save the point where we
5445          found it, so that we don't search again next time round the loop if          found it, so that we don't search again next time round the loop if
5446          the start hasn't passed this character yet. */          the start hasn't passed this character yet. */
5447    
5448          req_byte_ptr = p;          req_byte_ptr = p;
5449          }          }
5450        }        }
5451      }      }
5452    
5453  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
5454    printf(">>>> Match against: ");    printf(">>>> Match against: ");
# Line 5575  if (rc == MATCH_MATCH) Line 5575  if (rc == MATCH_MATCH)
5575    too many to fit into the vector. */    too many to fit into the vector. */
5576    
5577    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = md->offset_overflow? 0 : md->end_offset_top/2;
5578    
5579    /* If there is space, set up the whole thing as substring 0. The value of    /* If there is space, set up the whole thing as substring 0. The value of
5580    md->start_match_ptr might be modified if \K was encountered on the success    md->start_match_ptr might be modified if \K was encountered on the success
5581    matching path. */    matching path. */

Legend:
Removed from v.459  
changed lines
  Added in v.461

  ViewVC Help
Powered by ViewVC 1.1.5