/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 459 by ph10, Sun Oct 4 09:21:39 2009 UTC revision 500 by ph10, Sat Mar 6 19:00:29 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2009 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 89  static const char rep_max[] = { 0, 0, 0, Line 89  static const char rep_max[] = { 0, 0, 0,
89    
90    
91    
92  #ifdef DEBUG  #ifdef PCRE_DEBUG
93  /*************************************************  /*************************************************
94  *        Debugging function to print chars       *  *        Debugging function to print chars       *
95  *************************************************/  *************************************************/
# Line 141  match_ref(int offset, register USPTR ept Line 141  match_ref(int offset, register USPTR ept
141  {  {
142  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
143    
144  #ifdef DEBUG  #ifdef PCRE_DEBUG
145  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
146    printf("matching subject <null>");    printf("matching subject <null>");
147  else  else
# Line 254  actually used in this definition. */ Line 254  actually used in this definition. */
254  #ifndef NO_RECURSE  #ifndef NO_RECURSE
255  #define REGISTER register  #define REGISTER register
256    
257  #ifdef DEBUG  #ifdef PCRE_DEBUG
258  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
259    { \    { \
260    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
# Line 415  the subject. */ Line 415  the subject. */
415      }      }
416    
417  #define SCHECK_PARTIAL()\  #define SCHECK_PARTIAL()\
418    if (md->partial && eptr > mstart)\    if (md->partial != 0 && eptr > mstart)\
419      {\      {\
420      md->hitend = TRUE;\      md->hitend = TRUE;\
421      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
# Line 622  TAIL_RECURSE: Line 622  TAIL_RECURSE:
622  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
623  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
624  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
625  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
626  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
627  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
628  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
# Line 713  for (;;) Line 713  for (;;)
713      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
714      offset = number << 1;      offset = number << 1;
715    
716  #ifdef DEBUG  #ifdef PCRE_DEBUG
717      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
718      printf("subject=");      printf("subject=");
719      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
# Line 843  for (;;) Line 843  for (;;)
843        {        {
844        if (md->recursive == NULL)                /* Not recursing => FALSE */        if (md->recursive == NULL)                /* Not recursing => FALSE */
845          {          {
846          condition = FALSE;          condition = FALSE;
847          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
848          }          }
849        else        else
850          {          {
851          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
852          condition =  (recno == RREF_ANY || recno == md->recursive->group_num);          condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
853    
854          /* If the test is for recursion into a specific subpattern, and it is          /* If the test is for recursion into a specific subpattern, and it is
855          false, but the test was set up by name, scan the table to see if the          false, but the test was set up by name, scan the table to see if the
856          name refers to any other numbers, and test them. The condition is true          name refers to any other numbers, and test them. The condition is true
857          if any one is set. */          if any one is set. */
858    
859          if (!condition && condcode == OP_NRREF && recno != RREF_ANY)          if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
860            {            {
861            uschar *slotA = md->name_table;            uschar *slotA = md->name_table;
862            for (i = 0; i < md->name_count; i++)            for (i = 0; i < md->name_count; i++)
863              {              {
864              if (GET2(slotA, 0) == recno) break;              if (GET2(slotA, 0) == recno) break;
865              slotA += md->name_entry_size;              slotA += md->name_entry_size;
866              }              }
867    
868            /* Found a name for the number - there can be only one; duplicate            /* Found a name for the number - there can be only one; duplicate
869            names for different numbers are allowed, but not vice versa. First            names for different numbers are allowed, but not vice versa. First
870            scan down for duplicates. */            scan down for duplicates. */
871    
872            if (i < md->name_count)            if (i < md->name_count)
873              {              {
874              uschar *slotB = slotA;              uschar *slotB = slotA;
875              while (slotB > md->name_table)              while (slotB > md->name_table)
876                {                {
# Line 878  for (;;) Line 878  for (;;)
878                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
879                  {                  {
880                  condition = GET2(slotB, 0) == md->recursive->group_num;                  condition = GET2(slotB, 0) == md->recursive->group_num;
881                  if (condition) break;                  if (condition) break;
882                  }                  }
883                else break;                else break;
884                }                }
885    
886              /* Scan up for duplicates */              /* Scan up for duplicates */
887    
888              if (!condition)              if (!condition)
889                {                {
890                slotB = slotA;                slotB = slotA;
891                for (i++; i < md->name_count; i++)                for (i++; i < md->name_count; i++)
892                  {                  {
# Line 895  for (;;) Line 895  for (;;)
895                    {                    {
896                    condition = GET2(slotB, 0) == md->recursive->group_num;                    condition = GET2(slotB, 0) == md->recursive->group_num;
897                    if (condition) break;                    if (condition) break;
898                    }                    }
899                  else break;                  else break;
900                  }                  }
901                }                }
902              }              }
903            }            }
904    
905          /* Choose branch according to the condition */          /* Choose branch according to the condition */
906    
907          ecode += condition? 3 : GET(ecode, 1);          ecode += condition? 3 : GET(ecode, 1);
908          }          }
909        }        }
910    
911      else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */      else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
912        {        {
913        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
914        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
915    
916        /* If the numbered capture is unset, but the reference was by name,        /* If the numbered capture is unset, but the reference was by name,
917        scan the table to see if the name refers to any other numbers, and test        scan the table to see if the name refers to any other numbers, and test
918        them. The condition is true if any one is set. This is tediously similar        them. The condition is true if any one is set. This is tediously similar
919        to the code above, but not close enough to try to amalgamate. */        to the code above, but not close enough to try to amalgamate. */
920    
921        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
922          {          {
923          int refno = offset >> 1;          int refno = offset >> 1;
924          uschar *slotA = md->name_table;          uschar *slotA = md->name_table;
925    
926          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
927            {            {
928            if (GET2(slotA, 0) == refno) break;            if (GET2(slotA, 0) == refno) break;
929            slotA += md->name_entry_size;            slotA += md->name_entry_size;
930            }            }
931    
932          /* Found a name for the number - there can be only one; duplicate names          /* Found a name for the number - there can be only one; duplicate names
933          for different numbers are allowed, but not vice versa. First scan down          for different numbers are allowed, but not vice versa. First scan down
934          for duplicates. */          for duplicates. */
935    
936          if (i < md->name_count)          if (i < md->name_count)
937            {            {
938            uschar *slotB = slotA;            uschar *slotB = slotA;
939            while (slotB > md->name_table)            while (slotB > md->name_table)
940              {              {
# Line 942  for (;;) Line 942  for (;;)
942              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
943                {                {
944                offset = GET2(slotB, 0) << 1;                offset = GET2(slotB, 0) << 1;
945                condition = offset < offset_top &&                condition = offset < offset_top &&
946                  md->offset_vector[offset] >= 0;                  md->offset_vector[offset] >= 0;
947                if (condition) break;                if (condition) break;
948                }                }
949              else break;              else break;
950              }              }
951    
952            /* Scan up for duplicates */            /* Scan up for duplicates */
953    
954            if (!condition)            if (!condition)
955              {              {
956              slotB = slotA;              slotB = slotA;
957              for (i++; i < md->name_count; i++)              for (i++; i < md->name_count; i++)
958                {                {
# Line 960  for (;;) Line 960  for (;;)
960                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
961                  {                  {
962                  offset = GET2(slotB, 0) << 1;                  offset = GET2(slotB, 0) << 1;
963                  condition = offset < offset_top &&                  condition = offset < offset_top &&
964                    md->offset_vector[offset] >= 0;                    md->offset_vector[offset] >= 0;
965                  if (condition) break;                  if (condition) break;
966                  }                  }
967                else break;                else break;
968                }                }
969              }              }
970            }            }
971          }          }
972    
973        /* Choose branch according to the condition */        /* Choose branch according to the condition */
974    
975        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
# Line 1030  for (;;) Line 1030  for (;;)
1030        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1031        }        }
1032      break;      break;
1033    
1034    
1035      /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,      /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1036      to close any currently open capturing brackets. */      to close any currently open capturing brackets. */
1037    
1038      case OP_CLOSE:      case OP_CLOSE:
1039      number = GET2(ecode, 1);      number = GET2(ecode, 1);
1040      offset = number << 1;      offset = number << 1;
1041    
1042  #ifdef DEBUG  #ifdef PCRE_DEBUG
1043        printf("end bracket %d at *ACCEPT", number);        printf("end bracket %d at *ACCEPT", number);
1044        printf("\n");        printf("\n");
1045  #endif  #endif
# Line 1053  for (;;) Line 1053  for (;;)
1053        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1054        }        }
1055      ecode += 3;      ecode += 3;
1056      break;      break;
1057    
1058    
1059      /* End of the pattern, either real or forced. If we are in a top-level      /* End of the pattern, either real or forced. If we are in a top-level
# Line 1069  for (;;) Line 1069  for (;;)
1069        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1070        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1071          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1072        offset_top = rec->offset_top;        offset_top = rec->save_offset_top;
       mstart = rec->save_start;  
1073        ims = original_ims;        ims = original_ims;
1074        ecode = rec->after_call;        ecode = rec->after_call;
1075        break;        break;
# Line 1114  for (;;) Line 1113  for (;;)
1113        {        {
1114        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1115          RM4);          RM4);
1116        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH)
1117            {
1118            mstart = md->start_match_ptr;   /* In case \K reset it */
1119            break;
1120            }
1121        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1122        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1123        }        }
# Line 1133  for (;;) Line 1136  for (;;)
1136      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1137      continue;      continue;
1138    
1139      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1140        PRUNE, or COMMIT means we must assume failure without checking subsequent
1141        branches. */
1142    
1143      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1144      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
# Line 1142  for (;;) Line 1147  for (;;)
1147        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1148          RM5);          RM5);
1149        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
1150          if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1151            {
1152            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1153            break;
1154            }
1155        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1156        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1157        }        }
# Line 1260  for (;;) Line 1270  for (;;)
1270    
1271        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1272              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1273        new_recursive.save_start = mstart;        new_recursive.save_offset_top = offset_top;
       new_recursive.offset_top = offset_top;  
       mstart = eptr;  
1274    
1275        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1276        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 1309  for (;;) Line 1317  for (;;)
1317      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1318      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1319      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1320      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1321        the start-of-match value in case it was changed by \K. */
1322    
1323      case OP_ONCE:      case OP_ONCE:
1324      prev = ecode;      prev = ecode;
# Line 1318  for (;;) Line 1327  for (;;)
1327      do      do
1328        {        {
1329        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1330        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH)
1331            {
1332            mstart = md->start_match_ptr;
1333            break;
1334            }
1335        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1336        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1337        }        }
# Line 1437  for (;;) Line 1450  for (;;)
1450        }        }
1451      else saved_eptr = NULL;      else saved_eptr = NULL;
1452    
1453      /* If we are at the end of an assertion group, stop matching and return      /* If we are at the end of an assertion group or an atomic group, stop
1454      MATCH_MATCH, but record the current high water mark for use by positive      matching and return MATCH_MATCH, but record the current high water mark for
1455      assertions. Do this also for the "once" (atomic) groups. */      use by positive assertions. We also need to record the match start in case
1456        it was changed by \K. */
1457    
1458      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1459          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1447  for (;;) Line 1461  for (;;)
1461        {        {
1462        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1463        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1464          md->start_match_ptr = mstart;
1465        RRETURN(MATCH_MATCH);        RRETURN(MATCH_MATCH);
1466        }        }
1467    
# Line 1460  for (;;) Line 1475  for (;;)
1475        {        {
1476        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1477        offset = number << 1;        offset = number << 1;
1478    
1479  #ifdef DEBUG  #ifdef PCRE_DEBUG
1480        printf("end bracket %d", number);        printf("end bracket %d", number);
1481        printf("\n");        printf("\n");
1482  #endif  #endif
# Line 1483  for (;;) Line 1498  for (;;)
1498          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1499          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1500          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
         mstart = rec->save_start;  
1501          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1502            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1503          offset_top = rec->offset_top;          offset_top = rec->save_offset_top;
1504          ecode = rec->after_call;          ecode = rec->after_call;
1505          ims = original_ims;          ims = original_ims;
1506          break;          break;
# Line 2146  for (;;) Line 2160  for (;;)
2160          pp = eptr;          pp = eptr;
2161          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2162            {            {
2163            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2164                {
2165                CHECK_PARTIAL();
2166                break;
2167                }
2168            eptr += length;            eptr += length;
2169            }            }
2170          while (eptr >= pp)          while (eptr >= pp)
# Line 2315  for (;;) Line 2333  for (;;)
2333            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2334              {              {
2335              int len = 1;              int len = 1;
2336              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2337                  {
2338                  SCHECK_PARTIAL();
2339                  break;
2340                  }
2341              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2342              if (c > 255)              if (c > 255)
2343                {                {
# Line 2341  for (;;) Line 2363  for (;;)
2363            {            {
2364            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2365              {              {
2366              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2367                  {
2368                  SCHECK_PARTIAL();
2369                  break;
2370                  }
2371              c = *eptr;              c = *eptr;
2372              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2373              eptr++;              eptr++;
# Line 2446  for (;;) Line 2472  for (;;)
2472          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2473            {            {
2474            int len = 1;            int len = 1;
2475            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2476                {
2477                SCHECK_PARTIAL();
2478                break;
2479                }
2480            GETCHARLENTEST(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
2481            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2482            eptr += len;            eptr += len;
# Line 2685  for (;;) Line 2715  for (;;)
2715                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
2716                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2717  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2718              else break;              else
2719                  {
2720                  CHECK_PARTIAL();
2721                  break;
2722                  }
2723              }              }
2724    
2725            if (possessive) continue;            if (possessive) continue;
# Line 2763  for (;;) Line 2797  for (;;)
2797          pp = eptr;          pp = eptr;
2798          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2799            {            {
2800            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2801                {
2802                SCHECK_PARTIAL();
2803                break;
2804                }
2805              if (fc != md->lcc[*eptr]) break;
2806            eptr++;            eptr++;
2807            }            }
2808    
# Line 2817  for (;;) Line 2856  for (;;)
2856          pp = eptr;          pp = eptr;
2857          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2858            {            {
2859            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
2860                {
2861                SCHECK_PARTIAL();
2862                break;
2863                }
2864              if (fc != *eptr) break;
2865            eptr++;            eptr++;
2866            }            }
2867          if (possessive) continue;          if (possessive) continue;
# Line 3029  for (;;) Line 3073  for (;;)
3073            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3074              {              {
3075              int len = 1;              int len = 1;
3076              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3077                  {
3078                  SCHECK_PARTIAL();
3079                  break;
3080                  }
3081              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3082              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3083              if (fc == d) break;              if (fc == d) break;
# Line 3050  for (;;) Line 3098  for (;;)
3098            {            {
3099            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3100              {              {
3101              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3102                  {
3103                  SCHECK_PARTIAL();
3104                  break;
3105                  }
3106                if (fc == md->lcc[*eptr]) break;
3107              eptr++;              eptr++;
3108              }              }
3109            if (possessive) continue;            if (possessive) continue;
# Line 3159  for (;;) Line 3212  for (;;)
3212            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3213              {              {
3214              int len = 1;              int len = 1;
3215              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3216                  {
3217                  SCHECK_PARTIAL();
3218                  break;
3219                  }
3220              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3221              if (fc == d) break;              if (fc == d) break;
3222              eptr += len;              eptr += len;
# Line 3179  for (;;) Line 3236  for (;;)
3236            {            {
3237            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3238              {              {
3239              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3240                  {
3241                  SCHECK_PARTIAL();
3242                  break;
3243                  }
3244                if (fc == *eptr) break;
3245              eptr++;              eptr++;
3246              }              }
3247            if (possessive) continue;            if (possessive) continue;
# Line 3640  for (;;) Line 3702  for (;;)
3702          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3703          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3704            {            {
3705            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3706               (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))              {
3707                SCHECK_PARTIAL();
3708                RRETURN(MATCH_NOMATCH);
3709                }
3710              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3711              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3712            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3713            }            }
# Line 4335  for (;;) Line 4401  for (;;)
4401            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4402              {              {
4403              int len = 1;              int len = 1;
4404              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4405                  {
4406                  SCHECK_PARTIAL();
4407                  break;
4408                  }
4409              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4410              if (prop_fail_result) break;              if (prop_fail_result) break;
4411              eptr+= len;              eptr+= len;
# Line 4346  for (;;) Line 4416  for (;;)
4416            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4417              {              {
4418              int len = 1;              int len = 1;
4419              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4420                  {
4421                  SCHECK_PARTIAL();
4422                  break;
4423                  }
4424              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4425              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4426              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
# Line 4361  for (;;) Line 4435  for (;;)
4435            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4436              {              {
4437              int len = 1;              int len = 1;
4438              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4439                  {
4440                  SCHECK_PARTIAL();
4441                  break;
4442                  }
4443              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4444              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4445              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
# Line 4374  for (;;) Line 4452  for (;;)
4452            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4453              {              {
4454              int len = 1;              int len = 1;
4455              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4456                  {
4457                  SCHECK_PARTIAL();
4458                  break;
4459                  }
4460              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4461              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4462              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
# Line 4387  for (;;) Line 4469  for (;;)
4469            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4470              {              {
4471              int len = 1;              int len = 1;
4472              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4473                  {
4474                  SCHECK_PARTIAL();
4475                  break;
4476                  }
4477              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4478              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4479              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
# Line 4416  for (;;) Line 4502  for (;;)
4502          {          {
4503          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4504            {            {
4505            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
4506                {
4507                SCHECK_PARTIAL();
4508                break;
4509                }
4510            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4511            prop_category = UCD_CATEGORY(c);            prop_category = UCD_CATEGORY(c);
4512            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
# Line 4436  for (;;) Line 4526  for (;;)
4526          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4527    
4528          if (possessive) continue;          if (possessive) continue;
4529    
4530          for(;;)          for(;;)
4531            {            {
4532            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
# Line 4471  for (;;) Line 4562  for (;;)
4562              {              {
4563              for (i = min; i < max; i++)              for (i = min; i < max; i++)
4564                {                {
4565                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                if (eptr >= md->end_subject)
4566                    {
4567                    SCHECK_PARTIAL();
4568                    break;
4569                    }
4570                  if (IS_NEWLINE(eptr)) break;
4571                eptr++;                eptr++;
4572                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4573                }                }
# Line 4483  for (;;) Line 4579  for (;;)
4579              {              {
4580              for (i = min; i < max; i++)              for (i = min; i < max; i++)
4581                {                {
4582                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                if (eptr >= md->end_subject)
4583                    {
4584                    SCHECK_PARTIAL();
4585                    break;
4586                    }
4587                  if (IS_NEWLINE(eptr)) break;
4588                eptr++;                eptr++;
4589                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4590                }                }
# Line 4495  for (;;) Line 4596  for (;;)
4596              {              {
4597              for (i = min; i < max; i++)              for (i = min; i < max; i++)
4598                {                {
4599                if (eptr >= md->end_subject) break;                if (eptr >= md->end_subject)
4600                    {
4601                    SCHECK_PARTIAL();
4602                    break;
4603                    }
4604                eptr++;                eptr++;
4605                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4606                }                }
# Line 4508  for (;;) Line 4613  for (;;)
4613            case OP_ANYBYTE:            case OP_ANYBYTE:
4614            c = max - min;            c = max - min;
4615            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
4616              c = md->end_subject - eptr;              {
4617            eptr += c;              eptr = md->end_subject;
4618                SCHECK_PARTIAL();
4619                }
4620              else eptr += c;
4621            break;            break;
4622    
4623            case OP_ANYNL:            case OP_ANYNL:
4624            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4625              {              {
4626              int len = 1;              int len = 1;
4627              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4628                  {
4629                  SCHECK_PARTIAL();
4630                  break;
4631                  }
4632              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4633              if (c == 0x000d)              if (c == 0x000d)
4634                {                {
# Line 4541  for (;;) Line 4653  for (;;)
4653              {              {
4654              BOOL gotspace;              BOOL gotspace;
4655              int len = 1;              int len = 1;
4656              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4657                  {
4658                  SCHECK_PARTIAL();
4659                  break;
4660                  }
4661              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4662              switch(c)              switch(c)
4663                {                {
# Line 4579  for (;;) Line 4695  for (;;)
4695              {              {
4696              BOOL gotspace;              BOOL gotspace;
4697              int len = 1;              int len = 1;
4698              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4699                  {
4700                  SCHECK_PARTIAL();
4701                  break;
4702                  }
4703              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4704              switch(c)              switch(c)
4705                {                {
# Line 4603  for (;;) Line 4723  for (;;)
4723            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4724              {              {
4725              int len = 1;              int len = 1;
4726              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4727                  {
4728                  SCHECK_PARTIAL();
4729                  break;
4730                  }
4731              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4732              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
4733              eptr+= len;              eptr+= len;
# Line 4614  for (;;) Line 4738  for (;;)
4738            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4739              {              {
4740              int len = 1;              int len = 1;
4741              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4742                  {
4743                  SCHECK_PARTIAL();
4744                  break;
4745                  }
4746              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4747              if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;              if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
4748              eptr+= len;              eptr+= len;
# Line 4625  for (;;) Line 4753  for (;;)
4753            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4754              {              {
4755              int len = 1;              int len = 1;
4756              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4757                  {
4758                  SCHECK_PARTIAL();
4759                  break;
4760                  }
4761              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4762              if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;              if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
4763              eptr+= len;              eptr+= len;
# Line 4636  for (;;) Line 4768  for (;;)
4768            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4769              {              {
4770              int len = 1;              int len = 1;
4771              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4772                  {
4773                  SCHECK_PARTIAL();
4774                  break;
4775                  }
4776              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4777              if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;              if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
4778              eptr+= len;              eptr+= len;
# Line 4647  for (;;) Line 4783  for (;;)
4783            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4784              {              {
4785              int len = 1;              int len = 1;
4786              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4787                  {
4788                  SCHECK_PARTIAL();
4789                  break;
4790                  }
4791              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4792              if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;              if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
4793              eptr+= len;              eptr+= len;
# Line 4658  for (;;) Line 4798  for (;;)
4798            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4799              {              {
4800              int len = 1;              int len = 1;
4801              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4802                  {
4803                  SCHECK_PARTIAL();
4804                  break;
4805                  }
4806              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4807              if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;              if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
4808              eptr+= len;              eptr+= len;
# Line 4690  for (;;) Line 4834  for (;;)
4834            case OP_ANY:            case OP_ANY:
4835            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4836              {              {
4837              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;              if (eptr >= md->end_subject)
4838                  {
4839                  SCHECK_PARTIAL();
4840                  break;
4841                  }
4842                if (IS_NEWLINE(eptr)) break;
4843              eptr++;              eptr++;
4844              }              }
4845            break;            break;
# Line 4699  for (;;) Line 4848  for (;;)
4848            case OP_ANYBYTE:            case OP_ANYBYTE:
4849            c = max - min;            c = max - min;
4850            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
4851              c = md->end_subject - eptr;              {
4852            eptr += c;              eptr = md->end_subject;
4853                SCHECK_PARTIAL();
4854                }
4855              else eptr += c;
4856            break;            break;
4857    
4858            case OP_ANYNL:            case OP_ANYNL:
4859            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4860              {              {
4861              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4862                  {
4863                  SCHECK_PARTIAL();
4864                  break;
4865                  }
4866              c = *eptr;              c = *eptr;
4867              if (c == 0x000d)              if (c == 0x000d)
4868                {                {
# Line 4727  for (;;) Line 4883  for (;;)
4883            case OP_NOT_HSPACE:            case OP_NOT_HSPACE:
4884            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4885              {              {
4886              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4887                  {
4888                  SCHECK_PARTIAL();
4889                  break;
4890                  }
4891              c = *eptr;              c = *eptr;
4892              if (c == 0x09 || c == 0x20 || c == 0xa0) break;              if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4893              eptr++;              eptr++;
# Line 4737  for (;;) Line 4897  for (;;)
4897            case OP_HSPACE:            case OP_HSPACE:
4898            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4899              {              {
4900              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4901                  {
4902                  SCHECK_PARTIAL();
4903                  break;
4904                  }
4905              c = *eptr;              c = *eptr;
4906              if (c != 0x09 && c != 0x20 && c != 0xa0) break;              if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4907              eptr++;              eptr++;
# Line 4747  for (;;) Line 4911  for (;;)
4911            case OP_NOT_VSPACE:            case OP_NOT_VSPACE:
4912            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4913              {              {
4914              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4915                  {
4916                  SCHECK_PARTIAL();
4917                  break;
4918                  }
4919              c = *eptr;              c = *eptr;
4920              if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)              if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4921                break;                break;
# Line 4758  for (;;) Line 4926  for (;;)
4926            case OP_VSPACE:            case OP_VSPACE:
4927            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4928              {              {
4929              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4930                  {
4931                  SCHECK_PARTIAL();
4932                  break;
4933                  }
4934              c = *eptr;              c = *eptr;
4935              if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)              if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4936                break;                break;
# Line 4769  for (;;) Line 4941  for (;;)
4941            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
4942            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4943              {              {
4944              if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)              if (eptr >= md->end_subject)
4945                  {
4946                  SCHECK_PARTIAL();
4947                break;                break;
4948                  }
4949                if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
4950              eptr++;              eptr++;
4951              }              }
4952            break;            break;
# Line 4778  for (;;) Line 4954  for (;;)
4954            case OP_DIGIT:            case OP_DIGIT:
4955            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4956              {              {
4957              if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)              if (eptr >= md->end_subject)
4958                  {
4959                  SCHECK_PARTIAL();
4960                break;                break;
4961                  }
4962                if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
4963              eptr++;              eptr++;
4964              }              }
4965            break;            break;
# Line 4787  for (;;) Line 4967  for (;;)
4967            case OP_NOT_WHITESPACE:            case OP_NOT_WHITESPACE:
4968            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4969              {              {
4970              if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)              if (eptr >= md->end_subject)
4971                  {
4972                  SCHECK_PARTIAL();
4973                break;                break;
4974                  }
4975                if ((md->ctypes[*eptr] & ctype_space) != 0) break;
4976              eptr++;              eptr++;
4977              }              }
4978            break;            break;
# Line 4796  for (;;) Line 4980  for (;;)
4980            case OP_WHITESPACE:            case OP_WHITESPACE:
4981            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4982              {              {
4983              if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)              if (eptr >= md->end_subject)
4984                  {
4985                  SCHECK_PARTIAL();
4986                break;                break;
4987                  }
4988                if ((md->ctypes[*eptr] & ctype_space) == 0) break;
4989              eptr++;              eptr++;
4990              }              }
4991            break;            break;
# Line 4805  for (;;) Line 4993  for (;;)
4993            case OP_NOT_WORDCHAR:            case OP_NOT_WORDCHAR:
4994            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4995              {              {
4996              if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)              if (eptr >= md->end_subject)
4997                  {
4998                  SCHECK_PARTIAL();
4999                break;                break;
5000                  }
5001                if ((md->ctypes[*eptr] & ctype_word) != 0) break;
5002              eptr++;              eptr++;
5003              }              }
5004            break;            break;
# Line 4814  for (;;) Line 5006  for (;;)
5006            case OP_WORDCHAR:            case OP_WORDCHAR:
5007            for (i = min; i < max; i++)            for (i = min; i < max; i++)
5008              {              {
5009              if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)              if (eptr >= md->end_subject)
5010                  {
5011                  SCHECK_PARTIAL();
5012                break;                break;
5013                  }
5014                if ((md->ctypes[*eptr] & ctype_word) == 0) break;
5015              eptr++;              eptr++;
5016              }              }
5017            break;            break;
# Line 5010  if (re == NULL || subject == NULL || Line 5206  if (re == NULL || subject == NULL ||
5206     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5207  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5208    
5209  /* This information is for finding all the numbers associated with a given  /* This information is for finding all the numbers associated with a given
5210  name, for condition testing. */  name, for condition testing. */
5211    
5212  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (uschar *)re + re->name_table_offset;
# Line 5375  for(;;) Line 5571  for(;;)
5571    /* Restore fudged end_subject */    /* Restore fudged end_subject */
5572    
5573    end_subject = save_end_subject;    end_subject = save_end_subject;
5574    
5575    /* The following two optimizations are disabled for partial matching or if    /* The following two optimizations are disabled for partial matching or if
5576    disabling is explicitly requested. */    disabling is explicitly requested. */
5577    
5578    if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)    if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
5579      {      {
5580      /* If the pattern was studied, a minimum subject length may be set. This is      /* If the pattern was studied, a minimum subject length may be set. This is
5581      a lower bound; no actual string of that length may actually match the      a lower bound; no actual string of that length may actually match the
5582      pattern. Although the value is, strictly, in characters, we treat it as      pattern. Although the value is, strictly, in characters, we treat it as
5583      bytes to avoid spending too much time in this optimization. */      bytes to avoid spending too much time in this optimization. */
5584    
5585      if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&      if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
5586          end_subject - start_match < study->minlength)          (pcre_uint32)(end_subject - start_match) < study->minlength)
5587        {        {
5588        rc = MATCH_NOMATCH;        rc = MATCH_NOMATCH;
5589        break;        break;
5590        }        }
5591    
5592      /* If req_byte is set, we know that that character must appear in the      /* If req_byte is set, we know that that character must appear in the
5593      subject for the match to succeed. If the first character is set, req_byte      subject for the match to succeed. If the first character is set, req_byte
5594      must be later in the subject; otherwise the test starts at the match point.      must be later in the subject; otherwise the test starts at the match point.
# Line 5400  for(;;) Line 5596  for(;;)
5596      nested unlimited repeats that aren't going to match. Writing separate code      nested unlimited repeats that aren't going to match. Writing separate code
5597      for cased/caseless versions makes it go faster, as does using an      for cased/caseless versions makes it go faster, as does using an
5598      autoincrement and backing off on a match.      autoincrement and backing off on a match.
5599    
5600      HOWEVER: when the subject string is very, very long, searching to its end      HOWEVER: when the subject string is very, very long, searching to its end
5601      can take a long time, and give bad performance on quite ordinary patterns.      can take a long time, and give bad performance on quite ordinary patterns.
5602      This showed up when somebody was matching something like /^\d+C/ on a      This showed up when somebody was matching something like /^\d+C/ on a
5603      32-megabyte string... so we don't do this when the string is sufficiently      32-megabyte string... so we don't do this when the string is sufficiently
5604      long. */      long. */
5605    
5606      if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)      if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
5607        {        {
5608        register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);        register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
5609    
5610        /* We don't need to repeat the search if we haven't yet reached the        /* We don't need to repeat the search if we haven't yet reached the
5611        place we found it at last time. */        place we found it at last time. */
5612    
5613        if (p > req_byte_ptr)        if (p > req_byte_ptr)
5614          {          {
5615          if (req_byte_caseless)          if (req_byte_caseless)
# Line 5431  for(;;) Line 5627  for(;;)
5627              if (*p++ == req_byte) { p--; break; }              if (*p++ == req_byte) { p--; break; }
5628              }              }
5629            }            }
5630    
5631          /* If we can't find the required character, break the matching loop,          /* If we can't find the required character, break the matching loop,
5632          forcing a match failure. */          forcing a match failure. */
5633    
5634          if (p >= end_subject)          if (p >= end_subject)
5635            {            {
5636            rc = MATCH_NOMATCH;            rc = MATCH_NOMATCH;
5637            break;            break;
5638            }            }
5639    
5640          /* If we have found the required character, save the point where we          /* If we have found the required character, save the point where we
5641          found it, so that we don't search again next time round the loop if          found it, so that we don't search again next time round the loop if
5642          the start hasn't passed this character yet. */          the start hasn't passed this character yet. */
5643    
5644          req_byte_ptr = p;          req_byte_ptr = p;
5645          }          }
5646        }        }
5647      }      }
5648    
5649  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
5650    printf(">>>> Match against: ");    printf(">>>> Match against: ");
5651    pchars(start_match, end_subject - start_match, TRUE, md);    pchars(start_match, end_subject - start_match, TRUE, md);
5652    printf("\n");    printf("\n");
# Line 5575  if (rc == MATCH_MATCH) Line 5771  if (rc == MATCH_MATCH)
5771    too many to fit into the vector. */    too many to fit into the vector. */
5772    
5773    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = md->offset_overflow? 0 : md->end_offset_top/2;
5774    
5775    /* If there is space, set up the whole thing as substring 0. The value of    /* If there is space, set up the whole thing as substring 0. The value of
5776    md->start_match_ptr might be modified if \K was encountered on the success    md->start_match_ptr might be modified if \K was encountered on the success
5777    matching path. */    matching path. */

Legend:
Removed from v.459  
changed lines
  Added in v.500

  ViewVC Help
Powered by ViewVC 1.1.5