/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | Patch

revision 231 by ph10, Tue Sep 11 11:15:33 2007 UTC revision 446 by ph10, Tue Sep 15 10:49:50 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 43  pattern matching using an NFA algorithm, Line 43  pattern matching using an NFA algorithm,
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
46  #include <config.h>  #include "config.h"
47  #endif  #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
# Line 158  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 296  typedef struct heapframe { Line 322  typedef struct heapframe {
322    
323    /* Function arguments that may change */    /* Function arguments that may change */
324    
325    const uschar *Xeptr;    USPTR Xeptr;
326    const uschar *Xecode;    const uschar *Xecode;
327    const uschar *Xmstart;    USPTR Xmstart;
328    int Xoffset_top;    int Xoffset_top;
329    long int Xims;    long int Xims;
330    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 307  typedef struct heapframe { Line 333  typedef struct heapframe {
333    
334    /* Function local variables */    /* Function local variables */
335    
336    const uschar *Xcallpat;    USPTR Xcallpat;
337    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
338    const uschar *Xdata;    USPTR Xcharptr;
339    const uschar *Xnext;  #endif
340    const uschar *Xpp;    USPTR Xdata;
341    const uschar *Xprev;    USPTR Xnext;
342    const uschar *Xsaved_eptr;    USPTR Xpp;
343      USPTR Xprev;
344      USPTR Xsaved_eptr;
345    
346    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
347    
# Line 334  typedef struct heapframe { Line 362  typedef struct heapframe {
362    uschar Xocchars[8];    uschar Xocchars[8];
363  #endif  #endif
364    
365      int Xcodelink;
366    int Xctype;    int Xctype;
367    unsigned int Xfc;    unsigned int Xfc;
368    int Xfi;    int Xfi;
# Line 369  typedef struct heapframe { Line 398  typedef struct heapframe {
398    
399  /* This function is called recursively in many circumstances. Whenever it  /* This function is called recursively in many circumstances. Whenever it
400  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
401  same response.  same response. */
402    
403  Performance note: It might be tempting to extract commonly used fields from the  /* These macros pack up tests that are used for partial matching, and which
404  md structure (e.g. utf8, end_subject) into individual variables to improve  appear several times in the code. We set the "hit end" flag if the pointer is
405    at the end of the subject and also past the start of the subject (i.e.
406    something has been matched). For hard partial matching, we then return
407    immediately. The second one is used when we already know we are past the end of
408    the subject. */
409    
410    #define CHECK_PARTIAL()\
411      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
412        {\
413        md->hitend = TRUE;\
414        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
415        }
416    
417    #define SCHECK_PARTIAL()\
418      if (md->partial && eptr > mstart)\
419        {\
420        md->hitend = TRUE;\
421        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
422        }
423    
424    
425    /* Performance note: It might be tempting to extract commonly used fields from
426    the md structure (e.g. utf8, end_subject) into individual variables to improve
427  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
428  made performance worse.  made performance worse.
429    
# Line 399  Returns:       MATCH_MATCH if matched Line 450  Returns:       MATCH_MATCH if matched
450  */  */
451    
452  static int  static int
453  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
454    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
455    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
456  {  {
# Line 413  register unsigned int c;   /* Character Line 464  register unsigned int c;   /* Character
464  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
465    
466  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
467    int condcode;
468    
469  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
470  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 455  HEAP_RECURSE: Line 507  HEAP_RECURSE:
507  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
508  #endif  #endif
509  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
510    #define codelink           frame->Xcodelink
511  #define data               frame->Xdata  #define data               frame->Xdata
512  #define next               frame->Xnext  #define next               frame->Xnext
513  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 535  int oclength; Line 588  int oclength;
588  uschar occhars[8];  uschar occhars[8];
589  #endif  #endif
590    
591    int codelink;
592  int ctype;  int ctype;
593  int length;  int length;
594  int max;  int max;
# Line 610  for (;;) Line 664  for (;;)
664    minimize = possessive = FALSE;    minimize = possessive = FALSE;
665    op = *ecode;    op = *ecode;
666    
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > mstart)  
     md->hitend = TRUE;  
   
667    switch(op)    switch(op)
668      {      {
669      case OP_FAIL:      case OP_FAIL:
# Line 761  for (;;) Line 807  for (;;)
807    
808      case OP_COND:      case OP_COND:
809      case OP_SCOND:      case OP_SCOND:
810      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
811    
812        /* Because of the way auto-callout works during compile, a callout item is
813        inserted between OP_COND and an assertion condition. */
814    
815        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
816          {
817          if (pcre_callout != NULL)
818            {
819            pcre_callout_block cb;
820            cb.version          = 1;   /* Version 1 of the callout block */
821            cb.callout_number   = ecode[LINK_SIZE+2];
822            cb.offset_vector    = md->offset_vector;
823            cb.subject          = (PCRE_SPTR)md->start_subject;
824            cb.subject_length   = md->end_subject - md->start_subject;
825            cb.start_match      = mstart - md->start_subject;
826            cb.current_position = eptr - md->start_subject;
827            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
828            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
829            cb.capture_top      = offset_top/2;
830            cb.capture_last     = md->capture_last;
831            cb.callout_data     = md->callout_data;
832            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
833            if (rrc < 0) RRETURN(rrc);
834            }
835          ecode += _pcre_OP_lengths[OP_CALLOUT];
836          }
837    
838        condcode = ecode[LINK_SIZE+1];
839    
840        /* Now see what the actual condition is */
841    
842        if (condcode == OP_RREF)         /* Recursion test */
843        {        {
844        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
845        condition = md->recursive != NULL &&        condition = md->recursive != NULL &&
# Line 769  for (;;) Line 847  for (;;)
847        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
848        }        }
849    
850      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF)    /* Group used test */
851        {        {
852        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
853        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
854        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
855        }        }
856    
857      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
858        {        {
859        condition = FALSE;        condition = FALSE;
860        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 803  for (;;) Line 881  for (;;)
881        else        else
882          {          {
883          condition = FALSE;          condition = FALSE;
884          ecode += GET(ecode, 1);          ecode += codelink;
885          }          }
886        }        }
887    
# Line 826  for (;;) Line 904  for (;;)
904          goto TAIL_RECURSE;          goto TAIL_RECURSE;
905          }          }
906        }        }
907      else                         /* Condition false & no 2nd alternative */      else                         /* Condition false & no alternative */
908        {        {
909        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
910        }        }
# Line 846  for (;;) Line 924  for (;;)
924        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
925        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
926          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
927          offset_top = rec->offset_top;
928        mstart = rec->save_start;        mstart = rec->save_start;
929        ims = original_ims;        ims = original_ims;
930        ecode = rec->after_call;        ecode = rec->after_call;
931        break;        break;
932        }        }
933    
934      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
935      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
936        the subject. In both cases, backtracking will then try other alternatives,
937        if any. */
938    
939        if (eptr == mstart &&
940            (md->notempty ||
941              (md->notempty_atstart &&
942                mstart == md->start_subject + md->start_offset)))
943          RRETURN(MATCH_NOMATCH);
944    
945        /* Otherwise, we have a match. */
946    
     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);  
947      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
948      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
949      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
# Line 946  for (;;) Line 1034  for (;;)
1034        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1035        }        }
1036    
1037      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1038    
1039        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1040      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1041      break;      break;
1042    
# Line 1027  for (;;) Line 1116  for (;;)
1116        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1117              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1118        new_recursive.save_start = mstart;        new_recursive.save_start = mstart;
1119          new_recursive.offset_top = offset_top;
1120        mstart = eptr;        mstart = eptr;
1121    
1122        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
# Line 1049  for (;;) Line 1139  for (;;)
1139          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1140            {            {
1141            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1142              if (new_recursive.offset_save != stacksave)
1143                (pcre_free)(new_recursive.offset_save);
1144            RRETURN(rrc);            RRETURN(rrc);
1145            }            }
1146    
# Line 1148  for (;;) Line 1240  for (;;)
1240      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1241      break;      break;
1242    
1243      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1244      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1245      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1246      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1247      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1248    
1249      case OP_BRAZERO:      case OP_BRAZERO:
1250        {        {
# Line 1174  for (;;) Line 1266  for (;;)
1266        }        }
1267      break;      break;
1268    
1269        case OP_SKIPZERO:
1270          {
1271          next = ecode+1;
1272          do next += GET(next,1); while (*next == OP_ALT);
1273          ecode = next + 1 + LINK_SIZE;
1274          }
1275        break;
1276    
1277      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1278    
1279      case OP_KET:      case OP_KET:
# Line 1215  for (;;) Line 1315  for (;;)
1315        {        {
1316        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1317        offset = number << 1;        offset = number << 1;
1318    
1319  #ifdef DEBUG  #ifdef DEBUG
1320        printf("end bracket %d", number);        printf("end bracket %d", number);
1321        printf("\n");        printf("\n");
# Line 1241  for (;;) Line 1341  for (;;)
1341          mstart = rec->save_start;          mstart = rec->save_start;
1342          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1343            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1344            offset_top = rec->offset_top;
1345          ecode = rec->after_call;          ecode = rec->after_call;
1346          ims = original_ims;          ims = original_ims;
1347          break;          break;
# Line 1380  for (;;) Line 1481  for (;;)
1481    
1482        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1483        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1484        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1485          partial matching. */
1486    
1487  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1488        if (utf8)        if (utf8)
1489          {          {
1490          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1491            {            {
1492            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1493            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1494              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1495            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1496            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1497            }            }
1498          if (eptr >= md->end_subject) cur_is_word = FALSE; else          if (eptr >= md->end_subject)
1499              {
1500              SCHECK_PARTIAL();
1501              cur_is_word = FALSE;
1502              }
1503            else
1504            {            {
1505            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1506            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1401  for (;;) Line 1509  for (;;)
1509        else        else
1510  #endif  #endif
1511    
1512        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode */
1513    
1514          {          {
1515          prev_is_word = (eptr != md->start_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1516            ((md->ctypes[eptr[-1]] & ctype_word) != 0);            {
1517          cur_is_word = (eptr < md->end_subject) &&            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1518            ((md->ctypes[*eptr] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1519              }
1520            if (eptr >= md->end_subject)
1521              {
1522              SCHECK_PARTIAL();
1523              cur_is_word = FALSE;
1524              }
1525            else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1526          }          }
1527    
1528        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
# Line 1421  for (;;) Line 1536  for (;;)
1536      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1537    
1538      case OP_ANY:      case OP_ANY:
1539      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1540        /* Fall through */
1541    
1542        case OP_ALLANY:
1543        if (eptr++ >= md->end_subject)
1544        {        {
1545        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);        SCHECK_PARTIAL();
1546          RRETURN(MATCH_NOMATCH);
1547        }        }
1548      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1549      ecode++;      ecode++;
1550      break;      break;
1551    
# Line 1435  for (;;) Line 1553  for (;;)
1553      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1554    
1555      case OP_ANYBYTE:      case OP_ANYBYTE:
1556      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1557          {
1558          SCHECK_PARTIAL();
1559          RRETURN(MATCH_NOMATCH);
1560          }
1561      ecode++;      ecode++;
1562      break;      break;
1563    
1564      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1565      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1566          {
1567          SCHECK_PARTIAL();
1568          RRETURN(MATCH_NOMATCH);
1569          }
1570      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1571      if (      if (
1572  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1453  for (;;) Line 1579  for (;;)
1579      break;      break;
1580    
1581      case OP_DIGIT:      case OP_DIGIT:
1582      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1583          {
1584          SCHECK_PARTIAL();
1585          RRETURN(MATCH_NOMATCH);
1586          }
1587      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1588      if (      if (
1589  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1466  for (;;) Line 1596  for (;;)
1596      break;      break;
1597    
1598      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1599      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1600          {
1601          SCHECK_PARTIAL();
1602          RRETURN(MATCH_NOMATCH);
1603          }
1604      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1605      if (      if (
1606  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1479  for (;;) Line 1613  for (;;)
1613      break;      break;
1614    
1615      case OP_WHITESPACE:      case OP_WHITESPACE:
1616      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1617          {
1618          SCHECK_PARTIAL();
1619          RRETURN(MATCH_NOMATCH);
1620          }
1621      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1622      if (      if (
1623  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1492  for (;;) Line 1630  for (;;)
1630      break;      break;
1631    
1632      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1633      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1634          {
1635          SCHECK_PARTIAL();
1636          RRETURN(MATCH_NOMATCH);
1637          }
1638      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1639      if (      if (
1640  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1505  for (;;) Line 1647  for (;;)
1647      break;      break;
1648    
1649      case OP_WORDCHAR:      case OP_WORDCHAR:
1650      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1651          {
1652          SCHECK_PARTIAL();
1653          RRETURN(MATCH_NOMATCH);
1654          }
1655      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1656      if (      if (
1657  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1518  for (;;) Line 1664  for (;;)
1664      break;      break;
1665    
1666      case OP_ANYNL:      case OP_ANYNL:
1667      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1668          {
1669          SCHECK_PARTIAL();
1670          RRETURN(MATCH_NOMATCH);
1671          }
1672      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1673      switch(c)      switch(c)
1674        {        {
# Line 1542  for (;;) Line 1692  for (;;)
1692      break;      break;
1693    
1694      case OP_NOT_HSPACE:      case OP_NOT_HSPACE:
1695      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1696          {
1697          SCHECK_PARTIAL();
1698          RRETURN(MATCH_NOMATCH);
1699          }
1700      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1701      switch(c)      switch(c)
1702        {        {
# Line 1572  for (;;) Line 1726  for (;;)
1726      break;      break;
1727    
1728      case OP_HSPACE:      case OP_HSPACE:
1729      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1730          {
1731          SCHECK_PARTIAL();
1732          RRETURN(MATCH_NOMATCH);
1733          }
1734      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1735      switch(c)      switch(c)
1736        {        {
# Line 1602  for (;;) Line 1760  for (;;)
1760      break;      break;
1761    
1762      case OP_NOT_VSPACE:      case OP_NOT_VSPACE:
1763      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1764          {
1765          SCHECK_PARTIAL();
1766          RRETURN(MATCH_NOMATCH);
1767          }
1768      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1769      switch(c)      switch(c)
1770        {        {
# Line 1620  for (;;) Line 1782  for (;;)
1782      break;      break;
1783    
1784      case OP_VSPACE:      case OP_VSPACE:
1785      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1786          {
1787          SCHECK_PARTIAL();
1788          RRETURN(MATCH_NOMATCH);
1789          }
1790      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1791      switch(c)      switch(c)
1792        {        {
# Line 1643  for (;;) Line 1809  for (;;)
1809    
1810      case OP_PROP:      case OP_PROP:
1811      case OP_NOTPROP:      case OP_NOTPROP:
1812      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1813          {
1814          SCHECK_PARTIAL();
1815          RRETURN(MATCH_NOMATCH);
1816          }
1817      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1818        {        {
1819        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1820    
1821        switch(ecode[1])        switch(ecode[1])
1822          {          {
# Line 1656  for (;;) Line 1825  for (;;)
1825          break;          break;
1826    
1827          case PT_LAMP:          case PT_LAMP:
1828          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
1829               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
1830               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1831            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1832           break;           break;
1833    
1834          case PT_GC:          case PT_GC:
1835          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1836            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1837          break;          break;
1838    
1839          case PT_PC:          case PT_PC:
1840          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1841            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1842          break;          break;
1843    
1844          case PT_SC:          case PT_SC:
1845          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
1846            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1847          break;          break;
1848    
# Line 1689  for (;;) Line 1858  for (;;)
1858      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
1859    
1860      case OP_EXTUNI:      case OP_EXTUNI:
1861      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1862          {
1863          SCHECK_PARTIAL();
1864          RRETURN(MATCH_NOMATCH);
1865          }
1866      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1867        {        {
1868        int chartype, script;        int category = UCD_CATEGORY(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1869        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1870        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1871          {          {
# Line 1702  for (;;) Line 1874  for (;;)
1874            {            {
1875            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1876            }            }
1877          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
1878          if (category != ucp_M) break;          if (category != ucp_M) break;
1879          eptr += len;          eptr += len;
1880          }          }
# Line 1723  for (;;) Line 1895  for (;;)
1895      case OP_REF:      case OP_REF:
1896        {        {
1897        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1898        ecode += 3;                                 /* Advance past item */        ecode += 3;
1899    
1900          /* If the reference is unset, there are two possibilities:
1901    
1902          (a) In the default, Perl-compatible state, set the length to be longer
1903          than the amount of subject left; this ensures that every attempt at a
1904          match fails. We can't just fail here, because of the possibility of
1905          quantifiers with zero minima.
1906    
1907        /* If the reference is unset, set the length to be longer than the amount        (b) If the JavaScript compatibility flag is set, set the length to zero
1908        of subject left; this ensures that every attempt at a match fails. We        so that the back reference matches an empty string.
1909        can't just fail here, because of the possibility of quantifiers with zero  
1910        minima. */        Otherwise, set the length to the length of what was matched by the
1911          referenced subpattern. */
1912        length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
1913          md->end_subject - eptr + 1 :        if (offset >= offset_top || md->offset_vector[offset] < 0)
1914          md->offset_vector[offset+1] - md->offset_vector[offset];          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1915          else
1916            length = md->offset_vector[offset+1] - md->offset_vector[offset];
1917    
1918        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1919    
# Line 1761  for (;;) Line 1942  for (;;)
1942          break;          break;
1943    
1944          default:               /* No repeat follows */          default:               /* No repeat follows */
1945          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
1946              {
1947              CHECK_PARTIAL();
1948              RRETURN(MATCH_NOMATCH);
1949              }
1950          eptr += length;          eptr += length;
1951          continue;              /* With the main loop */          continue;              /* With the main loop */
1952          }          }
# Line 1777  for (;;) Line 1962  for (;;)
1962    
1963        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
1964          {          {
1965          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
1966              {
1967              CHECK_PARTIAL();
1968              RRETURN(MATCH_NOMATCH);
1969              }
1970          eptr += length;          eptr += length;
1971          }          }
1972    
# Line 1794  for (;;) Line 1983  for (;;)
1983            {            {
1984            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1985            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1986            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) RRETURN(MATCH_NOMATCH);
1987              if (!match_ref(offset, eptr, length, md, ims))
1988                {
1989                CHECK_PARTIAL();
1990              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
1991                }
1992            eptr += length;            eptr += length;
1993            }            }
1994          /* Control never gets here */          /* Control never gets here */
# Line 1822  for (;;) Line 2015  for (;;)
2015        }        }
2016      /* Control never gets here */      /* Control never gets here */
2017    
   
   
2018      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2019      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2020      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1878  for (;;) Line 2069  for (;;)
2069          {          {
2070          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2071            {            {
2072            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2073                {
2074                SCHECK_PARTIAL();
2075                RRETURN(MATCH_NOMATCH);
2076                }
2077            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2078            if (c > 255)            if (c > 255)
2079              {              {
# Line 1896  for (;;) Line 2091  for (;;)
2091          {          {
2092          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2093            {            {
2094            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2095                {
2096                SCHECK_PARTIAL();
2097                RRETURN(MATCH_NOMATCH);
2098                }
2099            c = *eptr++;            c = *eptr++;
2100            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2101            }            }
# Line 1920  for (;;) Line 2119  for (;;)
2119              {              {
2120              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2121              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2122              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2123                if (eptr >= md->end_subject)
2124                  {
2125                  SCHECK_PARTIAL();
2126                  RRETURN(MATCH_NOMATCH);
2127                  }
2128              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2129              if (c > 255)              if (c > 255)
2130                {                {
# Line 1940  for (;;) Line 2144  for (;;)
2144              {              {
2145              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2146              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2147              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2148                if (eptr >= md->end_subject)
2149                  {
2150                  SCHECK_PARTIAL();
2151                  RRETURN(MATCH_NOMATCH);
2152                  }
2153              c = *eptr++;              c = *eptr++;
2154              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2155              }              }
# Line 2007  for (;;) Line 2216  for (;;)
2216    
2217    
2218      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2219      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2220        mode, because Unicode properties are supported in non-UTF-8 mode. */
2221    
2222  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2223      case OP_XCLASS:      case OP_XCLASS:
# Line 2048  for (;;) Line 2258  for (;;)
2258    
2259        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2260          {          {
2261          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2262          GETCHARINC(c, eptr);            {
2263              SCHECK_PARTIAL();
2264              RRETURN(MATCH_NOMATCH);
2265              }
2266            GETCHARINCTEST(c, eptr);
2267          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2268          }          }
2269    
# Line 2067  for (;;) Line 2281  for (;;)
2281            {            {
2282            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2283            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2284            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
2285            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2286                {
2287                SCHECK_PARTIAL();
2288                RRETURN(MATCH_NOMATCH);
2289                }
2290              GETCHARINCTEST(c, eptr);
2291            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2292            }            }
2293          /* Control never gets here */          /* Control never gets here */
# Line 2083  for (;;) Line 2302  for (;;)
2302            {            {
2303            int len = 1;            int len = 1;
2304            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2305            GETCHARLEN(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
2306            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2307            eptr += len;            eptr += len;
2308            }            }
# Line 2110  for (;;) Line 2329  for (;;)
2329        length = 1;        length = 1;
2330        ecode++;        ecode++;
2331        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2332        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2333            {
2334            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2335            RRETURN(MATCH_NOMATCH);
2336            }
2337        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2338        }        }
2339      else      else
# Line 2118  for (;;) Line 2341  for (;;)
2341    
2342      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2343        {        {
2344        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2345            {
2346            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2347            RRETURN(MATCH_NOMATCH);
2348            }
2349        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2350        ecode += 2;        ecode += 2;
2351        }        }
# Line 2134  for (;;) Line 2361  for (;;)
2361        ecode++;        ecode++;
2362        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2363    
2364        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2365            {
2366            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2367            RRETURN(MATCH_NOMATCH);
2368            }
2369    
2370        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2371        can use the fast lookup table. */        can use the fast lookup table. */
# Line 2158  for (;;) Line 2389  for (;;)
2389          if (fc != dc)          if (fc != dc)
2390            {            {
2391  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2392            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2393  #endif  #endif
2394              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2395            }            }
# Line 2169  for (;;) Line 2400  for (;;)
2400    
2401      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2402        {        {
2403        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2404            {
2405            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2406            RRETURN(MATCH_NOMATCH);
2407            }
2408        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2409        ecode += 2;        ecode += 2;
2410        }        }
# Line 2223  for (;;) Line 2458  for (;;)
2458      case OP_MINQUERY:      case OP_MINQUERY:
2459      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2460      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2461    
2462      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2463      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2464      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2465    
2466      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2467    
2468      REPEATCHAR:      REPEATCHAR:
2469  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2238  for (;;) Line 2472  for (;;)
2472        length = 1;        length = 1;
2473        charptr = ecode;        charptr = ecode;
2474        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2475        ecode += length;        ecode += length;
2476    
2477        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 2249  for (;;) Line 2482  for (;;)
2482  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2483          unsigned int othercase;          unsigned int othercase;
2484          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2485              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2486            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2487          else oclength = 0;          else oclength = 0;
2488  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2489    
2490          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2491            {            {
2492            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2493                memcmp(eptr, charptr, length) == 0) eptr += length;
2494  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2495            /* Need braces because of following else */            else if (oclength > 0 &&
2496            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                     eptr <= md->end_subject - oclength &&
2497                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2498    #endif  /* SUPPORT_UCP */
2499            else            else
2500              {              {
2501              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2502              eptr += oclength;              RRETURN(MATCH_NOMATCH);
2503              }              }
 #else   /* without SUPPORT_UCP */  
           else { RRETURN(MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2504            }            }
2505    
2506          if (min == max) continue;          if (min == max) continue;
# Line 2278  for (;;) Line 2511  for (;;)
2511              {              {
2512              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2513              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2514              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2515              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2516                  memcmp(eptr, charptr, length) == 0) eptr += length;
2517  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2518              /* Need braces because of following else */              else if (oclength > 0 &&
2519              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                       eptr <= md->end_subject - oclength &&
2520                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2521    #endif  /* SUPPORT_UCP */
2522              else              else
2523                {                {
2524                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2525                eptr += oclength;                RRETURN(MATCH_NOMATCH);
2526                }                }
 #else   /* without SUPPORT_UCP */  
             else { RRETURN (MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2527              }              }
2528            /* Control never gets here */            /* Control never gets here */
2529            }            }
# Line 2300  for (;;) Line 2533  for (;;)
2533            pp = eptr;            pp = eptr;
2534            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2535              {              {
2536              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2537              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2538  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2539              else if (oclength == 0) break;              else if (oclength > 0 &&
2540              else                       eptr <= md->end_subject - oclength &&
2541                {                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
               if (memcmp(eptr, occhars, oclength) != 0) break;  
               eptr += oclength;  
               }  
 #else   /* without SUPPORT_UCP */  
             else break;  
2542  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2543                else break;
2544              }              }
2545    
2546            if (possessive) continue;            if (possessive) continue;
2547    
2548            for(;;)            for(;;)
2549             {              {
2550             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2551             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2552             if (eptr == pp) RRETURN(MATCH_NOMATCH);              if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2553  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2554             eptr--;              eptr--;
2555             BACKCHAR(eptr);              BACKCHAR(eptr);
2556  #else   /* without SUPPORT_UCP */  #else   /* without SUPPORT_UCP */
2557             eptr -= length;              eptr -= length;
2558  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2559             }              }
2560            }            }
2561          /* Control never gets here */          /* Control never gets here */
2562          }          }
# Line 2339  for (;;) Line 2569  for (;;)
2569  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2570    
2571      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2572        {  
2573        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2574    
2575      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2576      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2360  for (;;) Line 2588  for (;;)
2588        {        {
2589        fc = md->lcc[fc];        fc = md->lcc[fc];
2590        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2591            {
2592            if (eptr >= md->end_subject)
2593              {
2594              SCHECK_PARTIAL();
2595              RRETURN(MATCH_NOMATCH);
2596              }
2597          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2598            }
2599        if (min == max) continue;        if (min == max) continue;
2600        if (minimize)        if (minimize)
2601          {          {
# Line 2368  for (;;) Line 2603  for (;;)
2603            {            {
2604            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2605            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2606            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) RRETURN(MATCH_NOMATCH);
2607                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2608                {
2609                SCHECK_PARTIAL();
2610              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2611                }
2612              if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2613            }            }
2614          /* Control never gets here */          /* Control never gets here */
2615          }          }
# Line 2382  for (;;) Line 2621  for (;;)
2621            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2622            eptr++;            eptr++;
2623            }            }
2624    
2625          if (possessive) continue;          if (possessive) continue;
2626    
2627          while (eptr >= pp)          while (eptr >= pp)
2628            {            {
2629            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
# Line 2398  for (;;) Line 2639  for (;;)
2639    
2640      else      else
2641        {        {
2642        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2643            {
2644            if (eptr >= md->end_subject)
2645              {
2646              SCHECK_PARTIAL();
2647              RRETURN(MATCH_NOMATCH);
2648              }
2649            if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2650            }
2651    
2652        if (min == max) continue;        if (min == max) continue;
2653    
2654        if (minimize)        if (minimize)
2655          {          {
2656          for (fi = min;; fi++)          for (fi = min;; fi++)
2657            {            {
2658            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2659            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2660            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) RRETURN(MATCH_NOMATCH);
2661              if (eptr >= md->end_subject)
2662                {
2663                SCHECK_PARTIAL();
2664              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2665                }
2666              if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2667            }            }
2668          /* Control never gets here */          /* Control never gets here */
2669          }          }
# Line 2420  for (;;) Line 2676  for (;;)
2676            eptr++;            eptr++;
2677            }            }
2678          if (possessive) continue;          if (possessive) continue;
2679    
2680          while (eptr >= pp)          while (eptr >= pp)
2681            {            {
2682            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
# Line 2435  for (;;) Line 2692  for (;;)
2692      checking can be multibyte. */      checking can be multibyte. */
2693    
2694      case OP_NOT:      case OP_NOT:
2695      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2696          {
2697          SCHECK_PARTIAL();
2698          RRETURN(MATCH_NOMATCH);
2699          }
2700      ecode++;      ecode++;
2701      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2702      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2512  for (;;) Line 2773  for (;;)
2773      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2774      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2775    
2776      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
2777    
2778      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2779      fc = *ecode++;      fc = *ecode++;
2780    
2781      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2542  for (;;) Line 2800  for (;;)
2800          register unsigned int d;          register unsigned int d;
2801          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2802            {            {
2803              if (eptr >= md->end_subject)
2804                {
2805                SCHECK_PARTIAL();
2806                RRETURN(MATCH_NOMATCH);
2807                }
2808            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
2809            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
2810            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
# Line 2553  for (;;) Line 2816  for (;;)
2816        /* Not UTF-8 mode */        /* Not UTF-8 mode */
2817          {          {
2818          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2819              {
2820              if (eptr >= md->end_subject)
2821                {
2822                SCHECK_PARTIAL();
2823                RRETURN(MATCH_NOMATCH);
2824                }
2825            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2826              }
2827          }          }
2828    
2829        if (min == max) continue;        if (min == max) continue;
# Line 2569  for (;;) Line 2839  for (;;)
2839              {              {
2840              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2841              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2842                if (fi >= max) RRETURN(MATCH_NOMATCH);
2843                if (eptr >= md->end_subject)
2844                  {
2845                  SCHECK_PARTIAL();
2846                  RRETURN(MATCH_NOMATCH);
2847                  }
2848              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2849              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2850              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2851              }              }
2852            }            }
2853          else          else
# Line 2583  for (;;) Line 2858  for (;;)
2858              {              {
2859              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2860              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2861              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) RRETURN(MATCH_NOMATCH);
2862                if (eptr >= md->end_subject)
2863                  {
2864                  SCHECK_PARTIAL();
2865                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2866                  }
2867                if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2868              }              }
2869            }            }
2870          /* Control never gets here */          /* Control never gets here */
# Line 2653  for (;;) Line 2933  for (;;)
2933          register unsigned int d;          register unsigned int d;
2934          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2935            {            {
2936              if (eptr >= md->end_subject)
2937                {
2938                SCHECK_PARTIAL();
2939                RRETURN(MATCH_NOMATCH);
2940                }
2941            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
2942            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
2943            }            }
# Line 2662  for (;;) Line 2947  for (;;)
2947        /* Not UTF-8 mode */        /* Not UTF-8 mode */
2948          {          {
2949          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2950              {
2951              if (eptr >= md->end_subject)
2952                {
2953                SCHECK_PARTIAL();
2954                RRETURN(MATCH_NOMATCH);
2955                }
2956            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2957              }
2958          }          }
2959    
2960        if (min == max) continue;        if (min == max) continue;
# Line 2678  for (;;) Line 2970  for (;;)
2970              {              {
2971              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2972              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2973              GETCHARINC(d, eptr);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2974              if (fi >= max || eptr >= md->end_subject || fc == d)              if (eptr >= md->end_subject)
2975                  {
2976                  SCHECK_PARTIAL();
2977                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2978                  }
2979                GETCHARINC(d, eptr);
2980                if (fc == d) RRETURN(MATCH_NOMATCH);
2981              }              }
2982            }            }
2983          else          else
# Line 2691  for (;;) Line 2988  for (;;)
2988              {              {
2989              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2990              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2991              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) RRETURN(MATCH_NOMATCH);
2992                if (eptr >= md->end_subject)
2993                  {
2994                  SCHECK_PARTIAL();
2995                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2996                  }
2997                if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2998              }              }
2999            }            }
3000          /* Control never gets here */          /* Control never gets here */
# Line 2826  for (;;) Line 3128  for (;;)
3128    
3129      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3130      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3131      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3132      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3133      and single-bytes. */      and single-bytes. */
3134    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3135      if (min > 0)      if (min > 0)
3136        {        {
3137  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2844  for (;;) Line 3143  for (;;)
3143            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3144            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3145              {              {
3146              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3147                  {
3148                  SCHECK_PARTIAL();
3149                  RRETURN(MATCH_NOMATCH);
3150                  }
3151              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3152              }              }
3153            break;            break;
# Line 2852  for (;;) Line 3155  for (;;)
3155            case PT_LAMP:            case PT_LAMP:
3156            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3157              {              {
3158              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3159                  {
3160                  SCHECK_PARTIAL();
3161                  RRETURN(MATCH_NOMATCH);
3162                  }
3163              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3164              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3165              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3166                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3167                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2865  for (;;) Line 3172  for (;;)
3172            case PT_GC:            case PT_GC:
3173            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3174              {              {
3175              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3176                  {
3177                  SCHECK_PARTIAL();
3178                  RRETURN(MATCH_NOMATCH);
3179                  }
3180              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3181              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3182              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3183                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3184              }              }
# Line 2876  for (;;) Line 3187  for (;;)
3187            case PT_PC:            case PT_PC:
3188            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3189              {              {
3190              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3191                  {
3192                  SCHECK_PARTIAL();
3193                  RRETURN(MATCH_NOMATCH);
3194                  }
3195              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3196              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3197              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3198                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3199              }              }
# Line 2887  for (;;) Line 3202  for (;;)
3202            case PT_SC:            case PT_SC:
3203            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3204              {              {
3205              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3206                  {
3207                  SCHECK_PARTIAL();
3208                  RRETURN(MATCH_NOMATCH);
3209                  }
3210              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3211              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3212              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3213                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3214              }              }
# Line 2907  for (;;) Line 3226  for (;;)
3226          {          {
3227          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3228            {            {
3229              if (eptr >= md->end_subject)
3230                {
3231                SCHECK_PARTIAL();
3232                RRETURN(MATCH_NOMATCH);
3233                }
3234            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3235            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3236            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3237            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3238              {              {
3239              int len = 1;              int len = 1;
3240              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3241                {                else { GETCHARLEN(c, eptr, len); }
3242                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3243              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3244              eptr += len;              eptr += len;
3245              }              }
# Line 2935  for (;;) Line 3257  for (;;)
3257          case OP_ANY:          case OP_ANY:
3258          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3259            {            {
3260            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3261                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))              {
3262                SCHECK_PARTIAL();
3263              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3264                }
3265              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3266              eptr++;
3267              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3268              }
3269            break;
3270    
3271            case OP_ALLANY:
3272            for (i = 1; i <= min; i++)
3273              {
3274              if (eptr >= md->end_subject)
3275                {
3276                SCHECK_PARTIAL();
3277                RRETURN(MATCH_NOMATCH);
3278                }
3279            eptr++;            eptr++;
3280            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3281            }            }
3282          break;          break;
3283    
3284          case OP_ANYBYTE:          case OP_ANYBYTE:
3285            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3286          eptr += min;          eptr += min;
3287          break;          break;
3288    
3289          case OP_ANYNL:          case OP_ANYNL:
3290          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3291            {            {
3292            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3293                {
3294                SCHECK_PARTIAL();
3295                RRETURN(MATCH_NOMATCH);
3296                }
3297            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3298            switch(c)            switch(c)
3299              {              {
# Line 2976  for (;;) Line 3319  for (;;)
3319          case OP_NOT_HSPACE:          case OP_NOT_HSPACE:
3320          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3321            {            {
3322            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3323                {
3324                SCHECK_PARTIAL();
3325                RRETURN(MATCH_NOMATCH);
3326                }
3327            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3328            switch(c)            switch(c)
3329              {              {
# Line 3008  for (;;) Line 3355  for (;;)
3355          case OP_HSPACE:          case OP_HSPACE:
3356          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3357            {            {
3358            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3359                {
3360                SCHECK_PARTIAL();
3361                RRETURN(MATCH_NOMATCH);
3362                }
3363            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3364            switch(c)            switch(c)
3365              {              {
# Line 3040  for (;;) Line 3391  for (;;)
3391          case OP_NOT_VSPACE:          case OP_NOT_VSPACE:
3392          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3393            {            {
3394            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3395                {
3396                SCHECK_PARTIAL();
3397                RRETURN(MATCH_NOMATCH);
3398                }
3399            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3400            switch(c)            switch(c)
3401              {              {
# Line 3060  for (;;) Line 3415  for (;;)
3415          case OP_VSPACE:          case OP_VSPACE:
3416          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3417            {            {
3418            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3419                {
3420                SCHECK_PARTIAL();
3421                RRETURN(MATCH_NOMATCH);
3422                }
3423            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3424            switch(c)            switch(c)
3425              {              {
# Line 3080  for (;;) Line 3439  for (;;)
3439          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3440          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3441            {            {
3442            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3443                {
3444                SCHECK_PARTIAL();
3445                RRETURN(MATCH_NOMATCH);
3446                }
3447            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3448            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3449              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 3090  for (;;) Line 3453  for (;;)
3453          case OP_DIGIT:          case OP_DIGIT:
3454          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3455            {            {
3456            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3457               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3458                SCHECK_PARTIAL();
3459                RRETURN(MATCH_NOMATCH);
3460                }
3461              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3462              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3463            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3464            }            }
# Line 3100  for (;;) Line 3467  for (;;)
3467          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3468          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3469            {            {
3470            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3471               (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))              {
3472                SCHECK_PARTIAL();
3473                RRETURN(MATCH_NOMATCH);
3474                }
3475              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3476              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3477            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3478            }            }
# Line 3110  for (;;) Line 3481  for (;;)
3481          case OP_WHITESPACE:          case OP_WHITESPACE:
3482          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3483            {            {
3484            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3485               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3486                SCHECK_PARTIAL();
3487                RRETURN(MATCH_NOMATCH);
3488                }
3489              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3490              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3491            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3492            }            }
# Line 3130  for (;;) Line 3505  for (;;)
3505          case OP_WORDCHAR:          case OP_WORDCHAR:
3506          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3507            {            {
3508            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3509               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3510                SCHECK_PARTIAL();
3511                RRETURN(MATCH_NOMATCH);
3512                }
3513              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3514              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3515            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3516            }            }
# Line 3145  for (;;) Line 3524  for (;;)
3524  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
3525    
3526        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
3527        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum        than OP_PROP and OP_NOTPROP. */
       number of bytes present, as this was tested above. */  
3528    
3529        switch(ctype)        switch(ctype)
3530          {          {
3531          case OP_ANY:          case OP_ANY:
3532          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3533            {            {
3534            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
3535              {              {
3536              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3537              eptr++;              RRETURN(MATCH_NOMATCH);
3538              }              }
3539              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3540              eptr++;
3541            }            }
         else eptr += min;  
3542          break;          break;
3543    
3544          case OP_ANYBYTE:          case OP_ALLANY:
3545            if (eptr > md->end_subject - min)
3546              {
3547              SCHECK_PARTIAL();
3548              RRETURN(MATCH_NOMATCH);
3549              }
3550          eptr += min;          eptr += min;
3551          break;          break;
3552    
3553          /* Because of the CRLF case, we can't assume the minimum number of          case OP_ANYBYTE:
3554          bytes are present in this case. */          if (eptr > md->end_subject - min)
3555              {
3556              SCHECK_PARTIAL();
3557              RRETURN(MATCH_NOMATCH);
3558              }
3559            eptr += min;
3560            break;
3561    
3562          case OP_ANYNL:          case OP_ANYNL:
3563          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3564            {            {
3565            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3566                {
3567                SCHECK_PARTIAL();
3568                RRETURN(MATCH_NOMATCH);
3569                }
3570            switch(*eptr++)            switch(*eptr++)
3571              {              {
3572              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
# Line 3194  for (;;) Line 3588  for (;;)
3588          case OP_NOT_HSPACE:          case OP_NOT_HSPACE:
3589          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3590            {            {
3591            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3592                {
3593                SCHECK_PARTIAL();
3594                RRETURN(MATCH_NOMATCH);
3595                }
3596            switch(*eptr++)            switch(*eptr++)
3597              {              {
3598              default: break;              default: break;
# Line 3209  for (;;) Line 3607  for (;;)
3607          case OP_HSPACE:          case OP_HSPACE:
3608          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3609            {            {
3610            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3611                {
3612                SCHECK_PARTIAL();
3613                RRETURN(MATCH_NOMATCH);
3614                }
3615            switch(*eptr++)            switch(*eptr++)
3616              {              {
3617              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
# Line 3224  for (;;) Line 3626  for (;;)
3626          case OP_NOT_VSPACE:          case OP_NOT_VSPACE:
3627          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3628            {            {
3629            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3630                {
3631                SCHECK_PARTIAL();
3632                RRETURN(MATCH_NOMATCH);
3633                }
3634            switch(*eptr++)            switch(*eptr++)
3635              {              {
3636              default: break;              default: break;
# Line 3241  for (;;) Line 3647  for (;;)
3647          case OP_VSPACE:          case OP_VSPACE:
3648          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3649            {            {
3650            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3651                {
3652                SCHECK_PARTIAL();
3653                RRETURN(MATCH_NOMATCH);
3654                }
3655            switch(*eptr++)            switch(*eptr++)
3656              {              {
3657              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
# Line 3257  for (;;) Line 3667  for (;;)
3667    
3668          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3669          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3670              {
3671              if (eptr >= md->end_subject)
3672                {
3673                SCHECK_PARTIAL();
3674                RRETURN(MATCH_NOMATCH);
3675                }
3676            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3677              }
3678          break;          break;
3679    
3680          case OP_DIGIT:          case OP_DIGIT:
3681          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3682              {
3683              if (eptr >= md->end_subject)
3684                {
3685                SCHECK_PARTIAL();
3686                RRETURN(MATCH_NOMATCH);
3687                }
3688            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3689              }
3690          break;          break;
3691    
3692          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3693          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3694              {
3695              if (eptr >= md->end_subject)
3696                {
3697                SCHECK_PARTIAL();
3698                RRETURN(MATCH_NOMATCH);
3699                }
3700            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3701              }
3702          break;          break;
3703    
3704          case OP_WHITESPACE:          case OP_WHITESPACE:
3705          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3706              {
3707              if (eptr >= md->end_subject)
3708                {
3709                SCHECK_PARTIAL();
3710                RRETURN(MATCH_NOMATCH);
3711                }
3712            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3713              }
3714          break;          break;
3715    
3716          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3717          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3718              {
3719              if (eptr >= md->end_subject)
3720                {
3721                SCHECK_PARTIAL();
3722                RRETURN(MATCH_NOMATCH);
3723                }
3724            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if ((md->ctypes[*eptr++] & ctype_word) != 0)
3725              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3726              }
3727          break;          break;
3728    
3729          case OP_WORDCHAR:          case OP_WORDCHAR:
3730          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3731              {
3732              if (eptr >= md->end_subject)
3733                {
3734                SCHECK_PARTIAL();
3735                RRETURN(MATCH_NOMATCH);
3736                }
3737            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if ((md->ctypes[*eptr++] & ctype_word) == 0)
3738              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3739              }
3740          break;          break;
3741    
3742          default:          default:
# Line 3312  for (;;) Line 3764  for (;;)
3764              {              {
3765              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3766              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3767              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3768                if (eptr >= md->end_subject)
3769                  {
3770                  SCHECK_PARTIAL();
3771                  RRETURN(MATCH_NOMATCH);
3772                  }
3773              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3774              if (prop_fail_result) RRETURN(MATCH_NOMATCH);              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3775              }              }
# Line 3323  for (;;) Line 3780  for (;;)
3780              {              {
3781              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3782              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3783              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3784                if (eptr >= md->end_subject)
3785                  {
3786                  SCHECK_PARTIAL();
3787                  RRETURN(MATCH_NOMATCH);
3788                  }
3789              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3790              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3791              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3792                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3793                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3338  for (;;) Line 3800  for (;;)
3800              {              {
3801              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3802              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3803              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3804                if (eptr >= md->end_subject)
3805                  {
3806                  SCHECK_PARTIAL();
3807                  RRETURN(MATCH_NOMATCH);
3808                  }
3809              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3810              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3811              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3812                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3813              }              }
# Line 3351  for (;;) Line 3818  for (;;)
3818              {              {
3819              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3820              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3821              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3822                if (eptr >= md->end_subject)
3823                  {
3824                  SCHECK_PARTIAL();
3825                  RRETURN(MATCH_NOMATCH);
3826                  }
3827              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3828              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3829              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3830                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3831              }              }
# Line 3364  for (;;) Line 3836  for (;;)
3836              {              {
3837              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3838              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3839              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3840                if (eptr >= md->end_subject)
3841                  {
3842                  SCHECK_PARTIAL();
3843                  RRETURN(MATCH_NOMATCH);
3844                  }
3845              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3846              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3847              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3848                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3849              }              }
# Line 3386  for (;;) Line 3863  for (;;)
3863            {            {
3864            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3865            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3866            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3867              if (eptr >= md->end_subject)
3868                {
3869                SCHECK_PARTIAL();
3870                RRETURN(MATCH_NOMATCH);
3871                }
3872            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3873            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3874            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3875            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3876              {              {
3877              int len = 1;              int len = 1;
3878              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3879                {                else { GETCHARLEN(c, eptr, len); }
3880                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3881              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3882              eptr += len;              eptr += len;
3883              }              }
# Line 3415  for (;;) Line 3895  for (;;)
3895            {            {
3896            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3897            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3898            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) RRETURN(MATCH_NOMATCH);
3899                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&            if (eptr >= md->end_subject)
3900                  IS_NEWLINE(eptr)))              {
3901                SCHECK_PARTIAL();
3902                RRETURN(MATCH_NOMATCH);
3903                }
3904              if (ctype == OP_ANY && IS_NEWLINE(eptr))
3905              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
   
3906            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3907            switch(ctype)            switch(ctype)
3908              {              {
3909              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
3910              break;              case OP_ALLANY:
   
3911              case OP_ANYBYTE:              case OP_ANYBYTE:
3912              break;              break;
3913    
# Line 3576  for (;;) Line 4058  for (;;)
4058            {            {
4059            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4060            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4061            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) RRETURN(MATCH_NOMATCH);
4062                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))            if (eptr >= md->end_subject)
4063                {
4064                SCHECK_PARTIAL();
4065                RRETURN(MATCH_NOMATCH);
4066                }
4067              if (ctype == OP_ANY && IS_NEWLINE(eptr))
4068              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
   
4069            c = *eptr++;            c = *eptr++;
4070            switch(ctype)            switch(ctype)
4071              {              {
4072              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
4073              break;              case OP_ALLANY:
   
4074              case OP_ANYBYTE:              case OP_ANYBYTE:
4075              break;              break;
4076    
# Line 3718  for (;;) Line 4203  for (;;)
4203              int len = 1;              int len = 1;
4204              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
4205              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4206              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
4207              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4208                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
4209                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3733  for (;;) Line 4218  for (;;)
4218              int len = 1;              int len = 1;
4219              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
4220              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4221              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4222              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4223                break;                break;
4224              eptr+= len;              eptr+= len;
# Line 3746  for (;;) Line 4231  for (;;)
4231              int len = 1;              int len = 1;
4232              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
4233              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4234              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
4235              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4236                break;                break;
4237              eptr+= len;              eptr+= len;
# Line 3759  for (;;) Line 4244  for (;;)
4244              int len = 1;              int len = 1;
4245              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
4246              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4247              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
4248              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4249                break;                break;
4250              eptr+= len;              eptr+= len;
# Line 3788  for (;;) Line 4273  for (;;)
4273            {            {
4274            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
4275            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4276            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
4277            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
4278            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4279              {              {
# Line 3797  for (;;) Line 4282  for (;;)
4282                {                {
4283                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
4284                }                }
4285              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4286              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
4287              eptr += len;              eptr += len;
4288              }              }
# Line 3819  for (;;) Line 4304  for (;;)
4304                BACKCHAR(eptr);                BACKCHAR(eptr);
4305                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
4306                }                }
4307              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4308              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
4309              eptr--;              eptr--;
4310              }              }
# Line 3839  for (;;) Line 4324  for (;;)
4324            case OP_ANY:            case OP_ANY:
4325            if (max < INT_MAX)            if (max < INT_MAX)
4326              {              {
4327              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
4328                {                {
4329                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4330                  {                eptr++;
4331                  if (eptr >= md->end_subject) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
4332                }                }
4333              }              }
4334    
# Line 3863  for (;;) Line 4336  for (;;)
4336    
4337            else            else
4338              {              {
4339              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
4340                {                {
4341                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4342                  {                eptr++;
4343                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
4344                }                }
4345              else              }
4346              break;
4347    
4348              case OP_ALLANY:
4349              if (max < INT_MAX)
4350                {
4351                for (i = min; i < max; i++)
4352                {                {
4353                eptr = md->end_subject;                if (eptr >= md->end_subject) break;
4354                  eptr++;
4355                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4356                }                }
4357              }              }
4358              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
4359            break;            break;
4360    
4361            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 4064  for (;;) Line 4543  for (;;)
4543          switch(ctype)          switch(ctype)
4544            {            {
4545            case OP_ANY:            case OP_ANY:
4546            if ((ims & PCRE_DOTALL) == 0)            for (i = min; i < max; i++)
4547              {              {
4548              for (i = min; i < max; i++)              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4549                {              eptr++;
               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
               eptr++;  
               }  
             break;  
4550              }              }
4551            /* For DOTALL case, fall through and treat as \C */            break;
4552    
4553              case OP_ALLANY:
4554            case OP_ANYBYTE:            case OP_ANYBYTE:
4555            c = max - min;            c = max - min;
4556            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
# Line 4246  HEAP_RETURN: Line 4722  HEAP_RETURN:
4722  switch (frame->Xwhere)  switch (frame->Xwhere)
4723    {    {
4724    LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)    LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4725    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
4726    LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
4727    LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
4728    LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)    LBL(53) LBL(54)
4729    LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47) LBL(48)  #ifdef SUPPORT_UTF8
4730    LBL(49) LBL(50) LBL(51) LBL(52) LBL(53) LBL(54)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
4731      LBL(32) LBL(34) LBL(42) LBL(46)
4732    #ifdef SUPPORT_UCP
4733      LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
4734    #endif  /* SUPPORT_UCP */
4735    #endif  /* SUPPORT_UTF8 */
4736    default:    default:
4737    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4738    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
# Line 4343  Returns:          > 0 => success; value Line 4824  Returns:          > 0 => success; value
4824                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4825  */  */
4826    
4827  PCRE_EXP_DEFN int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
4828  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4829    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4830    int offsetcount)    int offsetcount)
# Line 4367  const uschar *tables; Line 4848  const uschar *tables;
4848  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4849  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
4850  USPTR end_subject;  USPTR end_subject;
4851    USPTR start_partial = NULL;
4852  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
4853    
4854  pcre_study_data internal_study;  pcre_study_data internal_study;
# Line 4445  end_subject = md->end_subject; Line 4927  end_subject = md->end_subject;
4927    
4928  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4929  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4930    md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4931    
4932  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4933  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
4934  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
4935  md->partial = (options & PCRE_PARTIAL) != 0;  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
4936    md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
4937                  ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
4938  md->hitend = FALSE;  md->hitend = FALSE;
4939    
4940  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
# Line 4469  switch (options & (PCRE_BSR_ANYCRLF|PCRE Line 4954  switch (options & (PCRE_BSR_ANYCRLF|PCRE
4954    md->bsr_anycrlf = TRUE;    md->bsr_anycrlf = TRUE;
4955  #else  #else
4956    md->bsr_anycrlf = FALSE;    md->bsr_anycrlf = FALSE;
4957  #endif  #endif
4958    break;    break;
4959    
4960    case PCRE_BSR_ANYCRLF:    case PCRE_BSR_ANYCRLF:
# Line 4490  switch ((((options & PCRE_NEWLINE_BITS) Line 4975  switch ((((options & PCRE_NEWLINE_BITS)
4975          (pcre_uint32)options) & PCRE_NEWLINE_BITS)          (pcre_uint32)options) & PCRE_NEWLINE_BITS)
4976    {    {
4977    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
4978    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
4979    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
4980    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
4981         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
4982    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
4983    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4984    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
# Line 4523  else Line 5008  else
5008      }      }
5009    }    }
5010    
5011  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching was originally supported only for a restricted set of
5012  moment. */  regexes; from release 8.00 there are no restrictions, but the bits are still
5013    defined (though never set). So there's no harm in leaving this code. */
5014    
5015  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5016    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
# Line 4535  back the character offset. */ Line 5021  back the character offset. */
5021  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5022  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5023    {    {
5024    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
5025      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
5026    if (start_offset > 0 && start_offset < length)    if (start_offset > 0 && start_offset < length)
5027      {      {
5028      int tb = ((uschar *)subject)[start_offset];      int tb = ((USPTR)subject)[start_offset];
5029      if (tb > 127)      if (tb > 127)
5030        {        {
5031        tb &= 0xc0;        tb &= 0xc0;
# Line 4645  for(;;) Line 5131  for(;;)
5131      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
5132      }      }
5133    
5134    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* If firstline is TRUE, the start of the match is constrained to the first
5135    start of the match is constrained to the first line of a multiline string.    line of a multiline string. That is, the match must be before or at the first
5136    That is, the match must be before or at the first newline. Implement this by    newline. Implement this by temporarily adjusting end_subject so that we stop
5137    temporarily adjusting end_subject so that we stop scanning at a newline. If    scanning at a newline. If the match fails at the newline, later code breaks
5138    the match fails at the newline, later code breaks this loop. */    this loop. */
5139    
5140    if (firstline)    if (firstline)
5141      {      {
5142      USPTR t = start_match;      USPTR t = start_match;
5143    #ifdef SUPPORT_UTF8
5144        if (utf8)
5145          {
5146          while (t < md->end_subject && !IS_NEWLINE(t))
5147            {
5148            t++;
5149            while (t < end_subject && (*t & 0xc0) == 0x80) t++;
5150            }
5151          }
5152        else
5153    #endif
5154      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
5155      end_subject = t;      end_subject = t;
5156      }      }
5157    
5158    /* Now test for a unique first byte */    /* There are some optimizations that avoid running the match if a known
5159      starting point is not found, or if a known later character is not present.
5160      However, there is an option that disables these, for testing and for ensuring
5161      that all callouts do actually occur. */
5162    
5163    if (first_byte >= 0)    if ((options & PCRE_NO_START_OPTIMIZE) == 0)
5164      {      {
5165      if (first_byte_caseless)      /* Advance to a unique first byte if there is one. */
5166        while (start_match < end_subject &&  
5167               md->lcc[*start_match] != first_byte)      if (first_byte >= 0)
5168          start_match++;        {
5169      else        if (first_byte_caseless)
5170        while (start_match < end_subject && *start_match != first_byte)          while (start_match < end_subject && md->lcc[*start_match] != first_byte)
5171          start_match++;            start_match++;
5172      }        else
5173            while (start_match < end_subject && *start_match != first_byte)
5174              start_match++;
5175          }
5176    
5177    /* Or to just after a linebreak for a multiline match if possible */      /* Or to just after a linebreak for a multiline match */
5178    
5179    else if (startline)      else if (startline)
     {  
     if (start_match > md->start_subject + start_offset)  
5180        {        {
5181        while (start_match <= end_subject && !WAS_NEWLINE(start_match))        if (start_match > md->start_subject + start_offset)
5182          start_match++;          {
5183    #ifdef SUPPORT_UTF8
5184            if (utf8)
5185              {
5186              while (start_match < end_subject && !WAS_NEWLINE(start_match))
5187                {
5188                start_match++;
5189                while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5190                  start_match++;
5191                }
5192              }
5193            else
5194    #endif
5195            while (start_match < end_subject && !WAS_NEWLINE(start_match))
5196              start_match++;
5197    
5198        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
5199        and we are now at a LF, advance the match position by one more character.          and we are now at a LF, advance the match position by one more character.
5200        */          */
5201    
5202        if (start_match[-1] == '\r' &&          if (start_match[-1] == CHAR_CR &&
5203             (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
5204             start_match < end_subject &&               start_match < end_subject &&
5205             *start_match == '\n')               *start_match == CHAR_NL)
5206          start_match++;            start_match++;
5207            }
5208        }        }
     }  
5209    
5210    /* Or to a non-unique first char after study */      /* Or to a non-unique first byte after study */
5211    
5212    else if (start_bits != NULL)      else if (start_bits != NULL)
     {  
     while (start_match < end_subject)  
5213        {        {
5214        register unsigned int c = *start_match;        while (start_match < end_subject)
5215        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;          {
5216            register unsigned int c = *start_match;
5217            if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
5218              else break;
5219            }
5220        }        }
5221      }      }   /* Starting optimizations */
5222    
5223    /* Restore fudged end_subject */    /* Restore fudged end_subject */
5224    
# Line 4713  for(;;) Line 5230  for(;;)
5230    printf("\n");    printf("\n");
5231  #endif  #endif
5232    
5233    /* If req_byte is set, we know that that character must appear in the subject    /* If req_byte is set, we know that that character must appear in the
5234    for the match to succeed. If the first character is set, req_byte must be    subject for the match to succeed. If the first character is set, req_byte
5235    later in the subject; otherwise the test starts at the match point. This    must be later in the subject; otherwise the test starts at the match point.
5236    optimization can save a huge amount of backtracking in patterns with nested    This optimization can save a huge amount of backtracking in patterns with
5237    unlimited repeats that aren't going to match. Writing separate code for    nested unlimited repeats that aren't going to match. Writing separate code
5238    cased/caseless versions makes it go faster, as does using an autoincrement    for cased/caseless versions makes it go faster, as does using an
5239    and backing off on a match.    autoincrement and backing off on a match.
5240    
5241    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end
5242    take a long time, and give bad performance on quite ordinary patterns. This    can take a long time, and give bad performance on quite ordinary patterns.
5243    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte    This showed up when somebody was matching something like /^\d+C/ on a
5244    string... so we don't do this when the string is sufficiently long.    32-megabyte string... so we don't do this when the string is sufficiently
5245      long.
5246    
5247    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested, or if
5248    */    disabling is explicitly requested. */
5249    
5250    if (req_byte >= 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
5251          req_byte >= 0 &&
5252        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
5253        !md->partial)        !md->partial)
5254      {      {
# Line 4773  for(;;) Line 5292  for(;;)
5292        }        }
5293      }      }
5294    
5295    /* OK, we can now run the match. */    /* OK, we can now run the match. If "hitend" is set afterwards, remember the
5296      first starting point for which a partial match was found. */
5297    
5298    md->start_match_ptr = start_match;    md->start_match_ptr = start_match;
5299      md->start_used_ptr = start_match;
5300    md->match_call_count = 0;    md->match_call_count = 0;
5301    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
5302      if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
5303    
5304    switch(rc)    switch(rc)
5305      {      {
# Line 4807  for(;;) Line 5329  for(;;)
5329      rc = MATCH_NOMATCH;      rc = MATCH_NOMATCH;
5330      goto ENDLOOP;      goto ENDLOOP;
5331    
5332      /* Any other return is some kind of error. */      /* Any other return is either a match, or some kind of error. */
5333    
5334      default:      default:
5335      goto ENDLOOP;      goto ENDLOOP;
# Line 4837  for(;;) Line 5359  for(;;)
5359    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
5360    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. */
5361    
5362    if (start_match[-1] == '\r' &&    if (start_match[-1] == CHAR_CR &&
5363        start_match < end_subject &&        start_match < end_subject &&
5364        *start_match == '\n' &&        *start_match == CHAR_NL &&
5365        (re->flags & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
5366          (md->nltype == NLTYPE_ANY ||          (md->nltype == NLTYPE_ANY ||
5367           md->nltype == NLTYPE_ANYCRLF ||           md->nltype == NLTYPE_ANYCRLF ||
# Line 4889  if (rc == MATCH_MATCH) Line 5411  if (rc == MATCH_MATCH)
5411    too many to fit into the vector. */    too many to fit into the vector. */
5412    
5413    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = md->offset_overflow? 0 : md->end_offset_top/2;
5414    
5415    /* If there is space, set up the whole thing as substring 0. The value of    /* If there is space, set up the whole thing as substring 0. The value of
5416    md->start_match_ptr might be modified if \K was encountered on the success    md->start_match_ptr might be modified if \K was encountered on the success
5417    matching path. */    matching path. */
# Line 4913  if (using_temporary_offsets) Line 5435  if (using_temporary_offsets)
5435    (pcre_free)(md->offset_vector);    (pcre_free)(md->offset_vector);
5436    }    }
5437    
5438  if (rc != MATCH_NOMATCH)  if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
5439    {    {
5440    DPRINTF((">>>> error: returning %d\n", rc));    DPRINTF((">>>> error: returning %d\n", rc));
5441    return rc;    return rc;
5442    }    }
5443  else if (md->partial && md->hitend)  else if (start_partial != NULL)
5444    {    {
5445    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
5446      if (offsetcount > 1)
5447        {
5448        offsets[0] = start_partial - (USPTR)subject;
5449        offsets[1] = end_subject - (USPTR)subject;
5450        }
5451    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;
5452    }    }
5453  else  else

Legend:
Removed from v.231  
changed lines
  Added in v.446

  ViewVC Help
Powered by ViewVC 1.1.5