/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 366 by ph10, Mon Jul 14 15:45:32 2008 UTC revision 444 by ph10, Sun Sep 13 16:26:39 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 322  typedef struct heapframe { Line 322  typedef struct heapframe {
322    
323    /* Function arguments that may change */    /* Function arguments that may change */
324    
325    const uschar *Xeptr;    USPTR Xeptr;
326    const uschar *Xecode;    const uschar *Xecode;
327    const uschar *Xmstart;    USPTR Xmstart;
328    int Xoffset_top;    int Xoffset_top;
329    long int Xims;    long int Xims;
330    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 333  typedef struct heapframe { Line 333  typedef struct heapframe {
333    
334    /* Function local variables */    /* Function local variables */
335    
336    const uschar *Xcallpat;    USPTR Xcallpat;
337    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
338    const uschar *Xdata;    USPTR Xcharptr;
339    const uschar *Xnext;  #endif
340    const uschar *Xpp;    USPTR Xdata;
341    const uschar *Xprev;    USPTR Xnext;
342    const uschar *Xsaved_eptr;    USPTR Xpp;
343      USPTR Xprev;
344      USPTR Xsaved_eptr;
345    
346    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
347    
# Line 360  typedef struct heapframe { Line 362  typedef struct heapframe {
362    uschar Xocchars[8];    uschar Xocchars[8];
363  #endif  #endif
364    
365      int Xcodelink;
366    int Xctype;    int Xctype;
367    unsigned int Xfc;    unsigned int Xfc;
368    int Xfi;    int Xfi;
# Line 395  typedef struct heapframe { Line 398  typedef struct heapframe {
398    
399  /* This function is called recursively in many circumstances. Whenever it  /* This function is called recursively in many circumstances. Whenever it
400  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
401  same response.  same response. */
402    
403    /* These macros pack up tests that are used for partial matching, and which
404    appear several times in the code. We set the "hit end" flag if the pointer is
405    at the end of the subject and also past the start of the subject (i.e.
406    something has been matched). For hard partial matching, we then return
407    immediately. The second one is used when we already know we are past the end of
408    the subject. */
409    
410    #define CHECK_PARTIAL()\
411      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
412        {\
413        md->hitend = TRUE;\
414        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
415        }
416    
417  Performance note: It might be tempting to extract commonly used fields from the  #define SCHECK_PARTIAL()\
418  md structure (e.g. utf8, end_subject) into individual variables to improve    if (md->partial && eptr > mstart)\
419        {\
420        md->hitend = TRUE;\
421        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
422        }
423    
424    
425    /* Performance note: It might be tempting to extract commonly used fields from
426    the md structure (e.g. utf8, end_subject) into individual variables to improve
427  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
428  made performance worse.  made performance worse.
429    
# Line 425  Returns:       MATCH_MATCH if matched Line 450  Returns:       MATCH_MATCH if matched
450  */  */
451    
452  static int  static int
453  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
454    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
455    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
456  {  {
# Line 439  register unsigned int c;   /* Character Line 464  register unsigned int c;   /* Character
464  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
465    
466  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
467    int condcode;
468    
469  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
470  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 481  HEAP_RECURSE: Line 507  HEAP_RECURSE:
507  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
508  #endif  #endif
509  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
510    #define codelink           frame->Xcodelink
511  #define data               frame->Xdata  #define data               frame->Xdata
512  #define next               frame->Xnext  #define next               frame->Xnext
513  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 561  int oclength; Line 588  int oclength;
588  uschar occhars[8];  uschar occhars[8];
589  #endif  #endif
590    
591    int codelink;
592  int ctype;  int ctype;
593  int length;  int length;
594  int max;  int max;
# Line 636  for (;;) Line 664  for (;;)
664    minimize = possessive = FALSE;    minimize = possessive = FALSE;
665    op = *ecode;    op = *ecode;
666    
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > mstart)  
     md->hitend = TRUE;  
   
667    switch(op)    switch(op)
668      {      {
669      case OP_FAIL:      case OP_FAIL:
# Line 787  for (;;) Line 807  for (;;)
807    
808      case OP_COND:      case OP_COND:
809      case OP_SCOND:      case OP_SCOND:
810      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
811    
812        /* Because of the way auto-callout works during compile, a callout item is
813        inserted between OP_COND and an assertion condition. */
814    
815        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
816          {
817          if (pcre_callout != NULL)
818            {
819            pcre_callout_block cb;
820            cb.version          = 1;   /* Version 1 of the callout block */
821            cb.callout_number   = ecode[LINK_SIZE+2];
822            cb.offset_vector    = md->offset_vector;
823            cb.subject          = (PCRE_SPTR)md->start_subject;
824            cb.subject_length   = md->end_subject - md->start_subject;
825            cb.start_match      = mstart - md->start_subject;
826            cb.current_position = eptr - md->start_subject;
827            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
828            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
829            cb.capture_top      = offset_top/2;
830            cb.capture_last     = md->capture_last;
831            cb.callout_data     = md->callout_data;
832            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
833            if (rrc < 0) RRETURN(rrc);
834            }
835          ecode += _pcre_OP_lengths[OP_CALLOUT];
836          }
837    
838        condcode = ecode[LINK_SIZE+1];
839    
840        /* Now see what the actual condition is */
841    
842        if (condcode == OP_RREF)         /* Recursion test */
843        {        {
844        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
845        condition = md->recursive != NULL &&        condition = md->recursive != NULL &&
# Line 795  for (;;) Line 847  for (;;)
847        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
848        }        }
849    
850      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF)    /* Group used test */
851        {        {
852        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
853        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
854        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
855        }        }
856    
857      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
858        {        {
859        condition = FALSE;        condition = FALSE;
860        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 829  for (;;) Line 881  for (;;)
881        else        else
882          {          {
883          condition = FALSE;          condition = FALSE;
884          ecode += GET(ecode, 1);          ecode += codelink;
885          }          }
886        }        }
887    
# Line 852  for (;;) Line 904  for (;;)
904          goto TAIL_RECURSE;          goto TAIL_RECURSE;
905          }          }
906        }        }
907      else                         /* Condition false & no 2nd alternative */      else                         /* Condition false & no alternative */
908        {        {
909        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
910        }        }
# Line 878  for (;;) Line 930  for (;;)
930        break;        break;
931        }        }
932    
933      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
934      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
935        the subject. In both cases, backtracking will then try other alternatives,
936        if any. */
937    
938        if (eptr == mstart &&
939            (md->notempty ||
940              (md->notempty_atstart &&
941                mstart == md->start_subject + md->start_offset)))
942          RRETURN(MATCH_NOMATCH);
943    
944        /* Otherwise, we have a match. */
945    
     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);  
946      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
947      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
948      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
# Line 972  for (;;) Line 1033  for (;;)
1033        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1034        }        }
1035    
1036      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1037    
1038        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1039      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1040      break;      break;
1041    
# Line 1075  for (;;) Line 1137  for (;;)
1137          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1138            {            {
1139            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1140              if (new_recursive.offset_save != stacksave)
1141                (pcre_free)(new_recursive.offset_save);
1142            RRETURN(rrc);            RRETURN(rrc);
1143            }            }
1144    
# Line 1414  for (;;) Line 1478  for (;;)
1478    
1479        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1480        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1481        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1482          partial matching. */
1483    
1484  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1485        if (utf8)        if (utf8)
1486          {          {
1487          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1488            {            {
1489            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1490            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1491              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1492            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1493            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1494            }            }
1495          if (eptr >= md->end_subject) cur_is_word = FALSE; else          if (eptr >= md->end_subject)
1496              {
1497              SCHECK_PARTIAL();
1498              cur_is_word = FALSE;
1499              }
1500            else
1501            {            {
1502            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1503            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1435  for (;;) Line 1506  for (;;)
1506        else        else
1507  #endif  #endif
1508    
1509        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode */
1510    
1511          {          {
1512          prev_is_word = (eptr != md->start_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1513            ((md->ctypes[eptr[-1]] & ctype_word) != 0);            {
1514          cur_is_word = (eptr < md->end_subject) &&            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1515            ((md->ctypes[*eptr] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1516              }
1517            if (eptr >= md->end_subject)
1518              {
1519              SCHECK_PARTIAL();
1520              cur_is_word = FALSE;
1521              }
1522            else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1523          }          }
1524    
1525        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
# Line 1459  for (;;) Line 1537  for (;;)
1537      /* Fall through */      /* Fall through */
1538    
1539      case OP_ALLANY:      case OP_ALLANY:
1540      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1541          {
1542          SCHECK_PARTIAL();
1543          RRETURN(MATCH_NOMATCH);
1544          }
1545      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1546      ecode++;      ecode++;
1547      break;      break;
# Line 1468  for (;;) Line 1550  for (;;)
1550      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1551    
1552      case OP_ANYBYTE:      case OP_ANYBYTE:
1553      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1554          {
1555          SCHECK_PARTIAL();
1556          RRETURN(MATCH_NOMATCH);
1557          }
1558      ecode++;      ecode++;
1559      break;      break;
1560    
1561      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1562      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1563          {
1564          SCHECK_PARTIAL();
1565          RRETURN(MATCH_NOMATCH);
1566          }
1567      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1568      if (      if (
1569  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1486  for (;;) Line 1576  for (;;)
1576      break;      break;
1577    
1578      case OP_DIGIT:      case OP_DIGIT:
1579      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1580          {
1581          SCHECK_PARTIAL();
1582          RRETURN(MATCH_NOMATCH);
1583          }
1584      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1585      if (      if (
1586  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1499  for (;;) Line 1593  for (;;)
1593      break;      break;
1594    
1595      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1596      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1597          {
1598          SCHECK_PARTIAL();
1599          RRETURN(MATCH_NOMATCH);
1600          }
1601      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1602      if (      if (
1603  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1512  for (;;) Line 1610  for (;;)
1610      break;      break;
1611    
1612      case OP_WHITESPACE:      case OP_WHITESPACE:
1613      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1614          {
1615          SCHECK_PARTIAL();
1616          RRETURN(MATCH_NOMATCH);
1617          }
1618      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1619      if (      if (
1620  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1525  for (;;) Line 1627  for (;;)
1627      break;      break;
1628    
1629      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1630      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1631          {
1632          SCHECK_PARTIAL();
1633          RRETURN(MATCH_NOMATCH);
1634          }
1635      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1636      if (      if (
1637  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1538  for (;;) Line 1644  for (;;)
1644      break;      break;
1645    
1646      case OP_WORDCHAR:      case OP_WORDCHAR:
1647      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1648          {
1649          SCHECK_PARTIAL();
1650          RRETURN(MATCH_NOMATCH);
1651          }
1652      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1653      if (      if (
1654  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1551  for (;;) Line 1661  for (;;)
1661      break;      break;
1662    
1663      case OP_ANYNL:      case OP_ANYNL:
1664      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1665          {
1666          SCHECK_PARTIAL();
1667          RRETURN(MATCH_NOMATCH);
1668          }
1669      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1670      switch(c)      switch(c)
1671        {        {
# Line 1575  for (;;) Line 1689  for (;;)
1689      break;      break;
1690    
1691      case OP_NOT_HSPACE:      case OP_NOT_HSPACE:
1692      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1693          {
1694          SCHECK_PARTIAL();
1695          RRETURN(MATCH_NOMATCH);
1696          }
1697      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1698      switch(c)      switch(c)
1699        {        {
# Line 1605  for (;;) Line 1723  for (;;)
1723      break;      break;
1724    
1725      case OP_HSPACE:      case OP_HSPACE:
1726      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1727          {
1728          SCHECK_PARTIAL();
1729          RRETURN(MATCH_NOMATCH);
1730          }
1731      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1732      switch(c)      switch(c)
1733        {        {
# Line 1635  for (;;) Line 1757  for (;;)
1757      break;      break;
1758    
1759      case OP_NOT_VSPACE:      case OP_NOT_VSPACE:
1760      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1761          {
1762          SCHECK_PARTIAL();
1763          RRETURN(MATCH_NOMATCH);
1764          }
1765      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1766      switch(c)      switch(c)
1767        {        {
# Line 1653  for (;;) Line 1779  for (;;)
1779      break;      break;
1780    
1781      case OP_VSPACE:      case OP_VSPACE:
1782      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1783          {
1784          SCHECK_PARTIAL();
1785          RRETURN(MATCH_NOMATCH);
1786          }
1787      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1788      switch(c)      switch(c)
1789        {        {
# Line 1676  for (;;) Line 1806  for (;;)
1806    
1807      case OP_PROP:      case OP_PROP:
1808      case OP_NOTPROP:      case OP_NOTPROP:
1809      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1810          {
1811          SCHECK_PARTIAL();
1812          RRETURN(MATCH_NOMATCH);
1813          }
1814      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1815        {        {
1816        const ucd_record * prop = GET_UCD(c);        const ucd_record *prop = GET_UCD(c);
1817    
1818        switch(ecode[1])        switch(ecode[1])
1819          {          {
# Line 1721  for (;;) Line 1855  for (;;)
1855      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
1856    
1857      case OP_EXTUNI:      case OP_EXTUNI:
1858      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1859          {
1860          SCHECK_PARTIAL();
1861          RRETURN(MATCH_NOMATCH);
1862          }
1863      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1864        {        {
1865        int category = UCD_CATEGORY(c);        int category = UCD_CATEGORY(c);
# Line 1801  for (;;) Line 1939  for (;;)
1939          break;          break;
1940    
1941          default:               /* No repeat follows */          default:               /* No repeat follows */
1942          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
1943              {
1944              CHECK_PARTIAL();
1945              RRETURN(MATCH_NOMATCH);
1946              }
1947          eptr += length;          eptr += length;
1948          continue;              /* With the main loop */          continue;              /* With the main loop */
1949          }          }
# Line 1817  for (;;) Line 1959  for (;;)
1959    
1960        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
1961          {          {
1962          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
1963              {
1964              CHECK_PARTIAL();
1965              RRETURN(MATCH_NOMATCH);
1966              }
1967          eptr += length;          eptr += length;
1968          }          }
1969    
# Line 1834  for (;;) Line 1980  for (;;)
1980            {            {
1981            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1982            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1983            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) RRETURN(MATCH_NOMATCH);
1984              if (!match_ref(offset, eptr, length, md, ims))
1985                {
1986                CHECK_PARTIAL();
1987              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
1988                }
1989            eptr += length;            eptr += length;
1990            }            }
1991          /* Control never gets here */          /* Control never gets here */
# Line 1862  for (;;) Line 2012  for (;;)
2012        }        }
2013      /* Control never gets here */      /* Control never gets here */
2014    
   
   
2015      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2016      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2017      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1918  for (;;) Line 2066  for (;;)
2066          {          {
2067          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2068            {            {
2069            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2070                {
2071                SCHECK_PARTIAL();
2072                RRETURN(MATCH_NOMATCH);
2073                }
2074            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2075            if (c > 255)            if (c > 255)
2076              {              {
# Line 1936  for (;;) Line 2088  for (;;)
2088          {          {
2089          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2090            {            {
2091            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2092                {
2093                SCHECK_PARTIAL();
2094                RRETURN(MATCH_NOMATCH);
2095                }
2096            c = *eptr++;            c = *eptr++;
2097            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2098            }            }
# Line 1960  for (;;) Line 2116  for (;;)
2116              {              {
2117              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2118              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2119              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2120                if (eptr >= md->end_subject)
2121                  {
2122                  SCHECK_PARTIAL();
2123                  RRETURN(MATCH_NOMATCH);
2124                  }
2125              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2126              if (c > 255)              if (c > 255)
2127                {                {
# Line 1980  for (;;) Line 2141  for (;;)
2141              {              {
2142              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2143              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2144              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2145                if (eptr >= md->end_subject)
2146                  {
2147                  SCHECK_PARTIAL();
2148                  RRETURN(MATCH_NOMATCH);
2149                  }
2150              c = *eptr++;              c = *eptr++;
2151              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2152              }              }
# Line 2047  for (;;) Line 2213  for (;;)
2213    
2214    
2215      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2216      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2217        mode, because Unicode properties are supported in non-UTF-8 mode. */
2218    
2219  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2220      case OP_XCLASS:      case OP_XCLASS:
# Line 2088  for (;;) Line 2255  for (;;)
2255    
2256        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2257          {          {
2258          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2259          GETCHARINC(c, eptr);            {
2260              SCHECK_PARTIAL();
2261              RRETURN(MATCH_NOMATCH);
2262              }
2263            GETCHARINCTEST(c, eptr);
2264          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2265          }          }
2266    
# Line 2107  for (;;) Line 2278  for (;;)
2278            {            {
2279            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2280            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2281            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
2282            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2283                {
2284                SCHECK_PARTIAL();
2285                RRETURN(MATCH_NOMATCH);
2286                }
2287              GETCHARINCTEST(c, eptr);
2288            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2289            }            }
2290          /* Control never gets here */          /* Control never gets here */
# Line 2123  for (;;) Line 2299  for (;;)
2299            {            {
2300            int len = 1;            int len = 1;
2301            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2302            GETCHARLEN(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
2303            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2304            eptr += len;            eptr += len;
2305            }            }
# Line 2150  for (;;) Line 2326  for (;;)
2326        length = 1;        length = 1;
2327        ecode++;        ecode++;
2328        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2329        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2330            {
2331            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2332            RRETURN(MATCH_NOMATCH);
2333            }
2334        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2335        }        }
2336      else      else
# Line 2158  for (;;) Line 2338  for (;;)
2338    
2339      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2340        {        {
2341        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2342            {
2343            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2344            RRETURN(MATCH_NOMATCH);
2345            }
2346        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2347        ecode += 2;        ecode += 2;
2348        }        }
# Line 2174  for (;;) Line 2358  for (;;)
2358        ecode++;        ecode++;
2359        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2360    
2361        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2362            {
2363            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2364            RRETURN(MATCH_NOMATCH);
2365            }
2366    
2367        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2368        can use the fast lookup table. */        can use the fast lookup table. */
# Line 2209  for (;;) Line 2397  for (;;)
2397    
2398      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2399        {        {
2400        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2401            {
2402            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2403            RRETURN(MATCH_NOMATCH);
2404            }
2405        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2406        ecode += 2;        ecode += 2;
2407        }        }
# Line 2263  for (;;) Line 2455  for (;;)
2455      case OP_MINQUERY:      case OP_MINQUERY:
2456      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2457      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2458    
2459      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2460      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2461      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2462    
2463      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2464    
2465      REPEATCHAR:      REPEATCHAR:
2466  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2278  for (;;) Line 2469  for (;;)
2469        length = 1;        length = 1;
2470        charptr = ecode;        charptr = ecode;
2471        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2472        ecode += length;        ecode += length;
2473    
2474        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 2296  for (;;) Line 2486  for (;;)
2486    
2487          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2488            {            {
2489            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2490                memcmp(eptr, charptr, length) == 0) eptr += length;
2491  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2492            /* Need braces because of following else */            else if (oclength > 0 &&
2493            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                     eptr <= md->end_subject - oclength &&
2494                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2495    #endif  /* SUPPORT_UCP */
2496            else            else
2497              {              {
2498              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2499              eptr += oclength;              RRETURN(MATCH_NOMATCH);
2500              }              }
 #else   /* without SUPPORT_UCP */  
           else { RRETURN(MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2501            }            }
2502    
2503          if (min == max) continue;          if (min == max) continue;
# Line 2318  for (;;) Line 2508  for (;;)
2508              {              {
2509              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2510              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2511              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2512              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2513                  memcmp(eptr, charptr, length) == 0) eptr += length;
2514  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2515              /* Need braces because of following else */              else if (oclength > 0 &&
2516              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                       eptr <= md->end_subject - oclength &&
2517                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2518    #endif  /* SUPPORT_UCP */
2519              else              else
2520                {                {
2521                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2522                eptr += oclength;                RRETURN(MATCH_NOMATCH);
2523                }                }
 #else   /* without SUPPORT_UCP */  
             else { RRETURN (MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2524              }              }
2525            /* Control never gets here */            /* Control never gets here */
2526            }            }
# Line 2340  for (;;) Line 2530  for (;;)
2530            pp = eptr;            pp = eptr;
2531            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2532              {              {
2533              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2534              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2535  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2536              else if (oclength == 0) break;              else if (oclength > 0 &&
2537              else                       eptr <= md->end_subject - oclength &&
2538                {                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
               if (memcmp(eptr, occhars, oclength) != 0) break;  
               eptr += oclength;  
               }  
 #else   /* without SUPPORT_UCP */  
             else break;  
2539  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2540                else break;
2541              }              }
2542    
2543            if (possessive) continue;            if (possessive) continue;
2544    
2545            for(;;)            for(;;)
2546             {              {
2547             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2548             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2549             if (eptr == pp) RRETURN(MATCH_NOMATCH);              if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2550  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2551             eptr--;              eptr--;
2552             BACKCHAR(eptr);              BACKCHAR(eptr);
2553  #else   /* without SUPPORT_UCP */  #else   /* without SUPPORT_UCP */
2554             eptr -= length;              eptr -= length;
2555  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2556             }              }
2557            }            }
2558          /* Control never gets here */          /* Control never gets here */
2559          }          }
# Line 2379  for (;;) Line 2566  for (;;)
2566  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2567    
2568      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2569        {  
2570        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2571    
2572      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2573      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2400  for (;;) Line 2585  for (;;)
2585        {        {
2586        fc = md->lcc[fc];        fc = md->lcc[fc];
2587        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2588            {
2589            if (eptr >= md->end_subject)
2590              {
2591              SCHECK_PARTIAL();
2592              RRETURN(MATCH_NOMATCH);
2593              }
2594          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2595            }
2596        if (min == max) continue;        if (min == max) continue;
2597        if (minimize)        if (minimize)
2598          {          {
# Line 2408  for (;;) Line 2600  for (;;)
2600            {            {
2601            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2602            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2603            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) RRETURN(MATCH_NOMATCH);
2604                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2605                {
2606                SCHECK_PARTIAL();
2607              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2608                }
2609              if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2610            }            }
2611          /* Control never gets here */          /* Control never gets here */
2612          }          }
# Line 2422  for (;;) Line 2618  for (;;)
2618            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2619            eptr++;            eptr++;
2620            }            }
2621    
2622          if (possessive) continue;          if (possessive) continue;
2623    
2624          while (eptr >= pp)          while (eptr >= pp)
2625            {            {
2626            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
# Line 2438  for (;;) Line 2636  for (;;)
2636    
2637      else      else
2638        {        {
2639        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2640            {
2641            if (eptr >= md->end_subject)
2642              {
2643              SCHECK_PARTIAL();
2644              RRETURN(MATCH_NOMATCH);
2645              }
2646            if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2647            }
2648    
2649        if (min == max) continue;        if (min == max) continue;
2650    
2651        if (minimize)        if (minimize)
2652          {          {
2653          for (fi = min;; fi++)          for (fi = min;; fi++)
2654            {            {
2655            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2656            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2657            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) RRETURN(MATCH_NOMATCH);
2658              if (eptr >= md->end_subject)
2659                {
2660                SCHECK_PARTIAL();
2661              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2662                }
2663              if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2664            }            }
2665          /* Control never gets here */          /* Control never gets here */
2666          }          }
# Line 2460  for (;;) Line 2673  for (;;)
2673            eptr++;            eptr++;
2674            }            }
2675          if (possessive) continue;          if (possessive) continue;
2676    
2677          while (eptr >= pp)          while (eptr >= pp)
2678            {            {
2679            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
# Line 2475  for (;;) Line 2689  for (;;)
2689      checking can be multibyte. */      checking can be multibyte. */
2690    
2691      case OP_NOT:      case OP_NOT:
2692      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2693          {
2694          SCHECK_PARTIAL();
2695          RRETURN(MATCH_NOMATCH);
2696          }
2697      ecode++;      ecode++;
2698      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2699      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2552  for (;;) Line 2770  for (;;)
2770      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2771      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2772    
2773      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
2774    
2775      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2776      fc = *ecode++;      fc = *ecode++;
2777    
2778      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2582  for (;;) Line 2797  for (;;)
2797          register unsigned int d;          register unsigned int d;
2798          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2799            {            {
2800              if (eptr >= md->end_subject)
2801                {
2802                SCHECK_PARTIAL();
2803                RRETURN(MATCH_NOMATCH);
2804                }
2805            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
2806            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
2807            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
# Line 2593  for (;;) Line 2813  for (;;)
2813        /* Not UTF-8 mode */        /* Not UTF-8 mode */
2814          {          {
2815          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2816              {
2817              if (eptr >= md->end_subject)
2818                {
2819                SCHECK_PARTIAL();
2820                RRETURN(MATCH_NOMATCH);
2821                }
2822            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2823              }
2824          }          }
2825    
2826        if (min == max) continue;        if (min == max) continue;
# Line 2609  for (;;) Line 2836  for (;;)
2836              {              {
2837              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2838              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2839              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2840                if (eptr >= md->end_subject)
2841                  {
2842                  SCHECK_PARTIAL();
2843                  RRETURN(MATCH_NOMATCH);
2844                  }
2845              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2846              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2847              if (fc == d) RRETURN(MATCH_NOMATCH);              if (fc == d) RRETURN(MATCH_NOMATCH);
   
2848              }              }
2849            }            }
2850          else          else
# Line 2624  for (;;) Line 2855  for (;;)
2855              {              {
2856              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2857              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2858              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) RRETURN(MATCH_NOMATCH);
2859                if (eptr >= md->end_subject)
2860                  {
2861                  SCHECK_PARTIAL();
2862                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2863                  }
2864                if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2865              }              }
2866            }            }
2867          /* Control never gets here */          /* Control never gets here */
# Line 2694  for (;;) Line 2930  for (;;)
2930          register unsigned int d;          register unsigned int d;
2931          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2932            {            {
2933              if (eptr >= md->end_subject)
2934                {
2935                SCHECK_PARTIAL();
2936                RRETURN(MATCH_NOMATCH);
2937                }
2938            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
2939            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
2940            }            }
# Line 2703  for (;;) Line 2944  for (;;)
2944        /* Not UTF-8 mode */        /* Not UTF-8 mode */
2945          {          {
2946          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2947              {
2948              if (eptr >= md->end_subject)
2949                {
2950                SCHECK_PARTIAL();
2951                RRETURN(MATCH_NOMATCH);
2952                }
2953            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2954              }
2955          }          }
2956    
2957        if (min == max) continue;        if (min == max) continue;
# Line 2719  for (;;) Line 2967  for (;;)
2967              {              {
2968              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2969              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2970              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2971                if (eptr >= md->end_subject)
2972                  {
2973                  SCHECK_PARTIAL();
2974                  RRETURN(MATCH_NOMATCH);
2975                  }
2976              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2977              if (fc == d) RRETURN(MATCH_NOMATCH);              if (fc == d) RRETURN(MATCH_NOMATCH);
2978              }              }
2979            }            }
2980          else          else
# Line 2732  for (;;) Line 2985  for (;;)
2985              {              {
2986              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2987              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2988              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) RRETURN(MATCH_NOMATCH);
2989                if (eptr >= md->end_subject)
2990                  {
2991                  SCHECK_PARTIAL();
2992                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2993                  }
2994                if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2995              }              }
2996            }            }
2997          /* Control never gets here */          /* Control never gets here */
# Line 2867  for (;;) Line 3125  for (;;)
3125    
3126      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3127      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3128      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3129      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3130      and single-bytes. */      and single-bytes. */
3131    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3132      if (min > 0)      if (min > 0)
3133        {        {
3134  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2885  for (;;) Line 3140  for (;;)
3140            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3141            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3142              {              {
3143              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3144                  {
3145                  SCHECK_PARTIAL();
3146                  RRETURN(MATCH_NOMATCH);
3147                  }
3148              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3149              }              }
3150            break;            break;
# Line 2893  for (;;) Line 3152  for (;;)
3152            case PT_LAMP:            case PT_LAMP:
3153            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3154              {              {
3155              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3156                  {
3157                  SCHECK_PARTIAL();
3158                  RRETURN(MATCH_NOMATCH);
3159                  }
3160              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3161              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
3162              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
# Line 2906  for (;;) Line 3169  for (;;)
3169            case PT_GC:            case PT_GC:
3170            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3171              {              {
3172              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3173                  {
3174                  SCHECK_PARTIAL();
3175                  RRETURN(MATCH_NOMATCH);
3176                  }
3177              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3178              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3179              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
# Line 2917  for (;;) Line 3184  for (;;)
3184            case PT_PC:            case PT_PC:
3185            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3186              {              {
3187              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3188                  {
3189                  SCHECK_PARTIAL();
3190                  RRETURN(MATCH_NOMATCH);
3191                  }
3192              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3193              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
3194              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
# Line 2928  for (;;) Line 3199  for (;;)
3199            case PT_SC:            case PT_SC:
3200            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3201              {              {
3202              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3203                  {
3204                  SCHECK_PARTIAL();
3205                  RRETURN(MATCH_NOMATCH);
3206                  }
3207              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3208              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
3209              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
# Line 2948  for (;;) Line 3223  for (;;)
3223          {          {
3224          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3225            {            {
3226              if (eptr >= md->end_subject)
3227                {
3228                SCHECK_PARTIAL();
3229                RRETURN(MATCH_NOMATCH);
3230                }
3231            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3232            prop_category = UCD_CATEGORY(c);            prop_category = UCD_CATEGORY(c);
3233            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3234            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3235              {              {
3236              int len = 1;              int len = 1;
3237              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3238                {                else { GETCHARLEN(c, eptr, len); }
               GETCHARLEN(c, eptr, len);  
               }  
3239              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3240              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3241              eptr += len;              eptr += len;
# Line 2976  for (;;) Line 3254  for (;;)
3254          case OP_ANY:          case OP_ANY:
3255          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3256            {            {
3257            if (eptr >= md->end_subject || IS_NEWLINE(eptr))            if (eptr >= md->end_subject)
3258                {
3259                SCHECK_PARTIAL();
3260              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3261                }
3262              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3263            eptr++;            eptr++;
3264            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3265            }            }
# Line 2986  for (;;) Line 3268  for (;;)
3268          case OP_ALLANY:          case OP_ALLANY:
3269          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3270            {            {
3271            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3272                {
3273                SCHECK_PARTIAL();
3274                RRETURN(MATCH_NOMATCH);
3275                }
3276            eptr++;            eptr++;
3277            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3278            }            }
3279          break;          break;
3280    
3281          case OP_ANYBYTE:          case OP_ANYBYTE:
3282            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3283          eptr += min;          eptr += min;
3284          break;          break;
3285    
3286          case OP_ANYNL:          case OP_ANYNL:
3287          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3288            {            {
3289            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3290                {
3291                SCHECK_PARTIAL();
3292                RRETURN(MATCH_NOMATCH);
3293                }
3294            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3295            switch(c)            switch(c)
3296              {              {
# Line 3025  for (;;) Line 3316  for (;;)
3316          case OP_NOT_HSPACE:          case OP_NOT_HSPACE:
3317          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3318            {            {
3319            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3320                {
3321                SCHECK_PARTIAL();
3322                RRETURN(MATCH_NOMATCH);
3323                }
3324            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3325            switch(c)            switch(c)
3326              {              {
# Line 3057  for (;;) Line 3352  for (;;)
3352          case OP_HSPACE:          case OP_HSPACE:
3353          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3354            {            {
3355            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3356                {
3357                SCHECK_PARTIAL();
3358                RRETURN(MATCH_NOMATCH);
3359                }
3360            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3361            switch(c)            switch(c)
3362              {              {
# Line 3089  for (;;) Line 3388  for (;;)
3388          case OP_NOT_VSPACE:          case OP_NOT_VSPACE:
3389          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3390            {            {
3391            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3392                {
3393                SCHECK_PARTIAL();
3394                RRETURN(MATCH_NOMATCH);
3395                }
3396            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3397            switch(c)            switch(c)
3398              {              {
# Line 3109  for (;;) Line 3412  for (;;)
3412          case OP_VSPACE:          case OP_VSPACE:
3413          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3414            {            {
3415            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3416                {
3417                SCHECK_PARTIAL();
3418                RRETURN(MATCH_NOMATCH);
3419                }
3420            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3421            switch(c)            switch(c)
3422              {              {
# Line 3129  for (;;) Line 3436  for (;;)
3436          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3437          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3438            {            {
3439            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3440                {
3441                SCHECK_PARTIAL();
3442                RRETURN(MATCH_NOMATCH);
3443                }
3444            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3445            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3446              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 3139  for (;;) Line 3450  for (;;)
3450          case OP_DIGIT:          case OP_DIGIT:
3451          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3452            {            {
3453            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3454               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3455                SCHECK_PARTIAL();
3456                RRETURN(MATCH_NOMATCH);
3457                }
3458              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3459              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3460            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3461            }            }
# Line 3149  for (;;) Line 3464  for (;;)
3464          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3465          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3466            {            {
3467            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3468               (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))              {
3469                SCHECK_PARTIAL();
3470                RRETURN(MATCH_NOMATCH);
3471                }
3472              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3473              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3474            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3475            }            }
# Line 3159  for (;;) Line 3478  for (;;)
3478          case OP_WHITESPACE:          case OP_WHITESPACE:
3479          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3480            {            {
3481            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3482               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3483                SCHECK_PARTIAL();
3484                RRETURN(MATCH_NOMATCH);
3485                }
3486              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3487              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3488            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3489            }            }
# Line 3179  for (;;) Line 3502  for (;;)
3502          case OP_WORDCHAR:          case OP_WORDCHAR:
3503          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3504            {            {
3505            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3506               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3507                SCHECK_PARTIAL();
3508                RRETURN(MATCH_NOMATCH);
3509                }
3510              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3511              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3512            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3513            }            }
# Line 3194  for (;;) Line 3521  for (;;)
3521  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
3522    
3523        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
3524        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum        than OP_PROP and OP_NOTPROP. */
       number of bytes present, as this was tested above. */  
3525    
3526        switch(ctype)        switch(ctype)
3527          {          {
3528          case OP_ANY:          case OP_ANY:
3529          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3530            {            {
3531              if (eptr >= md->end_subject)
3532                {
3533                SCHECK_PARTIAL();
3534                RRETURN(MATCH_NOMATCH);
3535                }
3536            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3537            eptr++;            eptr++;
3538            }            }
3539          break;          break;
3540    
3541          case OP_ALLANY:          case OP_ALLANY:
3542            if (eptr > md->end_subject - min)
3543              {
3544              SCHECK_PARTIAL();
3545              RRETURN(MATCH_NOMATCH);
3546              }
3547          eptr += min;          eptr += min;
3548          break;          break;
3549    
3550          case OP_ANYBYTE:          case OP_ANYBYTE:
3551            if (eptr > md->end_subject - min)
3552              {
3553              SCHECK_PARTIAL();
3554              RRETURN(MATCH_NOMATCH);
3555              }
3556          eptr += min;          eptr += min;
3557          break;          break;
3558    
         /* Because of the CRLF case, we can't assume the minimum number of  
         bytes are present in this case. */  
   
3559          case OP_ANYNL:          case OP_ANYNL:
3560          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3561            {            {
3562            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3563                {
3564                SCHECK_PARTIAL();
3565                RRETURN(MATCH_NOMATCH);
3566                }
3567            switch(*eptr++)            switch(*eptr++)
3568              {              {
3569              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
# Line 3243  for (;;) Line 3585  for (;;)
3585          case OP_NOT_HSPACE:          case OP_NOT_HSPACE:
3586          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3587            {            {
3588            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3589                {
3590                SCHECK_PARTIAL();
3591                RRETURN(MATCH_NOMATCH);
3592                }
3593            switch(*eptr++)            switch(*eptr++)
3594              {              {
3595              default: break;              default: break;
# Line 3258  for (;;) Line 3604  for (;;)
3604          case OP_HSPACE:          case OP_HSPACE:
3605          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3606            {            {
3607            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3608                {
3609                SCHECK_PARTIAL();
3610                RRETURN(MATCH_NOMATCH);
3611                }
3612            switch(*eptr++)            switch(*eptr++)
3613              {              {
3614              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
# Line 3273  for (;;) Line 3623  for (;;)
3623          case OP_NOT_VSPACE:          case OP_NOT_VSPACE:
3624          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3625            {            {
3626            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3627                {
3628                SCHECK_PARTIAL();
3629                RRETURN(MATCH_NOMATCH);
3630                }
3631            switch(*eptr++)            switch(*eptr++)
3632              {              {
3633              default: break;              default: break;
# Line 3290  for (;;) Line 3644  for (;;)
3644          case OP_VSPACE:          case OP_VSPACE:
3645          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3646            {            {
3647            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3648                {
3649                SCHECK_PARTIAL();
3650                RRETURN(MATCH_NOMATCH);
3651                }
3652            switch(*eptr++)            switch(*eptr++)
3653              {              {
3654              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
# Line 3306  for (;;) Line 3664  for (;;)
3664    
3665          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3666          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3667              {
3668              if (eptr >= md->end_subject)
3669                {
3670                SCHECK_PARTIAL();
3671                RRETURN(MATCH_NOMATCH);
3672                }
3673            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3674              }
3675          break;          break;
3676    
3677          case OP_DIGIT:          case OP_DIGIT:
3678          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3679              {
3680              if (eptr >= md->end_subject)
3681                {
3682                SCHECK_PARTIAL();
3683                RRETURN(MATCH_NOMATCH);
3684                }
3685            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3686              }
3687          break;          break;
3688    
3689          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3690          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3691              {
3692              if (eptr >= md->end_subject)
3693                {
3694                SCHECK_PARTIAL();
3695                RRETURN(MATCH_NOMATCH);
3696                }
3697            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3698              }
3699          break;          break;
3700    
3701          case OP_WHITESPACE:          case OP_WHITESPACE:
3702          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3703              {
3704              if (eptr >= md->end_subject)
3705                {
3706                SCHECK_PARTIAL();
3707                RRETURN(MATCH_NOMATCH);
3708                }
3709            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3710              }
3711          break;          break;
3712    
3713          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3714          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3715              {
3716              if (eptr >= md->end_subject)
3717                {
3718                SCHECK_PARTIAL();
3719                RRETURN(MATCH_NOMATCH);
3720                }
3721            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if ((md->ctypes[*eptr++] & ctype_word) != 0)
3722              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3723              }
3724          break;          break;
3725    
3726          case OP_WORDCHAR:          case OP_WORDCHAR:
3727          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3728              {
3729              if (eptr >= md->end_subject)
3730                {
3731                SCHECK_PARTIAL();
3732                RRETURN(MATCH_NOMATCH);
3733                }
3734            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if ((md->ctypes[*eptr++] & ctype_word) == 0)
3735              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3736              }
3737          break;          break;
3738    
3739          default:          default:
# Line 3361  for (;;) Line 3761  for (;;)
3761              {              {
3762              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3763              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3764              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3765                if (eptr >= md->end_subject)
3766                  {
3767                  SCHECK_PARTIAL();
3768                  RRETURN(MATCH_NOMATCH);
3769                  }
3770              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3771              if (prop_fail_result) RRETURN(MATCH_NOMATCH);              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3772              }              }
# Line 3372  for (;;) Line 3777  for (;;)
3777              {              {
3778              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3779              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3780              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3781                if (eptr >= md->end_subject)
3782                  {
3783                  SCHECK_PARTIAL();
3784                  RRETURN(MATCH_NOMATCH);
3785                  }
3786              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3787              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
3788              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
# Line 3387  for (;;) Line 3797  for (;;)
3797              {              {
3798              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3799              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3800              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3801                if (eptr >= md->end_subject)
3802                  {
3803                  SCHECK_PARTIAL();
3804                  RRETURN(MATCH_NOMATCH);
3805                  }
3806              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3807              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3808              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
# Line 3400  for (;;) Line 3815  for (;;)
3815              {              {
3816              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3817              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3818              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3819                if (eptr >= md->end_subject)
3820                  {
3821                  SCHECK_PARTIAL();
3822                  RRETURN(MATCH_NOMATCH);
3823                  }
3824              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3825              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
3826              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
# Line 3413  for (;;) Line 3833  for (;;)
3833              {              {
3834              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3835              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3836              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3837                if (eptr >= md->end_subject)
3838                  {
3839                  SCHECK_PARTIAL();
3840                  RRETURN(MATCH_NOMATCH);
3841                  }
3842              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3843              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
3844              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
# Line 3435  for (;;) Line 3860  for (;;)
3860            {            {
3861            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3862            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3863            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3864              if (eptr >= md->end_subject)
3865                {
3866                SCHECK_PARTIAL();
3867                RRETURN(MATCH_NOMATCH);
3868                }
3869            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3870            prop_category = UCD_CATEGORY(c);            prop_category = UCD_CATEGORY(c);
3871            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3872            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3873              {              {
3874              int len = 1;              int len = 1;
3875              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3876                {                else { GETCHARLEN(c, eptr, len); }
               GETCHARLEN(c, eptr, len);  
               }  
3877              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3878              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3879              eptr += len;              eptr += len;
# Line 3464  for (;;) Line 3892  for (;;)
3892            {            {
3893            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3894            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3895            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) RRETURN(MATCH_NOMATCH);
3896                 (ctype == OP_ANY && IS_NEWLINE(eptr)))            if (eptr >= md->end_subject)
3897                {
3898                SCHECK_PARTIAL();
3899                RRETURN(MATCH_NOMATCH);
3900                }
3901              if (ctype == OP_ANY && IS_NEWLINE(eptr))
3902              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
   
3903            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3904            switch(ctype)            switch(ctype)
3905              {              {
# Line 3623  for (;;) Line 4055  for (;;)
4055            {            {
4056            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4057            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4058            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) RRETURN(MATCH_NOMATCH);
4059                 (ctype == OP_ANY && IS_NEWLINE(eptr)))            if (eptr >= md->end_subject)
4060                {
4061                SCHECK_PARTIAL();
4062                RRETURN(MATCH_NOMATCH);
4063                }
4064              if (ctype == OP_ANY && IS_NEWLINE(eptr))
4065              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
   
4066            c = *eptr++;            c = *eptr++;
4067            switch(ctype)            switch(ctype)
4068              {              {
# Line 4409  const uschar *tables; Line 4845  const uschar *tables;
4845  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4846  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
4847  USPTR end_subject;  USPTR end_subject;
4848    USPTR start_partial = NULL;
4849  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
4850    
4851  pcre_study_data internal_study;  pcre_study_data internal_study;
# Line 4492  md->jscript_compat = (re->options & PCRE Line 4929  md->jscript_compat = (re->options & PCRE
4929  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4930  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
4931  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
4932  md->partial = (options & PCRE_PARTIAL) != 0;  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
4933    md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
4934                  ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
4935  md->hitend = FALSE;  md->hitend = FALSE;
4936    
4937  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
# Line 4533  switch ((((options & PCRE_NEWLINE_BITS) Line 4972  switch ((((options & PCRE_NEWLINE_BITS)
4972          (pcre_uint32)options) & PCRE_NEWLINE_BITS)          (pcre_uint32)options) & PCRE_NEWLINE_BITS)
4973    {    {
4974    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
4975    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
4976    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
4977    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
4978         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
4979    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
4980    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4981    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
# Line 4566  else Line 5005  else
5005      }      }
5006    }    }
5007    
5008  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching was originally supported only for a restricted set of
5009  moment. */  regexes; from release 8.00 there are no restrictions, but the bits are still
5010    defined (though never set). So there's no harm in leaving this code. */
5011    
5012  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5013    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
# Line 4578  back the character offset. */ Line 5018  back the character offset. */
5018  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5019  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5020    {    {
5021    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
5022      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
5023    if (start_offset > 0 && start_offset < length)    if (start_offset > 0 && start_offset < length)
5024      {      {
5025      int tb = ((uschar *)subject)[start_offset];      int tb = ((USPTR)subject)[start_offset];
5026      if (tb > 127)      if (tb > 127)
5027        {        {
5028        tb &= 0xc0;        tb &= 0xc0;
# Line 4688  for(;;) Line 5128  for(;;)
5128      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
5129      }      }
5130    
5131    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* If firstline is TRUE, the start of the match is constrained to the first
5132    start of the match is constrained to the first line of a multiline string.    line of a multiline string. That is, the match must be before or at the first
5133    That is, the match must be before or at the first newline. Implement this by    newline. Implement this by temporarily adjusting end_subject so that we stop
5134    temporarily adjusting end_subject so that we stop scanning at a newline. If    scanning at a newline. If the match fails at the newline, later code breaks
5135    the match fails at the newline, later code breaks this loop. */    this loop. */
5136    
5137    if (firstline)    if (firstline)
5138      {      {
5139      USPTR t = start_match;      USPTR t = start_match;
5140  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5141      if (utf8)      if (utf8)
5142        {        {
5143        while (t < md->end_subject && !IS_NEWLINE(t))        while (t < md->end_subject && !IS_NEWLINE(t))
5144          {          {
5145          t++;          t++;
5146          while (t < end_subject && (*t & 0xc0) == 0x80) t++;          while (t < end_subject && (*t & 0xc0) == 0x80) t++;
5147          }          }
5148        }        }
5149      else      else
5150  #endif  #endif
5151      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
5152      end_subject = t;      end_subject = t;
5153      }      }
5154    
5155    /* Now advance to a unique first byte if there is one. */    /* There are some optimizations that avoid running the match if a known
5156      starting point is not found, or if a known later character is not present.
5157      However, there is an option that disables these, for testing and for ensuring
5158      that all callouts do actually occur. */
5159    
5160    if (first_byte >= 0)    if ((options & PCRE_NO_START_OPTIMIZE) == 0)
5161      {      {
5162      if (first_byte_caseless)      /* Advance to a unique first byte if there is one. */
       while (start_match < end_subject && md->lcc[*start_match] != first_byte)  
         start_match++;  
     else  
       while (start_match < end_subject && *start_match != first_byte)  
         start_match++;  
     }  
5163    
5164    /* Or to just after a linebreak for a multiline match */      if (first_byte >= 0)
5165          {
5166          if (first_byte_caseless)
5167            while (start_match < end_subject && md->lcc[*start_match] != first_byte)
5168              start_match++;
5169          else
5170            while (start_match < end_subject && *start_match != first_byte)
5171              start_match++;
5172          }
5173    
5174    else if (startline)      /* Or to just after a linebreak for a multiline match */
5175      {  
5176      if (start_match > md->start_subject + start_offset)      else if (startline)
5177        {        {
5178  #ifdef SUPPORT_UTF8        if (start_match > md->start_subject + start_offset)
       if (utf8)  
5179          {          {
5180          while (start_match < end_subject && !WAS_NEWLINE(start_match))  #ifdef SUPPORT_UTF8
5181            if (utf8)
5182            {            {
5183            start_match++;            while (start_match < end_subject && !WAS_NEWLINE(start_match))
5184            while(start_match < end_subject && (*start_match & 0xc0) == 0x80)              {
5185              start_match++;              start_match++;
5186            }              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5187                  start_match++;
5188                }
5189              }
5190            else
5191    #endif
5192            while (start_match < end_subject && !WAS_NEWLINE(start_match))
5193              start_match++;
5194    
5195            /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
5196            and we are now at a LF, advance the match position by one more character.
5197            */
5198    
5199            if (start_match[-1] == CHAR_CR &&
5200                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
5201                 start_match < end_subject &&
5202                 *start_match == CHAR_NL)
5203              start_match++;
5204          }          }
       else  
 #endif  
       while (start_match < end_subject && !WAS_NEWLINE(start_match))  
         start_match++;  
   
       /* If we have just passed a CR and the newline option is ANY or ANYCRLF,  
       and we are now at a LF, advance the match position by one more character.  
       */  
   
       if (start_match[-1] == '\r' &&  
            (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&  
            start_match < end_subject &&  
            *start_match == '\n')  
         start_match++;  
5205        }        }
     }  
5206    
5207    /* Or to a non-unique first byte after study */      /* Or to a non-unique first byte after study */
5208    
5209    else if (start_bits != NULL)      else if (start_bits != NULL)
     {  
     while (start_match < end_subject)  
5210        {        {
5211        register unsigned int c = *start_match;        while (start_match < end_subject)
5212        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;          {
5213          else break;          register unsigned int c = *start_match;
5214            if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
5215              else break;
5216            }
5217        }        }
5218      }      }   /* Starting optimizations */
5219    
5220    /* Restore fudged end_subject */    /* Restore fudged end_subject */
5221    
# Line 4779  for(;;) Line 5227  for(;;)
5227    printf("\n");    printf("\n");
5228  #endif  #endif
5229    
5230    /* If req_byte is set, we know that that character must appear in the subject    /* If req_byte is set, we know that that character must appear in the
5231    for the match to succeed. If the first character is set, req_byte must be    subject for the match to succeed. If the first character is set, req_byte
5232    later in the subject; otherwise the test starts at the match point. This    must be later in the subject; otherwise the test starts at the match point.
5233    optimization can save a huge amount of backtracking in patterns with nested    This optimization can save a huge amount of backtracking in patterns with
5234    unlimited repeats that aren't going to match. Writing separate code for    nested unlimited repeats that aren't going to match. Writing separate code
5235    cased/caseless versions makes it go faster, as does using an autoincrement    for cased/caseless versions makes it go faster, as does using an
5236    and backing off on a match.    autoincrement and backing off on a match.
5237    
5238    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end
5239    take a long time, and give bad performance on quite ordinary patterns. This    can take a long time, and give bad performance on quite ordinary patterns.
5240    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte    This showed up when somebody was matching something like /^\d+C/ on a
5241    string... so we don't do this when the string is sufficiently long.    32-megabyte string... so we don't do this when the string is sufficiently
5242      long.
5243    
5244    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested, or if
5245    */    disabling is explicitly requested. */
5246    
5247    if (req_byte >= 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
5248          req_byte >= 0 &&
5249        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
5250        !md->partial)        !md->partial)
5251      {      {
# Line 4839  for(;;) Line 5289  for(;;)
5289        }        }
5290      }      }
5291    
5292    /* OK, we can now run the match. */    /* OK, we can now run the match. If "hitend" is set afterwards, remember the
5293      first starting point for which a partial match was found. */
5294    
5295    md->start_match_ptr = start_match;    md->start_match_ptr = start_match;
5296      md->start_used_ptr = start_match;
5297    md->match_call_count = 0;    md->match_call_count = 0;
5298    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
5299      if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
5300    
5301    switch(rc)    switch(rc)
5302      {      {
# Line 4873  for(;;) Line 5326  for(;;)
5326      rc = MATCH_NOMATCH;      rc = MATCH_NOMATCH;
5327      goto ENDLOOP;      goto ENDLOOP;
5328    
5329      /* Any other return is some kind of error. */      /* Any other return is either a match, or some kind of error. */
5330    
5331      default:      default:
5332      goto ENDLOOP;      goto ENDLOOP;
# Line 4903  for(;;) Line 5356  for(;;)
5356    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
5357    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. */
5358    
5359    if (start_match[-1] == '\r' &&    if (start_match[-1] == CHAR_CR &&
5360        start_match < end_subject &&        start_match < end_subject &&
5361        *start_match == '\n' &&        *start_match == CHAR_NL &&
5362        (re->flags & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
5363          (md->nltype == NLTYPE_ANY ||          (md->nltype == NLTYPE_ANY ||
5364           md->nltype == NLTYPE_ANYCRLF ||           md->nltype == NLTYPE_ANYCRLF ||
# Line 4979  if (using_temporary_offsets) Line 5432  if (using_temporary_offsets)
5432    (pcre_free)(md->offset_vector);    (pcre_free)(md->offset_vector);
5433    }    }
5434    
5435  if (rc != MATCH_NOMATCH)  if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
5436    {    {
5437    DPRINTF((">>>> error: returning %d\n", rc));    DPRINTF((">>>> error: returning %d\n", rc));
5438    return rc;    return rc;
5439    }    }
5440  else if (md->partial && md->hitend)  else if (start_partial != NULL)
5441    {    {
5442    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
5443      if (offsetcount > 1)
5444        {
5445        offsets[0] = start_partial - (USPTR)subject;
5446        offsets[1] = end_subject - (USPTR)subject;
5447        }
5448    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;
5449    }    }
5450  else  else

Legend:
Removed from v.366  
changed lines
  Added in v.444

  ViewVC Help
Powered by ViewVC 1.1.5