/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 182 by ph10, Wed Jun 13 15:09:54 2007 UTC revision 501 by ph10, Sun Mar 7 11:49:54 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 53  possible. There are also some static sup Line 57  possible. There are also some static sup
57  #undef min  #undef min
58  #undef max  #undef max
59    
 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
   
 #define EPTR_WORK_SIZE (1000)  
   
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 70  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_THEN         (-996)
78    
79  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
80  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 83  static const char rep_max[] = { 0, 0, 0, Line 89  static const char rep_max[] = { 0, 0, 0,
89    
90    
91    
92  #ifdef DEBUG  #ifdef PCRE_DEBUG
93  /*************************************************  /*************************************************
94  *        Debugging function to print chars       *  *        Debugging function to print chars       *
95  *************************************************/  *************************************************/
# Line 135  match_ref(int offset, register USPTR ept Line 141  match_ref(int offset, register USPTR ept
141  {  {
142  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
143    
144  #ifdef DEBUG  #ifdef PCRE_DEBUG
145  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
146    printf("matching subject <null>");    printf("matching subject <null>");
147  else  else
# Line 152  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 205  variable instead of being passed in the Line 237  variable instead of being passed in the
237  ****************************************************************************  ****************************************************************************
238  ***************************************************************************/  ***************************************************************************/
239    
240    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
241  /* Numbers for RMATCH calls */  below must be updated in sync.  */
242    
243  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
244         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
245         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
246         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
247         RM41,  RM42, RM43, RM44, RM45, RM46, RM47 };         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
248           RM51,  RM52, RM53, RM54 };
249    
250  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
251  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
252  actuall used in this definition. */  actually used in this definition. */
253    
254  #ifndef NO_RECURSE  #ifndef NO_RECURSE
255  #define REGISTER register  #define REGISTER register
256    
257  #ifdef DEBUG  #ifdef PCRE_DEBUG
258  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
259    { \    { \
260    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
261    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
262    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
263    }    }
264  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 236  actuall used in this definition. */ Line 268  actuall used in this definition. */
268    }    }
269  #else  #else
270  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
271    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
272  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
273  #endif  #endif
274    
# Line 256  argument of match(), which never changes Line 288  argument of match(), which never changes
288    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
289    newframe->Xecode = rb;\    newframe->Xecode = rb;\
290    newframe->Xmstart = mstart;\    newframe->Xmstart = mstart;\
291      newframe->Xmarkptr = markptr;\
292    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
293    newframe->Xims = re;\    newframe->Xims = re;\
294    newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
# Line 290  typedef struct heapframe { Line 323  typedef struct heapframe {
323    
324    /* Function arguments that may change */    /* Function arguments that may change */
325    
326    const uschar *Xeptr;    USPTR Xeptr;
327    const uschar *Xecode;    const uschar *Xecode;
328    const uschar *Xmstart;    USPTR Xmstart;
329      USPTR Xmarkptr;
330    int Xoffset_top;    int Xoffset_top;
331    long int Xims;    long int Xims;
332    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 301  typedef struct heapframe { Line 335  typedef struct heapframe {
335    
336    /* Function local variables */    /* Function local variables */
337    
338    const uschar *Xcallpat;    USPTR Xcallpat;
339    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
340    const uschar *Xdata;    USPTR Xcharptr;
341    const uschar *Xnext;  #endif
342    const uschar *Xpp;    USPTR Xdata;
343    const uschar *Xprev;    USPTR Xnext;
344    const uschar *Xsaved_eptr;    USPTR Xpp;
345      USPTR Xprev;
346      USPTR Xsaved_eptr;
347    
348    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
349    
# Line 328  typedef struct heapframe { Line 364  typedef struct heapframe {
364    uschar Xocchars[8];    uschar Xocchars[8];
365  #endif  #endif
366    
367      int Xcodelink;
368    int Xctype;    int Xctype;
369    unsigned int Xfc;    unsigned int Xfc;
370    int Xfi;    int Xfi;
# Line 363  typedef struct heapframe { Line 400  typedef struct heapframe {
400    
401  /* This function is called recursively in many circumstances. Whenever it  /* This function is called recursively in many circumstances. Whenever it
402  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
403  same response.  same response. */
404    
405    /* These macros pack up tests that are used for partial matching, and which
406    appears several times in the code. We set the "hit end" flag if the pointer is
407    at the end of the subject and also past the start of the subject (i.e.
408    something has been matched). For hard partial matching, we then return
409    immediately. The second one is used when we already know we are past the end of
410    the subject. */
411    
412  Performance note: It might be tempting to extract commonly used fields from the  #define CHECK_PARTIAL()\
413  md structure (e.g. utf8, end_subject) into individual variables to improve    if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
414        {\
415        md->hitend = TRUE;\
416        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
417        }
418    
419    #define SCHECK_PARTIAL()\
420      if (md->partial != 0 && eptr > mstart)\
421        {\
422        md->hitend = TRUE;\
423        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
424        }
425    
426    
427    /* Performance note: It might be tempting to extract commonly used fields from
428    the md structure (e.g. utf8, end_subject) into individual variables to improve
429  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
430  made performance worse.  made performance worse.
431    
# Line 375  Arguments: Line 434  Arguments:
434     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
435     mstart      pointer to the current match start position (can be modified     mstart      pointer to the current match start position (can be modified
436                   by encountering \K)                   by encountering \K)
437       markptr     pointer to the most recent MARK name, or NULL
438     offset_top  current top pointer     offset_top  current top pointer
439     md          pointer to "static" info for the match     md          pointer to "static" info for the match
440     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 384  Arguments: Line 444  Arguments:
444                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
445                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
446                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
447     rdepth      the recursion depth     rdepth      the recursion depth
448    
449  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 394  Returns:       MATCH_MATCH if matched Line 453  Returns:       MATCH_MATCH if matched
453  */  */
454    
455  static int  static int
456  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart, USPTR
457    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    markptr, int offset_top, match_data *md, unsigned long int ims,
458    int flags, unsigned int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
459  {  {
460  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
461  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 408  register unsigned int c;   /* Character Line 467  register unsigned int c;   /* Character
467  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
468    
469  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
470    int condcode;
471    
472  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
473  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 423  frame->Xprevframe = NULL;            /* Line 483  frame->Xprevframe = NULL;            /*
483  frame->Xeptr = eptr;  frame->Xeptr = eptr;
484  frame->Xecode = ecode;  frame->Xecode = ecode;
485  frame->Xmstart = mstart;  frame->Xmstart = mstart;
486    frame->Xmarkptr = markptr;
487  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
488  frame->Xims = ims;  frame->Xims = ims;
489  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 438  HEAP_RECURSE: Line 499  HEAP_RECURSE:
499  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
500  #define ecode              frame->Xecode  #define ecode              frame->Xecode
501  #define mstart             frame->Xmstart  #define mstart             frame->Xmstart
502    #define markptr            frame->Xmarkptr
503  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
504  #define ims                frame->Xims  #define ims                frame->Xims
505  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 450  HEAP_RECURSE: Line 512  HEAP_RECURSE:
512  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
513  #endif  #endif
514  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
515    #define codelink           frame->Xcodelink
516  #define data               frame->Xdata  #define data               frame->Xdata
517  #define next               frame->Xnext  #define next               frame->Xnext
518  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 530  int oclength; Line 593  int oclength;
593  uschar occhars[8];  uschar occhars[8];
594  #endif  #endif
595    
596    int codelink;
597  int ctype;  int ctype;
598  int length;  int length;
599  int max;  int max;
# Line 563  TAIL_RECURSE: Line 627  TAIL_RECURSE:
627  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
628  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
629  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
630  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
631  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
632  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
633  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
# Line 586  original_ims = ims;    /* Save for reset Line 650  original_ims = ims;    /* Save for reset
650  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
651  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
652  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
653  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
654  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
655  already used. */  block that is used is on the stack, so a new one may be required for each
656    match(). */
657    
658  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
659    {    {
660    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
661    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
662      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
663    }    }
664    
665  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 611  for (;;) Line 669  for (;;)
669    minimize = possessive = FALSE;    minimize = possessive = FALSE;
670    op = *ecode;    op = *ecode;
671    
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > mstart)  
     md->hitend = TRUE;  
   
672    switch(op)    switch(op)
673      {      {
674        case OP_FAIL:
675        RRETURN(MATCH_NOMATCH);
676    
677        case OP_PRUNE:
678        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
679          ims, eptrb, flags, RM51);
680        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
681        RRETURN(MATCH_PRUNE);
682    
683        case OP_COMMIT:
684        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
685          ims, eptrb, flags, RM52);
686        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
687        RRETURN(MATCH_COMMIT);
688    
689        case OP_SKIP:
690        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
691          ims, eptrb, flags, RM53);
692        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
693        md->start_match_ptr = eptr;   /* Pass back current position */
694        RRETURN(MATCH_SKIP);
695    
696        case OP_THEN:
697        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
698          ims, eptrb, flags, RM54);
699        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
700        RRETURN(MATCH_THEN);
701    
702      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
703      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
704      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 640  for (;;) Line 718  for (;;)
718      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
719      offset = number << 1;      offset = number << 1;
720    
721  #ifdef DEBUG  #ifdef PCRE_DEBUG
722      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
723      printf("subject=");      printf("subject=");
724      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
# Line 662  for (;;) Line 740  for (;;)
740          {          {
741          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
742            ims, eptrb, flags, RM1);            ims, eptrb, flags, RM1);
743          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
744          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
745          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
746          }          }
# Line 677  for (;;) Line 755  for (;;)
755        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
756        }        }
757    
758      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
759      bracket. */      as a non-capturing bracket. */
760    
761        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
762        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
763    
764      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
765    
766        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
767        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
768    
769      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
770      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
771      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
772      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
773        is set.*/
774    
775      case OP_BRA:      case OP_BRA:
776      case OP_SBRA:      case OP_SBRA:
# Line 693  for (;;) Line 778  for (;;)
778      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
779      for (;;)      for (;;)
780        {        {
781        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
782          {          {
783          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
784          flags |= match_tail_recursed;            {
785          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
786          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
787              goto TAIL_RECURSE;
788              }
789    
790            /* Possibly empty group; can't use tail recursion. */
791    
792            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
793              eptrb, flags, RM48);
794            RRETURN(rrc);
795          }          }
796    
797        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
# Line 706  for (;;) Line 799  for (;;)
799    
800        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
801          eptrb, flags, RM2);          eptrb, flags, RM2);
802        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
803        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
804        }        }
805      /* Control never reaches here. */      /* Control never reaches here. */
# Line 719  for (;;) Line 812  for (;;)
812    
813      case OP_COND:      case OP_COND:
814      case OP_SCOND:      case OP_SCOND:
815      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
816    
817        /* Because of the way auto-callout works during compile, a callout item is
818        inserted between OP_COND and an assertion condition. */
819    
820        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
821        {        {
822        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        if (pcre_callout != NULL)
823        condition = md->recursive != NULL &&          {
824          (offset == RREF_ANY || offset == md->recursive->group_num);          pcre_callout_block cb;
825        ecode += condition? 3 : GET(ecode, 1);          cb.version          = 1;   /* Version 1 of the callout block */
826            cb.callout_number   = ecode[LINK_SIZE+2];
827            cb.offset_vector    = md->offset_vector;
828            cb.subject          = (PCRE_SPTR)md->start_subject;
829            cb.subject_length   = md->end_subject - md->start_subject;
830            cb.start_match      = mstart - md->start_subject;
831            cb.current_position = eptr - md->start_subject;
832            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
833            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
834            cb.capture_top      = offset_top/2;
835            cb.capture_last     = md->capture_last;
836            cb.callout_data     = md->callout_data;
837            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
838            if (rrc < 0) RRETURN(rrc);
839            }
840          ecode += _pcre_OP_lengths[OP_CALLOUT];
841          }
842    
843        condcode = ecode[LINK_SIZE+1];
844    
845        /* Now see what the actual condition is */
846    
847        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
848          {
849          if (md->recursive == NULL)                /* Not recursing => FALSE */
850            {
851            condition = FALSE;
852            ecode += GET(ecode, 1);
853            }
854          else
855            {
856            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
857            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
858    
859            /* If the test is for recursion into a specific subpattern, and it is
860            false, but the test was set up by name, scan the table to see if the
861            name refers to any other numbers, and test them. The condition is true
862            if any one is set. */
863    
864            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
865              {
866              uschar *slotA = md->name_table;
867              for (i = 0; i < md->name_count; i++)
868                {
869                if (GET2(slotA, 0) == recno) break;
870                slotA += md->name_entry_size;
871                }
872    
873              /* Found a name for the number - there can be only one; duplicate
874              names for different numbers are allowed, but not vice versa. First
875              scan down for duplicates. */
876    
877              if (i < md->name_count)
878                {
879                uschar *slotB = slotA;
880                while (slotB > md->name_table)
881                  {
882                  slotB -= md->name_entry_size;
883                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
884                    {
885                    condition = GET2(slotB, 0) == md->recursive->group_num;
886                    if (condition) break;
887                    }
888                  else break;
889                  }
890    
891                /* Scan up for duplicates */
892    
893                if (!condition)
894                  {
895                  slotB = slotA;
896                  for (i++; i < md->name_count; i++)
897                    {
898                    slotB += md->name_entry_size;
899                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
900                      {
901                      condition = GET2(slotB, 0) == md->recursive->group_num;
902                      if (condition) break;
903                      }
904                    else break;
905                    }
906                  }
907                }
908              }
909    
910            /* Chose branch according to the condition */
911    
912            ecode += condition? 3 : GET(ecode, 1);
913            }
914        }        }
915    
916      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
917        {        {
918        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
919        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
920    
921          /* If the numbered capture is unset, but the reference was by name,
922          scan the table to see if the name refers to any other numbers, and test
923          them. The condition is true if any one is set. This is tediously similar
924          to the code above, but not close enough to try to amalgamate. */
925    
926          if (!condition && condcode == OP_NCREF)
927            {
928            int refno = offset >> 1;
929            uschar *slotA = md->name_table;
930    
931            for (i = 0; i < md->name_count; i++)
932              {
933              if (GET2(slotA, 0) == refno) break;
934              slotA += md->name_entry_size;
935              }
936    
937            /* Found a name for the number - there can be only one; duplicate names
938            for different numbers are allowed, but not vice versa. First scan down
939            for duplicates. */
940    
941            if (i < md->name_count)
942              {
943              uschar *slotB = slotA;
944              while (slotB > md->name_table)
945                {
946                slotB -= md->name_entry_size;
947                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
948                  {
949                  offset = GET2(slotB, 0) << 1;
950                  condition = offset < offset_top &&
951                    md->offset_vector[offset] >= 0;
952                  if (condition) break;
953                  }
954                else break;
955                }
956    
957              /* Scan up for duplicates */
958    
959              if (!condition)
960                {
961                slotB = slotA;
962                for (i++; i < md->name_count; i++)
963                  {
964                  slotB += md->name_entry_size;
965                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
966                    {
967                    offset = GET2(slotB, 0) << 1;
968                    condition = offset < offset_top &&
969                      md->offset_vector[offset] >= 0;
970                    if (condition) break;
971                    }
972                  else break;
973                  }
974                }
975              }
976            }
977    
978          /* Choose branch according to the condition */
979    
980        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
981        }        }
982    
983      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
984        {        {
985        condition = FALSE;        condition = FALSE;
986        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 754  for (;;) Line 1000  for (;;)
1000          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1001          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1002          }          }
1003        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1004          {          {
1005          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1006          }          }
1007        else        else
1008          {          {
1009          condition = FALSE;          condition = FALSE;
1010          ecode += GET(ecode, 1);          ecode += codelink;
1011          }          }
1012        }        }
1013    
1014      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1015      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
1016      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
1017        group. If the second alternative doesn't exist, we can just plough on. */
1018    
1019      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1020        {        {
1021        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1022        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
1023        goto TAIL_RECURSE;          {
1024            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1025            RRETURN(rrc);
1026            }
1027          else                       /* Group must match something */
1028            {
1029            flags = 0;
1030            goto TAIL_RECURSE;
1031            }
1032        }        }
1033      else      else                         /* Condition false & no alternative */
1034        {        {
1035        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1036        }        }
1037      break;      break;
1038    
1039    
1040      /* End of the pattern. If we are in a top-level recursion, we should      /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1041      restore the offsets appropriately and continue from after the call. */      to close any currently open capturing brackets. */
1042    
1043        case OP_CLOSE:
1044        number = GET2(ecode, 1);
1045        offset = number << 1;
1046    
1047    #ifdef PCRE_DEBUG
1048          printf("end bracket %d at *ACCEPT", number);
1049          printf("\n");
1050    #endif
1051    
1052        md->capture_last = number;
1053        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1054          {
1055          md->offset_vector[offset] =
1056            md->offset_vector[md->offset_end - number];
1057          md->offset_vector[offset+1] = eptr - md->start_subject;
1058          if (offset_top <= offset) offset_top = offset + 2;
1059          }
1060        ecode += 3;
1061        break;
1062    
1063    
1064        /* End of the pattern, either real or forced. If we are in a top-level
1065        recursion, we should restore the offsets appropriately and continue from
1066        after the call. */
1067    
1068        case OP_ACCEPT:
1069      case OP_END:      case OP_END:
1070      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1071        {        {
# Line 793  for (;;) Line 1074  for (;;)
1074        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1075        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1076          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1077        mstart = rec->save_start;        offset_top = rec->save_offset_top;
1078        ims = original_ims;        ims = original_ims;
1079        ecode = rec->after_call;        ecode = rec->after_call;
1080        break;        break;
1081        }        }
1082    
1083      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1084      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1085        the subject. In both cases, backtracking will then try other alternatives,
1086        if any. */
1087    
1088        if (eptr == mstart &&
1089            (md->notempty ||
1090              (md->notempty_atstart &&
1091                mstart == md->start_subject + md->start_offset)))
1092          RRETURN(MATCH_NOMATCH);
1093    
1094        /* Otherwise, we have a match. */
1095    
     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);  
1096      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
1097      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
1098      md->start_match_ptr = mstart;  /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1099      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
1100    
1101      /* Change option settings */      /* Change option settings */
# Line 828  for (;;) Line 1118  for (;;)
1118        {        {
1119        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1120          RM4);          RM4);
1121        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH)
1122        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          {
1123            mstart = md->start_match_ptr;   /* In case \K reset it */
1124            break;
1125            }
1126          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1127        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1128        }        }
1129      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 847  for (;;) Line 1141  for (;;)
1141      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1142      continue;      continue;
1143    
1144      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1145        PRUNE, or COMMIT means we must assume failure without checking subsequent
1146        branches. */
1147    
1148      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1149      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
# Line 856  for (;;) Line 1152  for (;;)
1152        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1153          RM5);          RM5);
1154        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
1155        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1156            {
1157            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1158            break;
1159            }
1160          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1161        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1162        }        }
1163      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 880  for (;;) Line 1181  for (;;)
1181          {          {
1182          eptr--;          eptr--;
1183          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1184          BACKCHAR(eptr)          BACKCHAR(eptr);
1185          }          }
1186        }        }
1187      else      else
# Line 893  for (;;) Line 1194  for (;;)
1194        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1195        }        }
1196    
1197      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1198    
1199        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1200      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1201      break;      break;
1202    
# Line 973  for (;;) Line 1275  for (;;)
1275    
1276        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1277              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1278        new_recursive.save_start = mstart;        new_recursive.save_offset_top = offset_top;
       mstart = eptr;  
1279    
1280        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1281        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 993  for (;;) Line 1294  for (;;)
1294              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1295            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
1296            }            }
1297          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1298            {            {
1299            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1300              if (new_recursive.offset_save != stacksave)
1301                (pcre_free)(new_recursive.offset_save);
1302            RRETURN(rrc);            RRETURN(rrc);
1303            }            }
1304    
# Line 1019  for (;;) Line 1322  for (;;)
1322      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1323      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1324      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1325      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1326        the start-of-match value in case it was changed by \K. */
1327    
1328      case OP_ONCE:      case OP_ONCE:
1329      prev = ecode;      prev = ecode;
# Line 1027  for (;;) Line 1331  for (;;)
1331    
1332      do      do
1333        {        {
1334        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1335          eptrb, 0, RM7);        if (rrc == MATCH_MATCH)
1336        if (rrc == MATCH_MATCH) break;          {
1337        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          mstart = md->start_match_ptr;
1338            break;
1339            }
1340          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1341        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1342        }        }
1343      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1073  for (;;) Line 1380  for (;;)
1380    
1381      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1382        {        {
1383        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
         RM8);  
1384        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1385        ecode = prev;        ecode = prev;
1386        flags = match_tail_recursed;        flags = 0;
1387        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1388        }        }
1389      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
# Line 1085  for (;;) Line 1391  for (;;)
1391        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1392        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1393        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1394        flags = match_tail_recursed;        flags = 0;
1395        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1396        }        }
1397      /* Control never gets here */      /* Control never gets here */
# Line 1097  for (;;) Line 1403  for (;;)
1403      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1404      break;      break;
1405    
1406      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1407      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1408      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1409      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1410      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1411    
1412      case OP_BRAZERO:      case OP_BRAZERO:
1413        {        {
# Line 1123  for (;;) Line 1429  for (;;)
1429        }        }
1430      break;      break;
1431    
1432        case OP_SKIPZERO:
1433          {
1434          next = ecode+1;
1435          do next += GET(next,1); while (*next == OP_ALT);
1436          ecode = next + 1 + LINK_SIZE;
1437          }
1438        break;
1439    
1440      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1441    
1442      case OP_KET:      case OP_KET:
# Line 1141  for (;;) Line 1455  for (;;)
1455        }        }
1456      else saved_eptr = NULL;      else saved_eptr = NULL;
1457    
1458      /* If we are at the end of an assertion group, stop matching and return      /* If we are at the end of an assertion group or an atomic group, stop
1459      MATCH_MATCH, but record the current high water mark for use by positive      matching and return MATCH_MATCH, but record the current high water mark for
1460      assertions. Do this also for the "once" (atomic) groups. */      use by positive assertions. We also need to record the match start in case
1461        it was changed by \K. */
1462    
1463      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1464          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1151  for (;;) Line 1466  for (;;)
1466        {        {
1467        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1468        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1469          md->start_match_ptr = mstart;
1470        RRETURN(MATCH_MATCH);        RRETURN(MATCH_MATCH);
1471        }        }
1472    
# Line 1165  for (;;) Line 1481  for (;;)
1481        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1482        offset = number << 1;        offset = number << 1;
1483    
1484  #ifdef DEBUG  #ifdef PCRE_DEBUG
1485        printf("end bracket %d", number);        printf("end bracket %d", number);
1486        printf("\n");        printf("\n");
1487  #endif  #endif
# Line 1187  for (;;) Line 1503  for (;;)
1503          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1504          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1505          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
         mstart = rec->save_start;  
1506          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1507            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1508            offset_top = rec->save_offset_top;
1509          ecode = rec->after_call;          ecode = rec->after_call;
1510          ims = original_ims;          ims = original_ims;
1511          break;          break;
# Line 1216  for (;;) Line 1532  for (;;)
1532    
1533      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1534      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1535      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1536        unlimited repeat of a group that can match an empty string. */
1537    
1538      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1539    
1540      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1541        {        {
1542        RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
         RM12);  
1543        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1544          if (flags != 0)    /* Could match an empty string */
1545            {
1546            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1547            RRETURN(rrc);
1548            }
1549        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1550        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1551        }        }
1552      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
# Line 1234  for (;;) Line 1554  for (;;)
1554        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1555        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1556        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1557        flags = match_tail_recursed;        flags = 0;
1558        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1559        }        }
1560      /* Control never gets here */      /* Control never gets here */
# Line 1325  for (;;) Line 1645  for (;;)
1645    
1646        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1647        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1648        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1649          partial matching. */
1650    
1651  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1652        if (utf8)        if (utf8)
1653          {          {
1654          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1655            {            {
1656            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1657            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1658              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1659            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1660            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1661            }            }
1662          if (eptr >= md->end_subject) cur_is_word = FALSE; else          if (eptr >= md->end_subject)
1663              {
1664              SCHECK_PARTIAL();
1665              cur_is_word = FALSE;
1666              }
1667            else
1668            {            {
1669            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1670            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1346  for (;;) Line 1673  for (;;)
1673        else        else
1674  #endif  #endif
1675    
1676        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode */
1677    
1678          {          {
1679          prev_is_word = (eptr != md->start_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1680            ((md->ctypes[eptr[-1]] & ctype_word) != 0);            {
1681          cur_is_word = (eptr < md->end_subject) &&            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1682            ((md->ctypes[*eptr] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1683              }
1684            if (eptr >= md->end_subject)
1685              {
1686              SCHECK_PARTIAL();
1687              cur_is_word = FALSE;
1688              }
1689            else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1690          }          }
1691    
1692        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
# Line 1366  for (;;) Line 1700  for (;;)
1700      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1701    
1702      case OP_ANY:      case OP_ANY:
1703      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1704        /* Fall through */
1705    
1706        case OP_ALLANY:
1707        if (eptr++ >= md->end_subject)
1708        {        {
1709        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);        SCHECK_PARTIAL();
1710          RRETURN(MATCH_NOMATCH);
1711        }        }
1712      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1713      ecode++;      ecode++;
1714      break;      break;
1715    
# Line 1380  for (;;) Line 1717  for (;;)
1717      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1718    
1719      case OP_ANYBYTE:      case OP_ANYBYTE:
1720      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1721          {
1722          SCHECK_PARTIAL();
1723          RRETURN(MATCH_NOMATCH);
1724          }
1725      ecode++;      ecode++;
1726      break;      break;
1727    
1728      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1729      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1730          {
1731          SCHECK_PARTIAL();
1732          RRETURN(MATCH_NOMATCH);
1733          }
1734      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1735      if (      if (
1736  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1398  for (;;) Line 1743  for (;;)
1743      break;      break;
1744    
1745      case OP_DIGIT:      case OP_DIGIT:
1746      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1747          {
1748          SCHECK_PARTIAL();
1749          RRETURN(MATCH_NOMATCH);
1750          }
1751      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1752      if (      if (
1753  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1411  for (;;) Line 1760  for (;;)
1760      break;      break;
1761    
1762      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1763      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1764          {
1765          SCHECK_PARTIAL();
1766          RRETURN(MATCH_NOMATCH);
1767          }
1768      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1769      if (      if (
1770  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1424  for (;;) Line 1777  for (;;)
1777      break;      break;
1778    
1779      case OP_WHITESPACE:      case OP_WHITESPACE:
1780      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1781          {
1782          SCHECK_PARTIAL();
1783          RRETURN(MATCH_NOMATCH);
1784          }
1785      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1786      if (      if (
1787  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1437  for (;;) Line 1794  for (;;)
1794      break;      break;
1795    
1796      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1797      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1798          {
1799          SCHECK_PARTIAL();
1800          RRETURN(MATCH_NOMATCH);
1801          }
1802      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1803      if (      if (
1804  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1450  for (;;) Line 1811  for (;;)
1811      break;      break;
1812    
1813      case OP_WORDCHAR:      case OP_WORDCHAR:
1814      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1815          {
1816          SCHECK_PARTIAL();
1817          RRETURN(MATCH_NOMATCH);
1818          }
1819      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1820      if (      if (
1821  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1463  for (;;) Line 1828  for (;;)
1828      break;      break;
1829    
1830      case OP_ANYNL:      case OP_ANYNL:
1831      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1832          {
1833          SCHECK_PARTIAL();
1834          RRETURN(MATCH_NOMATCH);
1835          }
1836      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1837      switch(c)      switch(c)
1838        {        {
# Line 1471  for (;;) Line 1840  for (;;)
1840        case 0x000d:        case 0x000d:
1841        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1842        break;        break;
1843    
1844        case 0x000a:        case 0x000a:
1845          break;
1846    
1847        case 0x000b:        case 0x000b:
1848        case 0x000c:        case 0x000c:
1849        case 0x0085:        case 0x0085:
1850        case 0x2028:        case 0x2028:
1851        case 0x2029:        case 0x2029:
1852          if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1853        break;        break;
1854        }        }
1855      ecode++;      ecode++;
1856      break;      break;
1857    
1858      case OP_NOT_HSPACE:      case OP_NOT_HSPACE:
1859      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1860          {
1861          SCHECK_PARTIAL();
1862          RRETURN(MATCH_NOMATCH);
1863          }
1864      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1865      switch(c)      switch(c)
1866        {        {
# Line 1513  for (;;) Line 1890  for (;;)
1890      break;      break;
1891    
1892      case OP_HSPACE:      case OP_HSPACE:
1893      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1894          {
1895          SCHECK_PARTIAL();
1896          RRETURN(MATCH_NOMATCH);
1897          }
1898      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1899      switch(c)      switch(c)
1900        {        {
# Line 1543  for (;;) Line 1924  for (;;)
1924      break;      break;
1925    
1926      case OP_NOT_VSPACE:      case OP_NOT_VSPACE:
1927      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1928          {
1929          SCHECK_PARTIAL();
1930          RRETURN(MATCH_NOMATCH);
1931          }
1932      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1933      switch(c)      switch(c)
1934        {        {
# Line 1561  for (;;) Line 1946  for (;;)
1946      break;      break;
1947    
1948      case OP_VSPACE:      case OP_VSPACE:
1949      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1950          {
1951          SCHECK_PARTIAL();
1952          RRETURN(MATCH_NOMATCH);
1953          }
1954      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1955      switch(c)      switch(c)
1956        {        {
# Line 1584  for (;;) Line 1973  for (;;)
1973    
1974      case OP_PROP:      case OP_PROP:
1975      case OP_NOTPROP:      case OP_NOTPROP:
1976      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1977          {
1978          SCHECK_PARTIAL();
1979          RRETURN(MATCH_NOMATCH);
1980          }
1981      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1982        {        {
1983        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1984    
1985        switch(ecode[1])        switch(ecode[1])
1986          {          {
# Line 1597  for (;;) Line 1989  for (;;)
1989          break;          break;
1990    
1991          case PT_LAMP:          case PT_LAMP:
1992          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
1993               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
1994               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1995            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1996           break;           break;
1997    
1998          case PT_GC:          case PT_GC:
1999          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2000            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2001          break;          break;
2002    
2003          case PT_PC:          case PT_PC:
2004          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2005            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2006          break;          break;
2007    
2008          case PT_SC:          case PT_SC:
2009          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2010            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2011          break;          break;
2012    
# Line 1630  for (;;) Line 2022  for (;;)
2022      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2023    
2024      case OP_EXTUNI:      case OP_EXTUNI:
2025      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2026          {
2027          SCHECK_PARTIAL();
2028          RRETURN(MATCH_NOMATCH);
2029          }
2030      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2031        {        {
2032        int chartype, script;        int category = UCD_CATEGORY(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
2033        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
2034        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2035          {          {
# Line 1643  for (;;) Line 2038  for (;;)
2038            {            {
2039            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2040            }            }
2041          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2042          if (category != ucp_M) break;          if (category != ucp_M) break;
2043          eptr += len;          eptr += len;
2044          }          }
# Line 1664  for (;;) Line 2059  for (;;)
2059      case OP_REF:      case OP_REF:
2060        {        {
2061        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2062        ecode += 3;                                 /* Advance past item */        ecode += 3;
2063    
2064          /* If the reference is unset, there are two possibilities:
2065    
2066        /* If the reference is unset, set the length to be longer than the amount        (a) In the default, Perl-compatible state, set the length to be longer
2067        of subject left; this ensures that every attempt at a match fails. We        than the amount of subject left; this ensures that every attempt at a
2068        can't just fail here, because of the possibility of quantifiers with zero        match fails. We can't just fail here, because of the possibility of
2069        minima. */        quantifiers with zero minima.
2070    
2071        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        (b) If the JavaScript compatibility flag is set, set the length to zero
2072          md->end_subject - eptr + 1 :        so that the back reference matches an empty string.
2073          md->offset_vector[offset+1] - md->offset_vector[offset];  
2074          Otherwise, set the length to the length of what was matched by the
2075          referenced subpattern. */
2076    
2077          if (offset >= offset_top || md->offset_vector[offset] < 0)
2078            length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2079          else
2080            length = md->offset_vector[offset+1] - md->offset_vector[offset];
2081    
2082        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2083    
# Line 1702  for (;;) Line 2106  for (;;)
2106          break;          break;
2107    
2108          default:               /* No repeat follows */          default:               /* No repeat follows */
2109          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2110              {
2111              CHECK_PARTIAL();
2112              RRETURN(MATCH_NOMATCH);
2113              }
2114          eptr += length;          eptr += length;
2115          continue;              /* With the main loop */          continue;              /* With the main loop */
2116          }          }
# Line 1718  for (;;) Line 2126  for (;;)
2126    
2127        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2128          {          {
2129          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2130              {
2131              CHECK_PARTIAL();
2132              RRETURN(MATCH_NOMATCH);
2133              }
2134          eptr += length;          eptr += length;
2135          }          }
2136    
# Line 1735  for (;;) Line 2147  for (;;)
2147            {            {
2148            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2149            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2150            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) RRETURN(MATCH_NOMATCH);
2151              if (!match_ref(offset, eptr, length, md, ims))
2152                {
2153                CHECK_PARTIAL();
2154              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2155                }
2156            eptr += length;            eptr += length;
2157            }            }
2158          /* Control never gets here */          /* Control never gets here */
# Line 1749  for (;;) Line 2165  for (;;)
2165          pp = eptr;          pp = eptr;
2166          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2167            {            {
2168            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2169                {
2170                CHECK_PARTIAL();
2171                break;
2172                }
2173            eptr += length;            eptr += length;
2174            }            }
2175          while (eptr >= pp)          while (eptr >= pp)
# Line 1763  for (;;) Line 2183  for (;;)
2183        }        }
2184      /* Control never gets here */      /* Control never gets here */
2185    
   
   
2186      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2187      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2188      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1819  for (;;) Line 2237  for (;;)
2237          {          {
2238          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2239            {            {
2240            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2241                {
2242                SCHECK_PARTIAL();
2243                RRETURN(MATCH_NOMATCH);
2244                }
2245            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2246            if (c > 255)            if (c > 255)
2247              {              {
# Line 1837  for (;;) Line 2259  for (;;)
2259          {          {
2260          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2261            {            {
2262            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2263                {
2264                SCHECK_PARTIAL();
2265                RRETURN(MATCH_NOMATCH);
2266                }
2267            c = *eptr++;            c = *eptr++;
2268            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2269            }            }
# Line 1861  for (;;) Line 2287  for (;;)
2287              {              {
2288              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2289              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2290              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2291                if (eptr >= md->end_subject)
2292                  {
2293                  SCHECK_PARTIAL();
2294                  RRETURN(MATCH_NOMATCH);
2295                  }
2296              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2297              if (c > 255)              if (c > 255)
2298                {                {
# Line 1881  for (;;) Line 2312  for (;;)
2312              {              {
2313              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2314              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2315              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2316                if (eptr >= md->end_subject)
2317                  {
2318                  SCHECK_PARTIAL();
2319                  RRETURN(MATCH_NOMATCH);
2320                  }
2321              c = *eptr++;              c = *eptr++;
2322              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2323              }              }
# Line 1902  for (;;) Line 2338  for (;;)
2338            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2339              {              {
2340              int len = 1;              int len = 1;
2341              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2342                  {
2343                  SCHECK_PARTIAL();
2344                  break;
2345                  }
2346              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2347              if (c > 255)              if (c > 255)
2348                {                {
# Line 1928  for (;;) Line 2368  for (;;)
2368            {            {
2369            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2370              {              {
2371              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2372                  {
2373                  SCHECK_PARTIAL();
2374                  break;
2375                  }
2376              c = *eptr;              c = *eptr;
2377              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2378              eptr++;              eptr++;
# Line 1948  for (;;) Line 2392  for (;;)
2392    
2393    
2394      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2395      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2396        mode, because Unicode properties are supported in non-UTF-8 mode. */
2397    
2398  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2399      case OP_XCLASS:      case OP_XCLASS:
# Line 1989  for (;;) Line 2434  for (;;)
2434    
2435        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2436          {          {
2437          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2438          GETCHARINC(c, eptr);            {
2439              SCHECK_PARTIAL();
2440              RRETURN(MATCH_NOMATCH);
2441              }
2442            GETCHARINCTEST(c, eptr);
2443          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2444          }          }
2445    
# Line 2008  for (;;) Line 2457  for (;;)
2457            {            {
2458            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2459            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2460            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
2461            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2462            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2463                SCHECK_PARTIAL();
2464                RRETURN(MATCH_NOMATCH);
2465                }
2466              GETCHARINCTEST(c, eptr);
2467              if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2468            }            }
2469          /* Control never gets here */          /* Control never gets here */
2470          }          }
# Line 2023  for (;;) Line 2477  for (;;)
2477          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2478            {            {
2479            int len = 1;            int len = 1;
2480            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2481            GETCHARLEN(c, eptr, len);              {
2482                SCHECK_PARTIAL();
2483                break;
2484                }
2485              GETCHARLENTEST(c, eptr, len);
2486            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2487            eptr += len;            eptr += len;
2488            }            }
# Line 2033  for (;;) Line 2491  for (;;)
2491            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2492            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2493            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2494            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2495            }            }
2496          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2497          }          }
# Line 2051  for (;;) Line 2509  for (;;)
2509        length = 1;        length = 1;
2510        ecode++;        ecode++;
2511        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2512        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2513            {
2514            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2515            RRETURN(MATCH_NOMATCH);
2516            }
2517        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2518        }        }
2519      else      else
# Line 2059  for (;;) Line 2521  for (;;)
2521    
2522      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2523        {        {
2524        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2525            {
2526            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2527            RRETURN(MATCH_NOMATCH);
2528            }
2529        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2530        ecode += 2;        ecode += 2;
2531        }        }
# Line 2075  for (;;) Line 2541  for (;;)
2541        ecode++;        ecode++;
2542        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2543    
2544        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2545            {
2546            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2547            RRETURN(MATCH_NOMATCH);
2548            }
2549    
2550        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2551        can use the fast lookup table. */        can use the fast lookup table. */
# Line 2099  for (;;) Line 2569  for (;;)
2569          if (fc != dc)          if (fc != dc)
2570            {            {
2571  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2572            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2573  #endif  #endif
2574              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2575            }            }
# Line 2110  for (;;) Line 2580  for (;;)
2580    
2581      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2582        {        {
2583        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2584            {
2585            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2586            RRETURN(MATCH_NOMATCH);
2587            }
2588        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2589        ecode += 2;        ecode += 2;
2590        }        }
# Line 2164  for (;;) Line 2638  for (;;)
2638      case OP_MINQUERY:      case OP_MINQUERY:
2639      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2640      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2641    
2642      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2643      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2644      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2645    
2646      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2647    
2648      REPEATCHAR:      REPEATCHAR:
2649  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2179  for (;;) Line 2652  for (;;)
2652        length = 1;        length = 1;
2653        charptr = ecode;        charptr = ecode;
2654        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2655        ecode += length;        ecode += length;
2656    
2657        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 2190  for (;;) Line 2662  for (;;)
2662  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2663          unsigned int othercase;          unsigned int othercase;
2664          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2665              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2666            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2667          else oclength = 0;          else oclength = 0;
2668  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2669    
2670          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2671            {            {
2672            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2673                memcmp(eptr, charptr, length) == 0) eptr += length;
2674  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2675            /* Need braces because of following else */            else if (oclength > 0 &&
2676            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                     eptr <= md->end_subject - oclength &&
2677                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2678    #endif  /* SUPPORT_UCP */
2679            else            else
2680              {              {
2681              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2682              eptr += oclength;              RRETURN(MATCH_NOMATCH);
2683              }              }
 #else   /* without SUPPORT_UCP */  
           else { RRETURN(MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2684            }            }
2685    
2686          if (min == max) continue;          if (min == max) continue;
# Line 2219  for (;;) Line 2691  for (;;)
2691              {              {
2692              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2693              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2694              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2695              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2696                  memcmp(eptr, charptr, length) == 0) eptr += length;
2697  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2698              /* Need braces because of following else */              else if (oclength > 0 &&
2699              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                       eptr <= md->end_subject - oclength &&
2700                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2701    #endif  /* SUPPORT_UCP */
2702              else              else
2703                {                {
2704                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2705                eptr += oclength;                RRETURN(MATCH_NOMATCH);
2706                }                }
 #else   /* without SUPPORT_UCP */  
             else { RRETURN (MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2707              }              }
2708            /* Control never gets here */            /* Control never gets here */
2709            }            }
# Line 2241  for (;;) Line 2713  for (;;)
2713            pp = eptr;            pp = eptr;
2714            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2715              {              {
2716              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2717              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2718  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2719              else if (oclength == 0) break;              else if (oclength > 0 &&
2720                         eptr <= md->end_subject - oclength &&
2721                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2722    #endif  /* SUPPORT_UCP */
2723              else              else
2724                {                {
2725                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2726                eptr += oclength;                break;
2727                }                }
 #else   /* without SUPPORT_UCP */  
             else break;  
 #endif  /* SUPPORT_UCP */  
2728              }              }
2729    
2730            if (possessive) continue;            if (possessive) continue;
2731    
2732            for(;;)            for(;;)
2733             {              {
2734             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2735             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2736             if (eptr == pp) RRETURN(MATCH_NOMATCH);              if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2737  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2738             eptr--;              eptr--;
2739             BACKCHAR(eptr);              BACKCHAR(eptr);
2740  #else   /* without SUPPORT_UCP */  #else   /* without SUPPORT_UCP */
2741             eptr -= length;              eptr -= length;
2742  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2743             }              }
2744            }            }
2745          /* Control never gets here */          /* Control never gets here */
2746          }          }
# Line 2280  for (;;) Line 2753  for (;;)
2753  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2754    
2755      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2756        {  
2757        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2758    
2759      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2760      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2301  for (;;) Line 2772  for (;;)
2772        {        {
2773        fc = md->lcc[fc];        fc = md->lcc[fc];
2774        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2775            {
2776            if (eptr >= md->end_subject)
2777              {
2778              SCHECK_PARTIAL();
2779              RRETURN(MATCH_NOMATCH);
2780              }
2781          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2782            }
2783        if (min == max) continue;        if (min == max) continue;
2784        if (minimize)        if (minimize)
2785          {          {
# Line 2309  for (;;) Line 2787  for (;;)
2787            {            {
2788            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2789            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2790            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) RRETURN(MATCH_NOMATCH);
2791                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2792                {
2793                SCHECK_PARTIAL();
2794              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2795                }
2796              if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2797            }            }
2798          /* Control never gets here */          /* Control never gets here */
2799          }          }
# Line 2320  for (;;) Line 2802  for (;;)
2802          pp = eptr;          pp = eptr;
2803          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2804            {            {
2805            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2806                {
2807                SCHECK_PARTIAL();
2808                break;
2809                }
2810              if (fc != md->lcc[*eptr]) break;
2811            eptr++;            eptr++;
2812            }            }
2813    
2814          if (possessive) continue;          if (possessive) continue;
2815    
2816          while (eptr >= pp)          while (eptr >= pp)
2817            {            {
2818            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
# Line 2339  for (;;) Line 2828  for (;;)
2828    
2829      else      else
2830        {        {
2831        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2832            {
2833            if (eptr >= md->end_subject)
2834              {
2835              SCHECK_PARTIAL();
2836              RRETURN(MATCH_NOMATCH);
2837              }
2838            if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2839            }
2840    
2841        if (min == max) continue;        if (min == max) continue;
2842    
2843        if (minimize)        if (minimize)
2844          {          {
2845          for (fi = min;; fi++)          for (fi = min;; fi++)
2846            {            {
2847            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2848            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2849            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) RRETURN(MATCH_NOMATCH);
2850              if (eptr >= md->end_subject)
2851                {
2852                SCHECK_PARTIAL();
2853              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2854                }
2855              if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2856            }            }
2857          /* Control never gets here */          /* Control never gets here */
2858          }          }
# Line 2357  for (;;) Line 2861  for (;;)
2861          pp = eptr;          pp = eptr;
2862          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2863            {            {
2864            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
2865                {
2866                SCHECK_PARTIAL();
2867                break;
2868                }
2869              if (fc != *eptr) break;
2870            eptr++;            eptr++;
2871            }            }
2872          if (possessive) continue;          if (possessive) continue;
2873    
2874          while (eptr >= pp)          while (eptr >= pp)
2875            {            {
2876            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
# Line 2376  for (;;) Line 2886  for (;;)
2886      checking can be multibyte. */      checking can be multibyte. */
2887    
2888      case OP_NOT:      case OP_NOT:
2889      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2890          {
2891          SCHECK_PARTIAL();
2892          RRETURN(MATCH_NOMATCH);
2893          }
2894      ecode++;      ecode++;
2895      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2896      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2453  for (;;) Line 2967  for (;;)
2967      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2968      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2969    
2970      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
2971    
2972      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2973      fc = *ecode++;      fc = *ecode++;
2974    
2975      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2483  for (;;) Line 2994  for (;;)
2994          register unsigned int d;          register unsigned int d;
2995          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2996            {            {
2997              if (eptr >= md->end_subject)
2998                {
2999                SCHECK_PARTIAL();
3000                RRETURN(MATCH_NOMATCH);
3001                }
3002            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3003            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3004            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
# Line 2494  for (;;) Line 3010  for (;;)
3010        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3011          {          {
3012          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3013              {
3014              if (eptr >= md->end_subject)
3015                {
3016                SCHECK_PARTIAL();
3017                RRETURN(MATCH_NOMATCH);
3018                }
3019            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
3020              }
3021          }          }
3022    
3023        if (min == max) continue;        if (min == max) continue;
# Line 2510  for (;;) Line 3033  for (;;)
3033              {              {
3034              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3035              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3036                if (fi >= max) RRETURN(MATCH_NOMATCH);
3037                if (eptr >= md->end_subject)
3038                  {
3039                  SCHECK_PARTIAL();
3040                  RRETURN(MATCH_NOMATCH);
3041                  }
3042              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3043              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3044              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3045              }              }
3046            }            }
3047          else          else
# Line 2524  for (;;) Line 3052  for (;;)
3052              {              {
3053              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3054              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3055              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) RRETURN(MATCH_NOMATCH);
3056                if (eptr >= md->end_subject)
3057                  {
3058                  SCHECK_PARTIAL();
3059                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3060                  }
3061                if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
3062              }              }
3063            }            }
3064          /* Control never gets here */          /* Control never gets here */
# Line 2545  for (;;) Line 3078  for (;;)
3078            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3079              {              {
3080              int len = 1;              int len = 1;
3081              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3082                  {
3083                  SCHECK_PARTIAL();
3084                  break;
3085                  }
3086              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3087              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3088              if (fc == d) break;              if (fc == d) break;
# Line 2566  for (;;) Line 3103  for (;;)
3103            {            {
3104            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3105              {              {
3106              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3107                  {
3108                  SCHECK_PARTIAL();
3109                  break;
3110                  }
3111                if (fc == md->lcc[*eptr]) break;
3112              eptr++;              eptr++;
3113              }              }
3114            if (possessive) continue;            if (possessive) continue;
# Line 2594  for (;;) Line 3136  for (;;)
3136          register unsigned int d;          register unsigned int d;
3137          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3138            {            {
3139              if (eptr >= md->end_subject)
3140                {
3141                SCHECK_PARTIAL();
3142                RRETURN(MATCH_NOMATCH);
3143                }
3144            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3145            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
3146            }            }
# Line 2603  for (;;) Line 3150  for (;;)
3150        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3151          {          {
3152          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3153              {
3154              if (eptr >= md->end_subject)
3155                {
3156                SCHECK_PARTIAL();
3157                RRETURN(MATCH_NOMATCH);
3158                }
3159            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3160              }
3161          }          }
3162    
3163        if (min == max) continue;        if (min == max) continue;
# Line 2619  for (;;) Line 3173  for (;;)
3173              {              {
3174              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3175              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3176              GETCHARINC(d, eptr);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3177              if (fi >= max || eptr >= md->end_subject || fc == d)              if (eptr >= md->end_subject)
3178                  {
3179                  SCHECK_PARTIAL();
3180                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3181                  }
3182                GETCHARINC(d, eptr);
3183                if (fc == d) RRETURN(MATCH_NOMATCH);
3184              }              }
3185            }            }
3186          else          else
# Line 2632  for (;;) Line 3191  for (;;)
3191              {              {
3192              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3193              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3194              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) RRETURN(MATCH_NOMATCH);
3195                if (eptr >= md->end_subject)
3196                  {
3197                  SCHECK_PARTIAL();
3198                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3199                  }
3200                if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3201              }              }
3202            }            }
3203          /* Control never gets here */          /* Control never gets here */
# Line 2653  for (;;) Line 3217  for (;;)
3217            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3218              {              {
3219              int len = 1;              int len = 1;
3220              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3221                  {
3222                  SCHECK_PARTIAL();
3223                  break;
3224                  }
3225              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3226              if (fc == d) break;              if (fc == d) break;
3227              eptr += len;              eptr += len;
# Line 2673  for (;;) Line 3241  for (;;)
3241            {            {
3242            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3243              {              {
3244              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3245                  {
3246                  SCHECK_PARTIAL();
3247                  break;
3248                  }
3249                if (fc == *eptr) break;
3250              eptr++;              eptr++;
3251              }              }
3252            if (possessive) continue;            if (possessive) continue;
# Line 2767  for (;;) Line 3340  for (;;)
3340    
3341      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3342      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3343      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3344      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3345      and single-bytes. */      and single-bytes. */
3346    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3347      if (min > 0)      if (min > 0)
3348        {        {
3349  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2785  for (;;) Line 3355  for (;;)
3355            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3356            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3357              {              {
3358              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3359              GETCHARINC(c, eptr);                {
3360                  SCHECK_PARTIAL();
3361                  RRETURN(MATCH_NOMATCH);
3362                  }
3363                GETCHARINCTEST(c, eptr);
3364              }              }
3365            break;            break;
3366    
3367            case PT_LAMP:            case PT_LAMP:
3368            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3369              {              {
3370              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3371              GETCHARINC(c, eptr);                {
3372              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3373                  RRETURN(MATCH_NOMATCH);
3374                  }
3375                GETCHARINCTEST(c, eptr);
3376                prop_chartype = UCD_CHARTYPE(c);
3377              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3378                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3379                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2806  for (;;) Line 3384  for (;;)
3384            case PT_GC:            case PT_GC:
3385            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3386              {              {
3387              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3388              GETCHARINC(c, eptr);                {
3389              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3390                  RRETURN(MATCH_NOMATCH);
3391                  }
3392                GETCHARINCTEST(c, eptr);
3393                prop_category = UCD_CATEGORY(c);
3394              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3395                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3396              }              }
# Line 2817  for (;;) Line 3399  for (;;)
3399            case PT_PC:            case PT_PC:
3400            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3401              {              {
3402              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3403              GETCHARINC(c, eptr);                {
3404              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3405                  RRETURN(MATCH_NOMATCH);
3406                  }
3407                GETCHARINCTEST(c, eptr);
3408                prop_chartype = UCD_CHARTYPE(c);
3409              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3410                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3411              }              }
# Line 2828  for (;;) Line 3414  for (;;)
3414            case PT_SC:            case PT_SC:
3415            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3416              {              {
3417              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3418              GETCHARINC(c, eptr);                {
3419              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3420                  RRETURN(MATCH_NOMATCH);
3421                  }
3422                GETCHARINCTEST(c, eptr);
3423                prop_script = UCD_SCRIPT(c);
3424              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3425                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3426              }              }
# Line 2848  for (;;) Line 3438  for (;;)
3438          {          {
3439          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3440            {            {
3441              if (eptr >= md->end_subject)
3442                {
3443                SCHECK_PARTIAL();
3444                RRETURN(MATCH_NOMATCH);
3445                }
3446            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3447            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3448            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3449            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3450              {              {
3451              int len = 1;              int len = 1;
3452              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3453                {                else { GETCHARLEN(c, eptr, len); }
3454                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3455              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3456              eptr += len;              eptr += len;
3457              }              }
# Line 2876  for (;;) Line 3469  for (;;)
3469          case OP_ANY:          case OP_ANY:
3470          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3471            {            {
3472            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3473                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))              {
3474                SCHECK_PARTIAL();
3475                RRETURN(MATCH_NOMATCH);
3476                }
3477              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3478              eptr++;
3479              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3480              }
3481            break;
3482    
3483            case OP_ALLANY:
3484            for (i = 1; i <= min; i++)
3485              {
3486              if (eptr >= md->end_subject)
3487                {
3488                SCHECK_PARTIAL();
3489              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3490                }
3491            eptr++;            eptr++;
3492            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3493            }            }
3494          break;          break;
3495    
3496          case OP_ANYBYTE:          case OP_ANYBYTE:
3497            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3498          eptr += min;          eptr += min;
3499          break;          break;
3500    
3501          case OP_ANYNL:          case OP_ANYNL:
3502          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3503            {            {
3504            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3505                {
3506                SCHECK_PARTIAL();
3507                RRETURN(MATCH_NOMATCH);
3508                }
3509            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3510            switch(c)            switch(c)
3511              {              {
# Line 2899  for (;;) Line 3513  for (;;)
3513              case 0x000d:              case 0x000d:
3514              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3515              break;              break;
3516    
3517              case 0x000a:              case 0x000a:
3518                break;
3519    
3520              case 0x000b:              case 0x000b:
3521              case 0x000c:              case 0x000c:
3522              case 0x0085:              case 0x0085:
3523              case 0x2028:              case 0x2028:
3524              case 0x2029:              case 0x2029:
3525                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3526              break;              break;
3527              }              }
3528            }            }
# Line 2913  for (;;) Line 3531  for (;;)
3531          case OP_NOT_HSPACE:          case OP_NOT_HSPACE:
3532          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3533            {            {
3534            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3535                {
3536                SCHECK_PARTIAL();
3537                RRETURN(MATCH_NOMATCH);
3538                }
3539            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3540            switch(c)            switch(c)
3541              {              {
# Line 2945  for (;;) Line 3567  for (;;)
3567          case OP_HSPACE:          case OP_HSPACE:
3568          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3569            {            {
3570            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3571                {
3572                SCHECK_PARTIAL();
3573                RRETURN(MATCH_NOMATCH);
3574                }
3575            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3576            switch(c)            switch(c)
3577              {              {
# Line 2977  for (;;) Line 3603  for (;;)
3603          case OP_NOT_VSPACE:          case OP_NOT_VSPACE:
3604          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3605            {            {
3606            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3607                {
3608                SCHECK_PARTIAL();
3609                RRETURN(MATCH_NOMATCH);
3610                }
3611            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3612            switch(c)            switch(c)
3613              {              {
# Line 2997  for (;;) Line 3627  for (;;)
3627          case OP_VSPACE:          case OP_VSPACE:
3628          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3629            {            {
3630            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3631                {
3632                SCHECK_PARTIAL();
3633                RRETURN(MATCH_NOMATCH);
3634                }
3635            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3636            switch(c)            switch(c)
3637              {              {
# Line 3017  for (;;) Line 3651  for (;;)
3651          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3652          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3653            {            {
3654            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3655                {
3656                SCHECK_PARTIAL();
3657                RRETURN(MATCH_NOMATCH);
3658                }
3659            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3660            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3661              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 3027  for (;;) Line 3665  for (;;)
3665          case OP_DIGIT:          case OP_DIGIT:
3666          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3667            {            {
3668            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3669               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3670                SCHECK_PARTIAL();
3671                RRETURN(MATCH_NOMATCH);
3672                }
3673              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3674              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3675            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3676            }            }
# Line 3037  for (;;) Line 3679  for (;;)
3679          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3680          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3681            {            {
3682            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3683               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3684                SCHECK_PARTIAL();
3685              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3686            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              }
3687              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3688                RRETURN(MATCH_NOMATCH);
3689              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3690            }            }
3691          break;          break;
3692    
3693          case OP_WHITESPACE:          case OP_WHITESPACE:
3694          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3695            {            {
3696            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3697               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3698                SCHECK_PARTIAL();
3699                RRETURN(MATCH_NOMATCH);
3700                }
3701              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3702              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3703            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3704            }            }
# Line 3057  for (;;) Line 3707  for (;;)
3707          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3708          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3709            {            {
3710            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3711               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))              {
3712                SCHECK_PARTIAL();
3713              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3714            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              }
3715              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3716                RRETURN(MATCH_NOMATCH);
3717              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3718            }            }
3719          break;          break;
3720    
3721          case OP_WORDCHAR:          case OP_WORDCHAR:
3722          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3723            {            {
3724            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3725               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3726                SCHECK_PARTIAL();
3727                RRETURN(MATCH_NOMATCH);
3728                }
3729              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3730              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3731            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3732            }            }
# Line 3082  for (;;) Line 3740  for (;;)
3740  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
3741    
3742        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
3743        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum        than OP_PROP and OP_NOTPROP. */
       number of bytes present, as this was tested above. */  
3744    
3745        switch(ctype)        switch(ctype)
3746          {          {
3747          case OP_ANY:          case OP_ANY:
3748          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3749            {            {
3750            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
3751              {              {
3752              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3753              eptr++;              RRETURN(MATCH_NOMATCH);
3754              }              }
3755              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3756              eptr++;
3757            }            }
         else eptr += min;  
3758          break;          break;
3759    
3760          case OP_ANYBYTE:          case OP_ALLANY:
3761            if (eptr > md->end_subject - min)
3762              {
3763              SCHECK_PARTIAL();
3764              RRETURN(MATCH_NOMATCH);
3765              }
3766          eptr += min;          eptr += min;
3767          break;          break;
3768    
3769          /* Because of the CRLF case, we can't assume the minimum number of          case OP_ANYBYTE:
3770          bytes are present in this case. */          if (eptr > md->end_subject - min)
3771              {
3772              SCHECK_PARTIAL();
3773              RRETURN(MATCH_NOMATCH);
3774              }
3775            eptr += min;
3776            break;
3777    
3778          case OP_ANYNL:          case OP_ANYNL:
3779          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3780            {            {
3781            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3782                {
3783                SCHECK_PARTIAL();
3784                RRETURN(MATCH_NOMATCH);
3785                }
3786            switch(*eptr++)            switch(*eptr++)
3787              {              {
3788              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
# Line 3117  for (;;) Line 3790  for (;;)
3790              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3791              break;              break;
3792              case 0x000a:              case 0x000a:
3793                break;
3794    
3795              case 0x000b:              case 0x000b:
3796              case 0x000c:              case 0x000c:
3797              case 0x0085:              case 0x0085:
3798                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3799              break;              break;
3800              }              }
3801            }            }
# Line 3128  for (;;) Line 3804  for (;;)
3804          case OP_NOT_HSPACE:          case OP_NOT_HSPACE:
3805          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3806            {            {
3807            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3808                {
3809                SCHECK_PARTIAL();
3810                RRETURN(MATCH_NOMATCH);
3811                }
3812            switch(*eptr++)            switch(*eptr++)
3813              {              {
3814              default: break;              default: break;
# Line 3143  for (;;) Line 3823  for (;;)
3823          case OP_HSPACE:          case OP_HSPACE:
3824          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3825            {            {
3826            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3827                {
3828                SCHECK_PARTIAL();
3829                RRETURN(MATCH_NOMATCH);
3830                }
3831            switch(*eptr++)            switch(*eptr++)
3832              {              {
3833              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
# Line 3158  for (;;) Line 3842  for (;;)
3842          case OP_NOT_VSPACE:          case OP_NOT_VSPACE:
3843          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3844            {            {
3845            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3846                {
3847                SCHECK_PARTIAL();
3848                RRETURN(MATCH_NOMATCH);
3849                }
3850            switch(*eptr++)            switch(*eptr++)
3851              {              {
3852              default: break;              default: break;
# Line 3175  for (;;) Line 3863  for (;;)
3863          case OP_VSPACE:          case OP_VSPACE:
3864          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3865            {            {
3866            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3867                {
3868                SCHECK_PARTIAL();
3869                RRETURN(MATCH_NOMATCH);
3870                }
3871            switch(*eptr++)            switch(*eptr++)
3872              {              {
3873              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
# Line 3191  for (;;) Line 3883  for (;;)
3883    
3884          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3885          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3886              {
3887              if (eptr >= md->end_subject)
3888                {
3889                SCHECK_PARTIAL();
3890                RRETURN(MATCH_NOMATCH);
3891                }
3892            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3893              }
3894          break;          break;
3895    
3896          case OP_DIGIT:          case OP_DIGIT:
3897          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3898              {
3899              if (eptr >= md->end_subject)
3900                {
3901                SCHECK_PARTIAL();
3902                RRETURN(MATCH_NOMATCH);
3903                }
3904            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3905              }
3906          break;          break;
3907    
3908          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3909          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3910              {
3911              if (eptr >= md->end_subject)
3912                {
3913                SCHECK_PARTIAL();
3914                RRETURN(MATCH_NOMATCH);
3915                }
3916            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3917              }
3918          break;          break;
3919    
3920          case OP_WHITESPACE:          case OP_WHITESPACE:
3921          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3922              {
3923              if (eptr >= md->end_subject)
3924                {
3925                SCHECK_PARTIAL();
3926                RRETURN(MATCH_NOMATCH);
3927                }
3928            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3929              }
3930          break;          break;
3931    
3932          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3933          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3934              {
3935              if (eptr >= md->end_subject)
3936                {
3937                SCHECK_PARTIAL();
3938                RRETURN(MATCH_NOMATCH);
3939                }
3940            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if ((md->ctypes[*eptr++] & ctype_word) != 0)
3941              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3942              }
3943          break;          break;
3944    
3945          case OP_WORDCHAR:          case OP_WORDCHAR:
3946          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3947              {
3948              if (eptr >= md->end_subject)
3949                {
3950                SCHECK_PARTIAL();
3951                RRETURN(MATCH_NOMATCH);
3952                }
3953            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if ((md->ctypes[*eptr++] & ctype_word) == 0)
3954              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3955              }
3956          break;          break;
3957    
3958          default:          default:
# Line 3246  for (;;) Line 3980  for (;;)
3980              {              {
3981              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3982              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3983              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3984                if (eptr >= md->end_subject)
3985                  {
3986                  SCHECK_PARTIAL();
3987                  RRETURN(MATCH_NOMATCH);
3988                  }
3989              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3990              if (prop_fail_result) RRETURN(MATCH_NOMATCH);              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3991              }              }
# Line 3257  for (;;) Line 3996  for (;;)
3996              {              {
3997              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3998              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3999              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
4000                if (eptr >= md->end_subject)
4001                  {
4002                  SCHECK_PARTIAL();
4003                  RRETURN(MATCH_NOMATCH);
4004                  }
4005              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
4006              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
4007              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4008                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
4009                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3272  for (;;) Line 4016  for (;;)
4016              {              {
4017              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
4018              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4019              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
4020                if (eptr >= md->end_subject)
4021                  {
4022                  SCHECK_PARTIAL();
4023                  RRETURN(MATCH_NOMATCH);
4024                  }
4025              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
4026              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4027              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4028                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4029              }              }
# Line 3285  for (;;) Line 4034  for (;;)
4034              {              {
4035              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4036              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4037              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
4038                if (eptr >= md->end_subject)
4039                  {
4040                  SCHECK_PARTIAL();
4041                  RRETURN(MATCH_NOMATCH);
4042                  }
4043              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
4044              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
4045              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4046                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4047              }              }
# Line 3298  for (;;) Line 4052  for (;;)
4052              {              {
4053              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
4054              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4055              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
4056                if (eptr >= md->end_subject)
4057                  {
4058                  SCHECK_PARTIAL();
4059                  RRETURN(MATCH_NOMATCH);
4060                  }
4061              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
4062              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
4063              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4064                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4065              }              }
# Line 3320  for (;;) Line 4079  for (;;)
4079            {            {
4080            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
4081            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4082            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
4083              if (eptr >= md->end_subject)
4084                {
4085                SCHECK_PARTIAL();
4086                RRETURN(MATCH_NOMATCH);
4087                }
4088            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4089            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
4090            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
4091            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4092              {              {
4093              int len = 1;              int len = 1;
4094              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
4095                {                else { GETCHARLEN(c, eptr, len); }
4096                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
4097              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
4098              eptr += len;              eptr += len;
4099              }              }
# Line 3349  for (;;) Line 4111  for (;;)
4111            {            {
4112            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
4113            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4114            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) RRETURN(MATCH_NOMATCH);
4115                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&            if (eptr >= md->end_subject)
4116                  IS_NEWLINE(eptr)))              {
4117                SCHECK_PARTIAL();
4118                RRETURN(MATCH_NOMATCH);
4119                }
4120              if (ctype == OP_ANY && IS_NEWLINE(eptr))
4121              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
   
4122            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4123            switch(ctype)            switch(ctype)
4124              {              {
4125              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
4126              break;              case OP_ALLANY:
   
4127              case OP_ANYBYTE:              case OP_ANYBYTE:
4128              break;              break;
4129    
# Line 3371  for (;;) Line 4135  for (;;)
4135                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4136                break;                break;
4137                case 0x000a:                case 0x000a:
4138                  break;
4139    
4140                case 0x000b:                case 0x000b:
4141                case 0x000c:                case 0x000c:
4142                case 0x0085:                case 0x0085:
4143                case 0x2028:                case 0x2028:
4144                case 0x2029:                case 0x2029:
4145                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4146                break;                break;
4147                }                }
4148              break;              break;
# Line 3507  for (;;) Line 4274  for (;;)
4274            {            {
4275            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4276            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4277            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) RRETURN(MATCH_NOMATCH);
4278                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))            if (eptr >= md->end_subject)
4279                {
4280                SCHECK_PARTIAL();
4281                RRETURN(MATCH_NOMATCH);
4282                }
4283              if (ctype == OP_ANY && IS_NEWLINE(eptr))
4284              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
   
4285            c = *eptr++;            c = *eptr++;
4286            switch(ctype)            switch(ctype)
4287              {              {
4288              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
4289              break;              case OP_ALLANY:
   
4290              case OP_ANYBYTE:              case OP_ANYBYTE:
4291              break;              break;
4292    
# Line 3527  for (;;) Line 4297  for (;;)
4297                case 0x000d:                case 0x000d:
4298                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4299                break;                break;
4300    
4301                case 0x000a:                case 0x000a:
4302                  break;
4303    
4304                case 0x000b:                case 0x000b:
4305                case 0x000c:                case 0x000c:
4306                case 0x0085:                case 0x0085:
4307                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4308                break;                break;
4309                }                }
4310              break;              break;
# Line 3632  for (;;) Line 4406  for (;;)
4406            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4407              {              {
4408              int len = 1;              int len = 1;
4409              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4410                  {
4411                  SCHECK_PARTIAL();
4412                  break;
4413                  }
4414              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4415              if (prop_fail_result) break;              if (prop_fail_result) break;
4416              eptr+= len;              eptr+= len;
# Line 3643  for (;;) Line 4421  for (;;)
4421            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4422              {              {
4423              int len = 1;              int len = 1;
4424              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4425                  {
4426                  SCHECK_PARTIAL();
4427                  break;
4428                  }
4429              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4430              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
4431              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4432                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
4433                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3658  for (;;) Line 4440  for (;;)
4440            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4441              {              {
4442              int len = 1;              int len = 1;
4443              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4444                  {
4445                  SCHECK_PARTIAL();
4446                  break;
4447                  }
4448              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4449              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4450              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4451                break;                break;
4452              eptr+= len;              eptr+= len;
# Line 3671  for (;;) Line 4457  for (;;)
4457            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4458              {              {
4459              int len = 1;              int len = 1;
4460              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4461                  {
4462                  SCHECK_PARTIAL();
4463                  break;
4464                  }
4465              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4466              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
4467              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4468                break;                break;
4469              eptr+= len;              eptr+= len;
# Line 3684  for (;;) Line 4474  for (;;)
4474            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4475              {              {
4476              int len = 1;              int len = 1;
4477              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4478                  {
4479                  SCHECK_PARTIAL();
4480                  break;
4481                  }
4482              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4483              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
4484              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4485                break;                break;
4486              eptr+= len;              eptr+= len;
# Line 3702  for (;;) Line 4496  for (;;)
4496            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
4497            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4498            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
4499            BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
4500            }            }
4501          }          }
4502    
# Line 3713  for (;;) Line 4507  for (;;)
4507          {          {
4508          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4509            {            {
4510            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
4511                {
4512                SCHECK_PARTIAL();
4513                break;
4514                }
4515            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4516            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
4517            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
4518            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4519              {              {
# Line 3724  for (;;) Line 4522  for (;;)
4522                {                {
4523                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
4524                }                }
4525              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4526              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
4527              eptr += len;              eptr += len;
4528              }              }
# Line 3733  for (;;) Line 4531  for (;;)
4531          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4532    
4533          if (possessive) continue;          if (possessive) continue;
4534    
4535          for(;;)          for(;;)
4536            {            {
4537            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
# Line 3741  for (;;) Line 4540  for (;;)
4540            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
4541              {              {
4542              int len = 1;              int len = 1;
             BACKCHAR(eptr);  
4543              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr; else
4544                {                {
4545                  BACKCHAR(eptr);
4546                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
4547                }                }
4548              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4549              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
4550              eptr--;              eptr--;
4551              }              }
# Line 3764  for (;;) Line 4563  for (;;)
4563          switch(ctype)          switch(ctype)
4564            {            {
4565            case OP_ANY:            case OP_ANY:
   
           /* Special code is required for UTF8, but when the maximum is  
           unlimited we don't need it, so we repeat the non-UTF8 code. This is  
           probably worth it, because .* is quite a common idiom. */  
   
4566            if (max < INT_MAX)            if (max < INT_MAX)
4567              {              {
4568              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
4569                {                {
4570                for (i = min; i < max; i++)                if (eptr >= md->end_subject)
4571                  {                  {
4572                  if (eptr >= md->end_subject) break;                  SCHECK_PARTIAL();
4573                  eptr++;                  break;
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
4574                  }                  }
4575                  if (IS_NEWLINE(eptr)) break;
4576                  eptr++;
4577                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4578                }                }
4579              }              }
4580    
# Line 3795  for (;;) Line 4582  for (;;)
4582    
4583            else            else
4584              {              {
4585              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
4586                {                {
4587                for (i = min; i < max; i++)                if (eptr >= md->end_subject)
4588                  {                  {
4589                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                  SCHECK_PARTIAL();
4590                  eptr++;                  break;
4591                  }                  }
4592                break;                if (IS_NEWLINE(eptr)) break;
4593                  eptr++;
4594                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4595                }                }
4596              else              }
4597              break;
4598    
4599              case OP_ALLANY:
4600              if (max < INT_MAX)
4601                {
4602                for (i = min; i < max; i++)
4603                {                {
4604                c = max - min;                if (eptr >= md->end_subject)
4605                if (c > (unsigned int)(md->end_subject - eptr))                  {
4606                  c = md->end_subject - eptr;                  SCHECK_PARTIAL();
4607                eptr += c;                  break;
4608                    }
4609                  eptr++;
4610                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4611                }                }
4612              }              }
4613              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
4614            break;            break;
4615    
4616            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 3819  for (;;) Line 4618  for (;;)
4618            case OP_ANYBYTE:            case OP_ANYBYTE:
4619            c = max - min;            c = max - min;
4620            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
4621              c = md->end_subject - eptr;              {
4622            eptr += c;              eptr = md->end_subject;
4623                SCHECK_PARTIAL();
4624                }
4625              else eptr += c;
4626            break;            break;
4627    
4628            case OP_ANYNL:            case OP_ANYNL:
4629            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4630              {              {
4631              int len = 1;              int len = 1;
4632              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4633                  {
4634                  SCHECK_PARTIAL();
4635                  break;
4636                  }
4637              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4638              if (c == 0x000d)              if (c == 0x000d)
4639                {                {
# Line 3836  for (;;) Line 4642  for (;;)
4642                }                }
4643              else              else
4644                {                {
4645                if (c != 0x000a && c != 0x000b && c != 0x000c &&                if (c != 0x000a &&
4646                    c != 0x0085 && c != 0x2028 && c != 0x2029)                    (md->bsr_anycrlf ||
4647                       (c != 0x000b && c != 0x000c &&
4648                        c != 0x0085 && c != 0x2028 && c != 0x2029)))
4649                  break;                  break;
4650                eptr += len;                eptr += len;
4651                }                }
# Line 3850  for (;;) Line 4658  for (;;)
4658              {              {
4659              BOOL gotspace;              BOOL gotspace;
4660              int len = 1;              int len = 1;
4661              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4662                  {
4663                  SCHECK_PARTIAL();
4664                  break;
4665                  }
4666              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4667              switch(c)              switch(c)
4668                {                {
# Line 3888  for (;;) Line 4700  for (;;)
4700              {              {
4701              BOOL gotspace;              BOOL gotspace;
4702              int len = 1;              int len = 1;
4703              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4704                  {
4705                  SCHECK_PARTIAL();
4706                  break;
4707                  }
4708              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4709              switch(c)              switch(c)
4710                {                {
# Line 3912  for (;;) Line 4728  for (;;)
4728            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4729              {              {
4730              int len = 1;              int len = 1;
4731              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4732                  {
4733                  SCHECK_PARTIAL();
4734                  break;
4735                  }
4736              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4737              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
4738              eptr+= len;              eptr+= len;
# Line 3923  for (;;) Line 4743  for (;;)
4743            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4744              {              {
4745              int len = 1;              int len = 1;
4746              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4747                  {
4748                  SCHECK_PARTIAL();
4749                  break;
4750                  }
4751              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4752              if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;              if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
4753              eptr+= len;              eptr+= len;
# Line 3934  for (;;) Line 4758  for (;;)
4758            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4759              {              {
4760              int len = 1;              int len = 1;
4761              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4762                  {
4763                  SCHECK_PARTIAL();
4764                  break;
4765                  }
4766              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4767              if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;              if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
4768              eptr+= len;              eptr+= len;
# Line 3945  for (;;) Line 4773  for (;;)
4773            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4774              {              {
4775              int len = 1;              int len = 1;
4776              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4777                  {
4778                  SCHECK_PARTIAL();
4779                  break;
4780                  }
4781              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4782              if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;              if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
4783              eptr+= len;              eptr+= len;
# Line 3956  for (;;) Line 4788  for (;;)
4788            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4789              {              {
4790              int len = 1;              int len = 1;
4791              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4792                  {
4793                  SCHECK_PARTIAL();
4794                  break;
4795                  }
4796              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4797              if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;              if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
4798              eptr+= len;              eptr+= len;
# Line 3967  for (;;) Line 4803  for (;;)
4803            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4804              {              {
4805              int len = 1;              int len = 1;
4806              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4807                  {
4808                  SCHECK_PARTIAL();
4809                  break;
4810                  }
4811              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4812              if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;              if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
4813              eptr+= len;              eptr+= len;
# Line 3990  for (;;) Line 4830  for (;;)
4830            }            }
4831          }          }
4832        else        else
4833  #endif  #endif  /* SUPPORT_UTF8 */
4834    
4835        /* Not UTF-8 mode */        /* Not UTF-8 mode */
4836          {          {
4837          switch(ctype)          switch(ctype)
4838            {            {
4839            case OP_ANY:            case OP_ANY:
4840            if ((ims & PCRE_DOTALL) == 0)            for (i = min; i < max; i++)
4841              {              {
4842              for (i = min; i < max; i++)              if (eptr >= md->end_subject)
4843                {                {
4844                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                SCHECK_PARTIAL();
4845                eptr++;                break;
4846                }                }
4847              break;              if (IS_NEWLINE(eptr)) break;
4848                eptr++;
4849              }              }
4850            /* For DOTALL case, fall through and treat as \C */            break;
4851    
4852              case OP_ALLANY:
4853            case OP_ANYBYTE:            case OP_ANYBYTE:
4854            c = max - min;            c = max - min;
4855            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
4856              c = md->end_subject - eptr;              {
4857            eptr += c;              eptr = md->end_subject;
4858                SCHECK_PARTIAL();
4859                }
4860              else eptr += c;
4861            break;            break;
4862    
4863            case OP_ANYNL:            case OP_ANYNL:
4864            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4865              {              {
4866              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4867                  {
4868                  SCHECK_PARTIAL();
4869                  break;
4870                  }
4871              c = *eptr;              c = *eptr;
4872              if (c == 0x000d)              if (c == 0x000d)
4873                {                {
# Line 4027  for (;;) Line 4876  for (;;)
4876                }                }
4877              else              else
4878                {                {
4879                if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)                if (c != 0x000a &&
4880                      (md->bsr_anycrlf ||
4881                        (c != 0x000b && c != 0x000c && c != 0x0085)))
4882                  break;                  break;
4883                eptr++;                eptr++;
4884                }                }
# Line 4037  for (;;) Line 4888  for (;;)
4888            case OP_NOT_HSPACE:            case OP_NOT_HSPACE:
4889            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4890              {              {
4891              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4892                  {
4893                  SCHECK_PARTIAL();
4894                  break;
4895                  }
4896              c = *eptr;              c = *eptr;
4897              if (c == 0x09 || c == 0x20 || c == 0xa0) break;              if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4898              eptr++;              eptr++;
# Line 4047  for (;;) Line 4902  for (;;)
4902            case OP_HSPACE:            case OP_HSPACE:
4903            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4904              {              {
4905              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4906                  {
4907                  SCHECK_PARTIAL();
4908                  break;
4909                  }
4910              c = *eptr;              c = *eptr;
4911              if (c != 0x09 && c != 0x20 && c != 0xa0) break;              if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4912              eptr++;              eptr++;
# Line 4057  for (;;) Line 4916  for (;;)
4916            case OP_NOT_VSPACE:            case OP_NOT_VSPACE:
4917            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4918              {              {
4919              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4920                  {
4921                  SCHECK_PARTIAL();
4922                  break;
4923                  }
4924              c = *eptr;              c = *eptr;
4925              if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)              if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4926                break;                break;
# Line 4068  for (;;) Line 4931  for (;;)
4931            case OP_VSPACE:            case OP_VSPACE:
4932            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4933              {              {
4934              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4935                  {
4936                  SCHECK_PARTIAL();
4937                  break;
4938                  }
4939              c = *eptr;              c = *eptr;
4940              if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)              if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4941                break;                break;
# Line 4079  for (;;) Line 4946  for (;;)
4946            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
4947            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4948              {              {
4949              if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)              if (eptr >= md->end_subject)
4950                  {
4951                  SCHECK_PARTIAL();
4952                break;                break;
4953                  }
4954                if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
4955              eptr++;              eptr++;
4956              }              }
4957            break;            break;
# Line 4088  for (;;) Line 4959  for (;;)
4959            case OP_DIGIT:            case OP_DIGIT:
4960            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4961              {              {
4962              if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)              if (eptr >= md->end_subject)
4963                  {
4964                  SCHECK_PARTIAL();
4965                break;                break;
4966                  }
4967                if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
4968              eptr++;              eptr++;
4969              }              }
4970            break;            break;
# Line 4097  for (;;) Line 4972  for (;;)
4972            case OP_NOT_WHITESPACE:            case OP_NOT_WHITESPACE:
4973            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4974              {              {
4975              if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)              if (eptr >= md->end_subject)
4976                  {
4977                  SCHECK_PARTIAL();
4978                break;                break;
4979                  }
4980                if ((md->ctypes[*eptr] & ctype_space) != 0) break;
4981              eptr++;              eptr++;
4982              }              }
4983            break;            break;
# Line 4106  for (;;) Line 4985  for (;;)
4985            case OP_WHITESPACE:            case OP_WHITESPACE:
4986            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4987              {              {
4988              if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)              if (eptr >= md->end_subject)
4989                  {
4990                  SCHECK_PARTIAL();
4991                break;                break;
4992                  }
4993                if ((md->ctypes[*eptr] & ctype_space) == 0) break;
4994              eptr++;              eptr++;
4995              }              }
4996            break;            break;
# Line 4115  for (;;) Line 4998  for (;;)
4998            case OP_NOT_WORDCHAR:            case OP_NOT_WORDCHAR:
4999            for (i = min; i < max; i++)            for (i = min; i < max; i++)
5000              {              {
5001              if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)              if (eptr >= md->end_subject)
5002                  {
5003                  SCHECK_PARTIAL();
5004                break;                break;
5005                  }
5006                if ((md->ctypes[*eptr] & ctype_word) != 0) break;
5007              eptr++;              eptr++;
5008              }              }
5009            break;            break;
# Line 4124  for (;;) Line 5011  for (;;)
5011            case OP_WORDCHAR:            case OP_WORDCHAR:
5012            for (i = min; i < max; i++)            for (i = min; i < max; i++)
5013              {              {
5014              if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)              if (eptr >= md->end_subject)
5015                  {
5016                  SCHECK_PARTIAL();
5017                break;                break;
5018                  }
5019                if ((md->ctypes[*eptr] & ctype_word) == 0) break;
5020              eptr++;              eptr++;
5021              }              }
5022            break;            break;
# Line 4177  HEAP_RETURN: Line 5068  HEAP_RETURN:
5068  switch (frame->Xwhere)  switch (frame->Xwhere)
5069    {    {
5070    LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)    LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
5071    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5072    LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5073    LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5074    LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)    LBL(53) LBL(54)
5075    LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)  #ifdef SUPPORT_UTF8
5076      LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5077      LBL(32) LBL(34) LBL(42) LBL(46)
5078    #ifdef SUPPORT_UCP
5079      LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5080    #endif  /* SUPPORT_UCP */
5081    #endif  /* SUPPORT_UTF8 */
5082    default:    default:
5083    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
5084    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
# Line 4273  Returns:          > 0 => success; value Line 5170  Returns:          > 0 => success; value
5170                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
5171  */  */
5172    
5173  PCRE_EXP_DEFN int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
5174  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
5175    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5176    int offsetcount)    int offsetcount)
# Line 4297  const uschar *tables; Line 5194  const uschar *tables;
5194  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
5195  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
5196  USPTR end_subject;  USPTR end_subject;
5197    USPTR start_partial = NULL;
5198  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
 eptrblock eptrchain[EPTR_WORK_SIZE];  
5199    
5200  pcre_study_data internal_study;  pcre_study_data internal_study;
5201  const pcre_study_data *study;  const pcre_study_data *study;
# Line 4314  if (re == NULL || subject == NULL || Line 5211  if (re == NULL || subject == NULL ||
5211     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5212  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5213    
5214    /* This information is for finding all the numbers associated with a given
5215    name, for condition testing. */
5216    
5217    md->name_table = (uschar *)re + re->name_table_offset;
5218    md->name_count = re->name_count;
5219    md->name_entry_size = re->name_entry_size;
5220    
5221  /* Fish out the optional data from the extra_data structure, first setting  /* Fish out the optional data from the extra_data structure, first setting
5222  the default values. */  the default values. */
5223    
# Line 4361  if (re->magic_number != MAGIC_NUMBER) Line 5265  if (re->magic_number != MAGIC_NUMBER)
5265  /* Set up other data */  /* Set up other data */
5266    
5267  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
5268  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
5269  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
5270    
5271  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
# Line 4376  end_subject = md->end_subject; Line 5280  end_subject = md->end_subject;
5280    
5281  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
5282  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5283    md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5284    
5285  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
5286  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
5287  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
5288  md->partial = (options & PCRE_PARTIAL) != 0;  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
5289    md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5290                  ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5291  md->hitend = FALSE;  md->hitend = FALSE;
5292    
5293  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
 md->eptrchain = eptrchain;              /* Make workspace generally available */  
5294    
5295  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
5296  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
5297    
5298    /* Handle different \R options. */
5299    
5300    switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
5301      {
5302      case 0:
5303      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
5304        md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
5305      else
5306    #ifdef BSR_ANYCRLF
5307      md->bsr_anycrlf = TRUE;
5308    #else
5309      md->bsr_anycrlf = FALSE;
5310    #endif
5311      break;
5312    
5313      case PCRE_BSR_ANYCRLF:
5314      md->bsr_anycrlf = TRUE;
5315      break;
5316    
5317      case PCRE_BSR_UNICODE:
5318      md->bsr_anycrlf = FALSE;
5319      break;
5320    
5321      default: return PCRE_ERROR_BADNEWLINE;
5322      }
5323    
5324  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
5325  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
5326    
5327  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
5328         PCRE_NEWLINE_BITS)          (pcre_uint32)options) & PCRE_NEWLINE_BITS)
5329    {    {
5330    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
5331    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
5332    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
5333    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
5334         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
5335    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
5336    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5337    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
# Line 4429  else Line 5361  else
5361      }      }
5362    }    }
5363    
5364  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching was originally supported only for a restricted set of
5365  moment. */  regexes; from release 8.00 there are no restrictions, but the bits are still
5366    defined (though never set). So there's no harm in leaving this code. */
5367    
5368  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5369    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
5370    
5371  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
# Line 4441  back the character offset. */ Line 5374  back the character offset. */
5374  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5375  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5376    {    {
5377    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
5378      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
5379    if (start_offset > 0 && start_offset < length)    if (start_offset > 0 && start_offset < length)
5380      {      {
5381      int tb = ((uschar *)subject)[start_offset];      int tb = ((USPTR)subject)[start_offset];
5382      if (tb > 127)      if (tb > 127)
5383        {        {
5384        tb &= 0xc0;        tb &= 0xc0;
# Line 4509  studied, there may be a bitmap of possib Line 5442  studied, there may be a bitmap of possib
5442    
5443  if (!anchored)  if (!anchored)
5444    {    {
5445    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
5446      {      {
5447      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
5448      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 4517  if (!anchored) Line 5450  if (!anchored)
5450      }      }
5451    else    else
5452      if (!startline && study != NULL &&      if (!startline && study != NULL &&
5453        (study->options & PCRE_STUDY_MAPPED) != 0)        (study->flags & PCRE_STUDY_MAPPED) != 0)
5454          start_bits = study->start_bits;          start_bits = study->start_bits;
5455    }    }
5456    
5457  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
5458  character" set. */  character" set. */
5459    
5460  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
5461    {    {
5462    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
5463    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 4540  the loop runs just once. */ Line 5473  the loop runs just once. */
5473  for(;;)  for(;;)
5474    {    {
5475    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
5476      USPTR new_start_match;
5477    
5478    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
5479    
# Line 4550  for(;;) Line 5484  for(;;)
5484      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
5485      }      }
5486    
5487    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* If firstline is TRUE, the start of the match is constrained to the first
5488    start of the match is constrained to the first line of a multiline string.    line of a multiline string. That is, the match must be before or at the first
5489    That is, the match must be before or at the first newline. Implement this by    newline. Implement this by temporarily adjusting end_subject so that we stop
5490    temporarily adjusting end_subject so that we stop scanning at a newline. If    scanning at a newline. If the match fails at the newline, later code breaks
5491    the match fails at the newline, later code breaks this loop. */    this loop. */
5492    
5493    if (firstline)    if (firstline)
5494      {      {
5495      USPTR t = start_match;      USPTR t = start_match;
5496    #ifdef SUPPORT_UTF8
5497        if (utf8)
5498          {
5499          while (t < md->end_subject && !IS_NEWLINE(t))
5500            {
5501            t++;
5502            while (t < end_subject && (*t & 0xc0) == 0x80) t++;
5503            }
5504          }
5505        else
5506    #endif
5507      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
5508      end_subject = t;      end_subject = t;
5509      }      }
5510    
5511    /* Now test for a unique first byte */    /* There are some optimizations that avoid running the match if a known
5512      starting point is not found, or if a known later character is not present.
5513      However, there is an option that disables these, for testing and for ensuring
5514      that all callouts do actually occur. */
5515    
5516    if (first_byte >= 0)    if ((options & PCRE_NO_START_OPTIMIZE) == 0)
5517      {      {
5518      if (first_byte_caseless)      /* Advance to a unique first byte if there is one. */
       while (start_match < end_subject &&  
              md->lcc[*start_match] != first_byte)  
         start_match++;  
     else  
       while (start_match < end_subject && *start_match != first_byte)  
         start_match++;  
     }  
5519    
5520    /* Or to just after a linebreak for a multiline match if possible */      if (first_byte >= 0)
5521          {
5522          if (first_byte_caseless)
5523            while (start_match < end_subject && md->lcc[*start_match] != first_byte)
5524              start_match++;
5525          else
5526            while (start_match < end_subject && *start_match != first_byte)
5527              start_match++;
5528          }
5529    
5530    else if (startline)      /* Or to just after a linebreak for a multiline match */
5531      {  
5532      if (start_match > md->start_subject + start_offset)      else if (startline)
5533        {        {
5534        while (start_match <= end_subject && !WAS_NEWLINE(start_match))        if (start_match > md->start_subject + start_offset)
5535          start_match++;          {
5536    #ifdef SUPPORT_UTF8
5537            if (utf8)
5538              {
5539              while (start_match < end_subject && !WAS_NEWLINE(start_match))
5540                {
5541                start_match++;
5542                while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5543                  start_match++;
5544                }
5545              }
5546            else
5547    #endif
5548            while (start_match < end_subject && !WAS_NEWLINE(start_match))
5549              start_match++;
5550    
5551            /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
5552            and we are now at a LF, advance the match position by one more character.
5553            */
5554    
5555        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,          if (start_match[-1] == CHAR_CR &&
5556        and we are now at a LF, advance the match position by one more character.               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
5557        */               start_match < end_subject &&
5558                 *start_match == CHAR_NL)
5559        if (start_match[-1] == '\r' &&            start_match++;
5560             (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&          }
            start_match < end_subject &&  
            *start_match == '\n')  
         start_match++;  
5561        }        }
     }  
5562    
5563    /* Or to a non-unique first char after study */      /* Or to a non-unique first byte after study */
5564    
5565    else if (start_bits != NULL)      else if (start_bits != NULL)
     {