/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 164 by ph10, Fri May 4 15:11:14 2007 UTC revision 517 by ph10, Wed May 5 10:44:20 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 53  possible. There are also some static sup Line 57  possible. There are also some static sup
57  #undef min  #undef min
58  #undef max  #undef max
59    
 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
   
 #define EPTR_WORK_SIZE (1000)  
   
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 70  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_ACCEPT       (-999)
75    #define MATCH_COMMIT       (-998)
76    #define MATCH_PRUNE        (-997)
77    #define MATCH_SKIP         (-996)
78    #define MATCH_SKIP_ARG     (-995)
79    #define MATCH_THEN         (-994)
80    
81    /* This is a convenience macro for code that occurs many times. */
82    
83    #define MRRETURN(ra) \
84      { \
85      md->mark = markptr; \
86      RRETURN(ra); \
87      }
88    
89  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
90  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 83  static const char rep_max[] = { 0, 0, 0, Line 99  static const char rep_max[] = { 0, 0, 0,
99    
100    
101    
102  #ifdef DEBUG  #ifdef PCRE_DEBUG
103  /*************************************************  /*************************************************
104  *        Debugging function to print chars       *  *        Debugging function to print chars       *
105  *************************************************/  *************************************************/
# Line 135  match_ref(int offset, register USPTR ept Line 151  match_ref(int offset, register USPTR ept
151  {  {
152  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
153    
154  #ifdef DEBUG  #ifdef PCRE_DEBUG
155  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
156    printf("matching subject <null>");    printf("matching subject <null>");
157  else  else
# Line 152  printf("\n"); Line 168  printf("\n");
168    
169  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
170    
171  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172    properly if Unicode properties are supported. Otherwise, we can check only
173    ASCII characters. */
174    
175  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
176    {    {
177    #ifdef SUPPORT_UTF8
178    #ifdef SUPPORT_UCP
179      if (md->utf8)
180        {
181        USPTR endptr = eptr + length;
182        while (eptr < endptr)
183          {
184          int c, d;
185          GETCHARINC(c, eptr);
186          GETCHARINC(d, p);
187          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188          }
189        }
190      else
191    #endif
192    #endif
193    
194      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195      is no UCP support. */
196    
197    while (length-- > 0)    while (length-- > 0)
198      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199    }    }
200    
201    /* In the caseful case, we can just compare the bytes, whether or not we
202    are in UTF-8 mode. */
203    
204  else  else
205    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
# Line 189  obtained from malloc() instead instead o Line 231  obtained from malloc() instead instead o
231  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
232  always used to.  always used to.
233    
234  The original heap-recursive code used longjmp(). However, it seems that this  The original heap-recursive code used longjmp(). However, it seems that this
235  can be very slow on some operating systems. Following a suggestion from Stan  can be very slow on some operating systems. Following a suggestion from Stan
236  Switzer, the use of longjmp() has been abolished, at the cost of having to  Switzer, the use of longjmp() has been abolished, at the cost of having to
237  provide a unique number for each call to RMATCH. There is no way of generating  provide a unique number for each call to RMATCH. There is no way of generating
# Line 198  them stand out more clearly. Line 240  them stand out more clearly.
240    
241  Crude tests on x86 Linux show a small speedup of around 5-8%. However, on  Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242  FreeBSD, avoiding longjmp() more than halves the time taken to run the standard  FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243  tests. Furthermore, not using longjmp() means that local dynamic variables  tests. Furthermore, not using longjmp() means that local dynamic variables
244  don't have indeterminate values; this has meant that the frame size can be  don't have indeterminate values; this has meant that the frame size can be
245  reduced because the result can be "passed back" by straight setting of the  reduced because the result can be "passed back" by straight setting of the
246  variable instead of being passed in the frame.  variable instead of being passed in the frame.
247  ****************************************************************************  ****************************************************************************
248  ***************************************************************************/  ***************************************************************************/
249    
250    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251  /* Numbers for RMATCH calls */  below must be updated in sync.  */
252    
253  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
254         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
255         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257         RM41,  RM42, RM43, RM44, RM45, RM46, RM47 };         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258           RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58 };
259    
260  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
261  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
262  actuall used in this definition. */  actually used in this definition. */
263    
264  #ifndef NO_RECURSE  #ifndef NO_RECURSE
265  #define REGISTER register  #define REGISTER register
266    
267  #ifdef DEBUG  #ifdef PCRE_DEBUG
268  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
269    { \    { \
270    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
271    rrc = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
272    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
273    }    }
274  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 236  actuall used in this definition. */ Line 278  actuall used in this definition. */
278    }    }
279  #else  #else
280  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
281    rrc = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
282  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
283  #endif  #endif
284    
# Line 255  argument of match(), which never changes Line 297  argument of match(), which never changes
297    frame->Xwhere = rw; \    frame->Xwhere = rw; \
298    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
299    newframe->Xecode = rb;\    newframe->Xecode = rb;\
300      newframe->Xmstart = mstart;\
301      newframe->Xmarkptr = markptr;\
302    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
303    newframe->Xims = re;\    newframe->Xims = re;\
304    newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
# Line 289  typedef struct heapframe { Line 333  typedef struct heapframe {
333    
334    /* Function arguments that may change */    /* Function arguments that may change */
335    
336    const uschar *Xeptr;    USPTR Xeptr;
337    const uschar *Xecode;    const uschar *Xecode;
338      USPTR Xmstart;
339      USPTR Xmarkptr;
340    int Xoffset_top;    int Xoffset_top;
341    long int Xims;    long int Xims;
342    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 299  typedef struct heapframe { Line 345  typedef struct heapframe {
345    
346    /* Function local variables */    /* Function local variables */
347    
348    const uschar *Xcallpat;    USPTR Xcallpat;
349    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
350    const uschar *Xdata;    USPTR Xcharptr;
351    const uschar *Xnext;  #endif
352    const uschar *Xpp;    USPTR Xdata;
353    const uschar *Xprev;    USPTR Xnext;
354    const uschar *Xsaved_eptr;    USPTR Xpp;
355      USPTR Xprev;
356      USPTR Xsaved_eptr;
357    
358    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
359    
# Line 326  typedef struct heapframe { Line 374  typedef struct heapframe {
374    uschar Xocchars[8];    uschar Xocchars[8];
375  #endif  #endif
376    
377      int Xcodelink;
378    int Xctype;    int Xctype;
379    unsigned int Xfc;    unsigned int Xfc;
380    int Xfi;    int Xfi;
# Line 344  typedef struct heapframe { Line 393  typedef struct heapframe {
393    /* Where to jump back to */    /* Where to jump back to */
394    
395    int Xwhere;    int Xwhere;
396    
397  } heapframe;  } heapframe;
398    
399  #endif  #endif
# Line 361  typedef struct heapframe { Line 410  typedef struct heapframe {
410    
411  /* This function is called recursively in many circumstances. Whenever it  /* This function is called recursively in many circumstances. Whenever it
412  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
413  same response.  same response. */
414    
415    /* These macros pack up tests that are used for partial matching, and which
416    appear several times in the code. We set the "hit end" flag if the pointer is
417    at the end of the subject and also past the start of the subject (i.e.
418    something has been matched). For hard partial matching, we then return
419    immediately. The second one is used when we already know we are past the end of
420    the subject. */
421    
422    #define CHECK_PARTIAL()\
423      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
424        {\
425        md->hitend = TRUE;\
426        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
427        }
428    
429  Performance note: It might be tempting to extract commonly used fields from the  #define SCHECK_PARTIAL()\
430  md structure (e.g. utf8, end_subject) into individual variables to improve    if (md->partial != 0 && eptr > mstart)\
431        {\
432        md->hitend = TRUE;\
433        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
434        }
435    
436    
437    /* Performance note: It might be tempting to extract commonly used fields from
438    the md structure (e.g. utf8, end_subject) into individual variables to improve
439  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
440  made performance worse.  made performance worse.
441    
442  Arguments:  Arguments:
443     eptr        pointer to current character in subject     eptr        pointer to current character in subject
444     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
445       mstart      pointer to the current match start position (can be modified
446                     by encountering \K)
447       markptr     pointer to the most recent MARK name, or NULL
448     offset_top  current top pointer     offset_top  current top pointer
449     md          pointer to "static" info for the match     md          pointer to "static" info for the match
450     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 380  Arguments: Line 454  Arguments:
454                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
455                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
456                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
457     rdepth      the recursion depth     rdepth      the recursion depth
458    
459  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
460                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
461                   a negative MATCH_xxx value for PRUNE, SKIP, etc
462                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
463                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
464  */  */
465    
466  static int  static int
467  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
468    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
469    int flags, unsigned int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
470  {  {
471  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
472  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 404  register unsigned int c;   /* Character Line 478  register unsigned int c;   /* Character
478  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
479    
480  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
481    int condcode;
482    
483  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
484  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 418  frame->Xprevframe = NULL;            /* Line 493  frame->Xprevframe = NULL;            /*
493    
494  frame->Xeptr = eptr;  frame->Xeptr = eptr;
495  frame->Xecode = ecode;  frame->Xecode = ecode;
496    frame->Xmstart = mstart;
497    frame->Xmarkptr = markptr;
498  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
499  frame->Xims = ims;  frame->Xims = ims;
500  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 432  HEAP_RECURSE: Line 509  HEAP_RECURSE:
509    
510  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
511  #define ecode              frame->Xecode  #define ecode              frame->Xecode
512    #define mstart             frame->Xmstart
513    #define markptr            frame->Xmarkptr
514  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
515  #define ims                frame->Xims  #define ims                frame->Xims
516  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 444  HEAP_RECURSE: Line 523  HEAP_RECURSE:
523  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
524  #endif  #endif
525  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
526    #define codelink           frame->Xcodelink
527  #define data               frame->Xdata  #define data               frame->Xdata
528  #define next               frame->Xnext  #define next               frame->Xnext
529  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 524  int oclength; Line 604  int oclength;
604  uschar occhars[8];  uschar occhars[8];
605  #endif  #endif
606    
607    int codelink;
608  int ctype;  int ctype;
609  int length;  int length;
610  int max;  int max;
# Line 557  TAIL_RECURSE: Line 638  TAIL_RECURSE:
638  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
639  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
640  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
641  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
642  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
643  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
644  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
# Line 580  original_ims = ims;    /* Save for reset Line 661  original_ims = ims;    /* Save for reset
661  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
662  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
663  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
664  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
665  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
666  already used. */  block that is used is on the stack, so a new one may be required for each
667    match(). */
668    
669  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
670    {    {
671    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
672    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
673      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
674    }    }
675    
676  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 605  for (;;) Line 680  for (;;)
680    minimize = possessive = FALSE;    minimize = possessive = FALSE;
681    op = *ecode;    op = *ecode;
682    
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
683    switch(op)    switch(op)
684      {      {
685        case OP_MARK:
686        markptr = ecode + 2;
687        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
688          ims, eptrb, flags, RM55);
689    
690        /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
691        argument, and we must check whether that argument matches this MARK's
692        argument. It is passed back in md->start_match_ptr (an overloading of that
693        variable). If it does match, we reset that variable to the current subject
694        position and return MATCH_SKIP. Otherwise, pass back the return code
695        unaltered. */
696    
697        if (rrc == MATCH_SKIP_ARG &&
698            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
699          {
700          md->start_match_ptr = eptr;
701          RRETURN(MATCH_SKIP);
702          }
703    
704        if (md->mark == NULL) md->mark = markptr;
705        RRETURN(rrc);
706    
707        case OP_FAIL:
708        MRRETURN(MATCH_NOMATCH);
709    
710        case OP_COMMIT:
711        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
712          ims, eptrb, flags, RM52);
713        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
714        MRRETURN(MATCH_COMMIT);
715    
716        case OP_PRUNE:
717        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
718          ims, eptrb, flags, RM51);
719        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
720        MRRETURN(MATCH_PRUNE);
721    
722        case OP_PRUNE_ARG:
723        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
724          ims, eptrb, flags, RM56);
725        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
726        md->mark = ecode + 2;
727        RRETURN(MATCH_PRUNE);
728    
729        case OP_SKIP:
730        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
731          ims, eptrb, flags, RM53);
732        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
733        md->start_match_ptr = eptr;   /* Pass back current position */
734        MRRETURN(MATCH_SKIP);
735    
736        case OP_SKIP_ARG:
737        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
738          ims, eptrb, flags, RM57);
739        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
740    
741        /* Pass back the current skip name by overloading md->start_match_ptr and
742        returning the special MATCH_SKIP_ARG return code. This will either be
743        caught by a matching MARK, or get to the top, where it is treated the same
744        as PRUNE. */
745    
746        md->start_match_ptr = ecode + 2;
747        RRETURN(MATCH_SKIP_ARG);
748    
749        case OP_THEN:
750        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
751          ims, eptrb, flags, RM54);
752        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
753        MRRETURN(MATCH_THEN);
754    
755        case OP_THEN_ARG:
756        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
757          ims, eptrb, flags, RM58);
758        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
759        md->mark = ecode + 2;
760        RRETURN(MATCH_THEN);
761    
762      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
763      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
764      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 634  for (;;) Line 778  for (;;)
778      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
779      offset = number << 1;      offset = number << 1;
780    
781  #ifdef DEBUG  #ifdef PCRE_DEBUG
782      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
783      printf("subject=");      printf("subject=");
784      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
# Line 656  for (;;) Line 800  for (;;)
800          {          {
801          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
802            ims, eptrb, flags, RM1);            ims, eptrb, flags, RM1);
803          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
804          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
805          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
806          }          }
# Line 668  for (;;) Line 812  for (;;)
812        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
813        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
814    
815          if (rrc != MATCH_THEN) md->mark = markptr;
816        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
817        }        }
818    
819      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
820      bracket. */      as a non-capturing bracket. */
821    
822        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
823        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
824    
825      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
826    
827        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
828        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
829    
830      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
831      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
832      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
833      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
834        is set. */
835    
836      case OP_BRA:      case OP_BRA:
837      case OP_SBRA:      case OP_SBRA:
# Line 687  for (;;) Line 839  for (;;)
839      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
840      for (;;)      for (;;)
841        {        {
842        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
843          {          {
844          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
845          flags |= match_tail_recursed;            {
846          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
847          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
848              goto TAIL_RECURSE;
849              }
850    
851            /* Possibly empty group; can't use tail recursion. */
852    
853            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
854              eptrb, flags, RM48);
855            if (rrc == MATCH_NOMATCH) md->mark = markptr;
856            RRETURN(rrc);
857          }          }
858    
859        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
# Line 700  for (;;) Line 861  for (;;)
861    
862        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
863          eptrb, flags, RM2);          eptrb, flags, RM2);
864        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
865        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
866        }        }
867      /* Control never reaches here. */      /* Control never reaches here. */
# Line 713  for (;;) Line 874  for (;;)
874    
875      case OP_COND:      case OP_COND:
876      case OP_SCOND:      case OP_SCOND:
877      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
878    
879        /* Because of the way auto-callout works during compile, a callout item is
880        inserted between OP_COND and an assertion condition. */
881    
882        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
883        {        {
884        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        if (pcre_callout != NULL)
885        condition = md->recursive != NULL &&          {
886          (offset == RREF_ANY || offset == md->recursive->group_num);          pcre_callout_block cb;
887        ecode += condition? 3 : GET(ecode, 1);          cb.version          = 1;   /* Version 1 of the callout block */
888            cb.callout_number   = ecode[LINK_SIZE+2];
889            cb.offset_vector    = md->offset_vector;
890            cb.subject          = (PCRE_SPTR)md->start_subject;
891            cb.subject_length   = md->end_subject - md->start_subject;
892            cb.start_match      = mstart - md->start_subject;
893            cb.current_position = eptr - md->start_subject;
894            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
895            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
896            cb.capture_top      = offset_top/2;
897            cb.capture_last     = md->capture_last;
898            cb.callout_data     = md->callout_data;
899            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
900            if (rrc < 0) RRETURN(rrc);
901            }
902          ecode += _pcre_OP_lengths[OP_CALLOUT];
903          }
904    
905        condcode = ecode[LINK_SIZE+1];
906    
907        /* Now see what the actual condition is */
908    
909        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
910          {
911          if (md->recursive == NULL)                /* Not recursing => FALSE */
912            {
913            condition = FALSE;
914            ecode += GET(ecode, 1);
915            }
916          else
917            {
918            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
919            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
920    
921            /* If the test is for recursion into a specific subpattern, and it is
922            false, but the test was set up by name, scan the table to see if the
923            name refers to any other numbers, and test them. The condition is true
924            if any one of them is the group currently being recursed. */
925    
926            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
927              {
928              uschar *slotA = md->name_table;
929              for (i = 0; i < md->name_count; i++)
930                {
931                if (GET2(slotA, 0) == recno) break;
932                slotA += md->name_entry_size;
933                }
934    
935              /* Found a name for the number - there can be only one; duplicate
936              names for different numbers are allowed, but not vice versa. First
937              scan down for duplicates. */
938    
939              if (i < md->name_count)
940                {
941                uschar *slotB = slotA;
942                while (slotB > md->name_table)
943                  {
944                  slotB -= md->name_entry_size;
945                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
946                    {
947                    condition = GET2(slotB, 0) == md->recursive->group_num;
948                    if (condition) break;
949                    }
950                  else break;
951                  }
952    
953                /* Scan up for duplicates */
954    
955                if (!condition)
956                  {
957                  slotB = slotA;
958                  for (i++; i < md->name_count; i++)
959                    {
960                    slotB += md->name_entry_size;
961                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
962                      {
963                      condition = GET2(slotB, 0) == md->recursive->group_num;
964                      if (condition) break;
965                      }
966                    else break;
967                    }
968                  }
969                }
970              }
971    
972            /* Choose branch according to the condition */
973    
974            ecode += condition? 3 : GET(ecode, 1);
975            }
976        }        }
977    
978      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
979        {        {
980        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
981        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
982    
983          /* If the numbered capture is unset, but the reference was by name,
984          scan the table to see if the name refers to any other numbers, and test
985          them. The condition is true if any one is set. This is tediously similar
986          to the code above, but not close enough to try to amalgamate. */
987    
988          if (!condition && condcode == OP_NCREF)
989            {
990            int refno = offset >> 1;
991            uschar *slotA = md->name_table;
992    
993            for (i = 0; i < md->name_count; i++)
994              {
995              if (GET2(slotA, 0) == refno) break;
996              slotA += md->name_entry_size;
997              }
998    
999            /* Found a name for the number - there can be only one; duplicate names
1000            for different numbers are allowed, but not vice versa. First scan down
1001            for duplicates. */
1002    
1003            if (i < md->name_count)
1004              {
1005              uschar *slotB = slotA;
1006              while (slotB > md->name_table)
1007                {
1008                slotB -= md->name_entry_size;
1009                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1010                  {
1011                  offset = GET2(slotB, 0) << 1;
1012                  condition = offset < offset_top &&
1013                    md->offset_vector[offset] >= 0;
1014                  if (condition) break;
1015                  }
1016                else break;
1017                }
1018    
1019              /* Scan up for duplicates */
1020    
1021              if (!condition)
1022                {
1023                slotB = slotA;
1024                for (i++; i < md->name_count; i++)
1025                  {
1026                  slotB += md->name_entry_size;
1027                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1028                    {
1029                    offset = GET2(slotB, 0) << 1;
1030                    condition = offset < offset_top &&
1031                      md->offset_vector[offset] >= 0;
1032                    if (condition) break;
1033                    }
1034                  else break;
1035                  }
1036                }
1037              }
1038            }
1039    
1040          /* Choose branch according to the condition */
1041    
1042        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
1043        }        }
1044    
1045      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
1046        {        {
1047        condition = FALSE;        condition = FALSE;
1048        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 748  for (;;) Line 1062  for (;;)
1062          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1063          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1064          }          }
1065        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1066          {          {
1067          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1068          }          }
1069        else        else
1070          {          {
1071          condition = FALSE;          condition = FALSE;
1072          ecode += GET(ecode, 1);          ecode += codelink;
1073          }          }
1074        }        }
1075    
1076      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1077      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
1078      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
1079        group. If the second alternative doesn't exist, we can just plough on. */
1080    
1081      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1082        {        {
1083        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1084        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
1085        goto TAIL_RECURSE;          {
1086            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1087            RRETURN(rrc);
1088            }
1089          else                       /* Group must match something */
1090            {
1091            flags = 0;
1092            goto TAIL_RECURSE;
1093            }
1094        }        }
1095      else      else                         /* Condition false & no alternative */
1096        {        {
1097        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1098        }        }
1099      break;      break;
1100    
1101    
1102      /* End of the pattern. If we are in a top-level recursion, we should      /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1103      restore the offsets appropriately and continue from after the call. */      to close any currently open capturing brackets. */
1104    
1105        case OP_CLOSE:
1106        number = GET2(ecode, 1);
1107        offset = number << 1;
1108    
1109    #ifdef PCRE_DEBUG
1110          printf("end bracket %d at *ACCEPT", number);
1111          printf("\n");
1112    #endif
1113    
1114        md->capture_last = number;
1115        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1116          {
1117          md->offset_vector[offset] =
1118            md->offset_vector[md->offset_end - number];
1119          md->offset_vector[offset+1] = eptr - md->start_subject;
1120          if (offset_top <= offset) offset_top = offset + 2;
1121          }
1122        ecode += 3;
1123        break;
1124    
1125    
1126        /* End of the pattern, either real or forced. If we are in a top-level
1127        recursion, we should restore the offsets appropriately and continue from
1128        after the call. */
1129    
1130        case OP_ACCEPT:
1131      case OP_END:      case OP_END:
1132      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1133        {        {
# Line 787  for (;;) Line 1136  for (;;)
1136        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1137        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1138          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1139        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1140        ims = original_ims;        ims = original_ims;
1141        ecode = rec->after_call;        ecode = rec->after_call;
1142        break;        break;
1143        }        }
1144    
1145      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1146      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1147        the subject. In both cases, backtracking will then try other alternatives,
1148        if any. */
1149    
1150        if (eptr == mstart &&
1151            (md->notempty ||
1152              (md->notempty_atstart &&
1153                mstart == md->start_subject + md->start_offset)))
1154          MRRETURN(MATCH_NOMATCH);
1155    
1156        /* Otherwise, we have a match. */
1157    
1158        md->end_match_ptr = eptr;           /* Record where we ended */
1159        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1160        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1161    
1162      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      /* For some reason, the macros don't work properly if an expression is
1163      md->end_match_ptr = eptr;          /* Record where we ended */      given as the argument to MRRETURN when the heap is in use. */
1164      md->end_offset_top = offset_top;   /* and how many extracts were taken */  
1165      RRETURN(MATCH_MATCH);      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1166        MRRETURN(rrc);
1167    
1168      /* Change option settings */      /* Change option settings */
1169    
# Line 821  for (;;) Line 1185  for (;;)
1185        {        {
1186        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1187          RM4);          RM4);
1188        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1189        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          {
1190            mstart = md->start_match_ptr;   /* In case \K reset it */
1191            break;
1192            }
1193          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1194        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1195        }        }
1196      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1197      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1198    
1199      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1200    
# Line 840  for (;;) Line 1208  for (;;)
1208      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1209      continue;      continue;
1210    
1211      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1212        PRUNE, or COMMIT means we must assume failure without checking subsequent
1213        branches. */
1214    
1215      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1216      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
# Line 848  for (;;) Line 1218  for (;;)
1218        {        {
1219        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1220          RM5);          RM5);
1221        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1222        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1223            {
1224            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1225            break;
1226            }
1227          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1228        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1229        }        }
1230      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 872  for (;;) Line 1247  for (;;)
1247        while (i-- > 0)        while (i-- > 0)
1248          {          {
1249          eptr--;          eptr--;
1250          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1251          BACKCHAR(eptr)          BACKCHAR(eptr);
1252          }          }
1253        }        }
1254      else      else
# Line 883  for (;;) Line 1258  for (;;)
1258    
1259        {        {
1260        eptr -= GET(ecode, 1);        eptr -= GET(ecode, 1);
1261        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1262        }        }
1263    
1264      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1265    
1266        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1267      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1268      break;      break;
1269    
# Line 904  for (;;) Line 1280  for (;;)
1280        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1281        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1282        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
1283        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
1284        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
1285        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1286        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1287        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1288        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1289        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1290        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1291        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1292        }        }
1293      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 966  for (;;) Line 1342  for (;;)
1342    
1343        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1344              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1345        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1346    
1347        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1348        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 978  for (;;) Line 1353  for (;;)
1353          {          {
1354          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1355            md, ims, eptrb, flags, RM6);            md, ims, eptrb, flags, RM6);
1356          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1357            {            {
1358            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1359            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1360            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1361              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1362            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1363            }            }
1364          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1365            {            {
1366            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1367              if (new_recursive.offset_save != stacksave)
1368                (pcre_free)(new_recursive.offset_save);
1369            RRETURN(rrc);            RRETURN(rrc);
1370            }            }
1371    
# Line 1003  for (;;) Line 1380  for (;;)
1380        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1381        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1382          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1383        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1384        }        }
1385      /* Control never reaches here */      /* Control never reaches here */
1386    
# Line 1012  for (;;) Line 1389  for (;;)
1389      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1390      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1391      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1392      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1393        the start-of-match value in case it was changed by \K. */
1394    
1395      case OP_ONCE:      case OP_ONCE:
1396      prev = ecode;      prev = ecode;
# Line 1020  for (;;) Line 1398  for (;;)
1398    
1399      do      do
1400        {        {
1401        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1402          eptrb, 0, RM7);        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1403        if (rrc == MATCH_MATCH) break;          {
1404        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          mstart = md->start_match_ptr;
1405            break;
1406            }
1407          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1408        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1409        }        }
1410      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1066  for (;;) Line 1447  for (;;)
1447    
1448      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1449        {        {
1450        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
         RM8);  
1451        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1452        ecode = prev;        ecode = prev;
1453        flags = match_tail_recursed;        flags = 0;
1454        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1455        }        }
1456      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
# Line 1078  for (;;) Line 1458  for (;;)
1458        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1459        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1460        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1461        flags = match_tail_recursed;        flags = 0;
1462        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1463        }        }
1464      /* Control never gets here */      /* Control never gets here */
# Line 1090  for (;;) Line 1470  for (;;)
1470      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1471      break;      break;
1472    
1473      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1474      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1475      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1476      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1477      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1478    
1479      case OP_BRAZERO:      case OP_BRAZERO:
1480        {        {
# Line 1116  for (;;) Line 1496  for (;;)
1496        }        }
1497      break;      break;
1498    
1499        case OP_SKIPZERO:
1500          {
1501          next = ecode+1;
1502          do next += GET(next,1); while (*next == OP_ALT);
1503          ecode = next + 1 + LINK_SIZE;
1504          }
1505        break;
1506    
1507      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1508    
1509      case OP_KET:      case OP_KET:
# Line 1134  for (;;) Line 1522  for (;;)
1522        }        }
1523      else saved_eptr = NULL;      else saved_eptr = NULL;
1524    
1525      /* If we are at the end of an assertion group, stop matching and return      /* If we are at the end of an assertion group or an atomic group, stop
1526      MATCH_MATCH, but record the current high water mark for use by positive      matching and return MATCH_MATCH, but record the current high water mark for
1527      assertions. Do this also for the "once" (atomic) groups. */      use by positive assertions. We also need to record the match start in case
1528        it was changed by \K. */
1529    
1530      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1531          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1144  for (;;) Line 1533  for (;;)
1533        {        {
1534        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1535        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1536        RRETURN(MATCH_MATCH);        md->start_match_ptr = mstart;
1537          MRRETURN(MATCH_MATCH);
1538        }        }
1539    
1540      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
# Line 1158  for (;;) Line 1548  for (;;)
1548        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1549        offset = number << 1;        offset = number << 1;
1550    
1551  #ifdef DEBUG  #ifdef PCRE_DEBUG
1552        printf("end bracket %d", number);        printf("end bracket %d", number);
1553        printf("\n");        printf("\n");
1554  #endif  #endif
# Line 1180  for (;;) Line 1570  for (;;)
1570          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1571          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1572          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
         md->start_match = rec->save_start;  
1573          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1574            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1575            offset_top = rec->save_offset_top;
1576          ecode = rec->after_call;          ecode = rec->after_call;
1577          ims = original_ims;          ims = original_ims;
1578          break;          break;
# Line 1209  for (;;) Line 1599  for (;;)
1599    
1600      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1601      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1602      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1603        unlimited repeat of a group that can match an empty string. */
1604    
1605      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1606    
1607      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1608        {        {
1609        RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
         RM12);  
1610        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1611          if (flags != 0)    /* Could match an empty string */
1612            {
1613            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1614            RRETURN(rrc);
1615            }
1616        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1617        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1618        }        }
1619      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
# Line 1227  for (;;) Line 1621  for (;;)
1621        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1622        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1623        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1624        flags = match_tail_recursed;        flags = 0;
1625        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1626        }        }
1627      /* Control never gets here */      /* Control never gets here */
# Line 1235  for (;;) Line 1629  for (;;)
1629      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1630    
1631      case OP_CIRC:      case OP_CIRC:
1632      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1633      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1634        {        {
1635        if (eptr != md->start_subject &&        if (eptr != md->start_subject &&
1636            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1637          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1638        ecode++;        ecode++;
1639        break;        break;
1640        }        }
# Line 1249  for (;;) Line 1643  for (;;)
1643      /* Start of subject assertion */      /* Start of subject assertion */
1644    
1645      case OP_SOD:      case OP_SOD:
1646      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1647      ecode++;      ecode++;
1648      break;      break;
1649    
1650      /* Start of match assertion */      /* Start of match assertion */
1651    
1652      case OP_SOM:      case OP_SOM:
1653      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1654        ecode++;
1655        break;
1656    
1657        /* Reset the start of match point */
1658    
1659        case OP_SET_SOM:
1660        mstart = eptr;
1661      ecode++;      ecode++;
1662      break;      break;
1663    
# Line 1267  for (;;) Line 1668  for (;;)
1668      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1669        {        {
1670        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1671          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1672        else        else
1673          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1674        ecode++;        ecode++;
1675        break;        break;
1676        }        }
1677      else      else
1678        {        {
1679        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1680        if (!md->endonly)        if (!md->endonly)
1681          {          {
1682          if (eptr != md->end_subject &&          if (eptr != md->end_subject &&
1683              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1684            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
1685          ecode++;          ecode++;
1686          break;          break;
1687          }          }
# Line 1290  for (;;) Line 1691  for (;;)
1691      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1692    
1693      case OP_EOD:      case OP_EOD:
1694      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1695      ecode++;      ecode++;
1696      break;      break;
1697    
# Line 1299  for (;;) Line 1700  for (;;)
1700      case OP_EODN:      case OP_EODN:
1701      if (eptr != md->end_subject &&      if (eptr != md->end_subject &&
1702          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1703        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1704      ecode++;      ecode++;
1705      break;      break;
1706    
# Line 1311  for (;;) Line 1712  for (;;)
1712    
1713        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1714        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1715        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1716          partial matching. */
1717    
1718  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1719        if (utf8)        if (utf8)
1720          {          {
1721          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1722            {            {
1723            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1724            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1725              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1726            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1727            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1728            }            }
1729          if (eptr >= md->end_subject) cur_is_word = FALSE; else          if (eptr >= md->end_subject)
1730              {
1731              SCHECK_PARTIAL();
1732              cur_is_word = FALSE;
1733              }
1734            else
1735            {            {
1736            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1737            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1332  for (;;) Line 1740  for (;;)
1740        else        else
1741  #endif  #endif
1742    
1743        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode */
1744    
1745          {          {
1746          prev_is_word = (eptr != md->start_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1747            ((md->ctypes[eptr[-1]] & ctype_word) != 0);            {
1748          cur_is_word = (eptr < md->end_subject) &&            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1749            ((md->ctypes[*eptr] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1750              }
1751            if (eptr >= md->end_subject)
1752              {
1753              SCHECK_PARTIAL();
1754              cur_is_word = FALSE;
1755              }
1756            else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1757          }          }
1758    
1759        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
1760    
1761        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
1762             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1763          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1764        }        }
1765      break;      break;
1766    
1767      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1768    
1769      case OP_ANY:      case OP_ANY:
1770      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1771        /* Fall through */
1772    
1773        case OP_ALLANY:
1774        if (eptr++ >= md->end_subject)
1775        {        {
1776        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);        SCHECK_PARTIAL();
1777          MRRETURN(MATCH_NOMATCH);
1778        }        }
1779      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1780      ecode++;      ecode++;
1781      break;      break;
1782    
# Line 1366  for (;;) Line 1784  for (;;)
1784      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1785    
1786      case OP_ANYBYTE:      case OP_ANYBYTE:
1787      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1788          {
1789          SCHECK_PARTIAL();
1790          MRRETURN(MATCH_NOMATCH);
1791          }
1792      ecode++;      ecode++;
1793      break;      break;
1794    
1795      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1796      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1797          {
1798          SCHECK_PARTIAL();
1799          MRRETURN(MATCH_NOMATCH);
1800          }
1801      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1802      if (      if (
1803  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1379  for (;;) Line 1805  for (;;)
1805  #endif  #endif
1806         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1807         )         )
1808        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1809      ecode++;      ecode++;
1810      break;      break;
1811    
1812      case OP_DIGIT:      case OP_DIGIT:
1813      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1814          {
1815          SCHECK_PARTIAL();
1816          MRRETURN(MATCH_NOMATCH);
1817          }
1818      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1819      if (      if (
1820  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1392  for (;;) Line 1822  for (;;)
1822  #endif  #endif
1823         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1824         )         )
1825        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1826      ecode++;      ecode++;
1827      break;      break;
1828    
1829      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1830      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1831          {
1832          SCHECK_PARTIAL();
1833          MRRETURN(MATCH_NOMATCH);
1834          }
1835      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1836      if (      if (
1837  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1405  for (;;) Line 1839  for (;;)
1839  #endif  #endif
1840         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1841         )         )
1842        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1843      ecode++;      ecode++;
1844      break;      break;
1845    
1846      case OP_WHITESPACE:      case OP_WHITESPACE:
1847      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1848          {
1849          SCHECK_PARTIAL();
1850          MRRETURN(MATCH_NOMATCH);
1851          }
1852      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1853      if (      if (
1854  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1418  for (;;) Line 1856  for (;;)
1856  #endif  #endif
1857         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1858         )         )
1859        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1860      ecode++;      ecode++;
1861      break;      break;
1862    
1863      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1864      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1865          {
1866          SCHECK_PARTIAL();
1867          MRRETURN(MATCH_NOMATCH);
1868          }
1869      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1870      if (      if (
1871  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1431  for (;;) Line 1873  for (;;)
1873  #endif  #endif
1874         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1875         )         )
1876        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1877      ecode++;      ecode++;
1878      break;      break;
1879    
1880      case OP_WORDCHAR:      case OP_WORDCHAR:
1881      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1882          {
1883          SCHECK_PARTIAL();
1884          MRRETURN(MATCH_NOMATCH);
1885          }
1886      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1887      if (      if (
1888  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1444  for (;;) Line 1890  for (;;)
1890  #endif  #endif
1891         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1892         )         )
1893        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1894      ecode++;      ecode++;
1895      break;      break;
1896    
1897      case OP_ANYNL:      case OP_ANYNL:
1898      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1899          {
1900          SCHECK_PARTIAL();
1901          MRRETURN(MATCH_NOMATCH);
1902          }
1903      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1904      switch(c)      switch(c)
1905        {        {
1906        default: RRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
1907        case 0x000d:        case 0x000d:
1908        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1909        break;        break;
1910    
1911        case 0x000a:        case 0x000a:
1912          break;
1913    
1914        case 0x000b:        case 0x000b:
1915        case 0x000c:        case 0x000c:
1916        case 0x0085:        case 0x0085:
1917        case 0x2028:        case 0x2028:
1918        case 0x2029:        case 0x2029:
1919          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1920          break;
1921          }
1922        ecode++;
1923        break;
1924    
1925        case OP_NOT_HSPACE:
1926        if (eptr >= md->end_subject)
1927          {
1928          SCHECK_PARTIAL();
1929          MRRETURN(MATCH_NOMATCH);
1930          }
1931        GETCHARINCTEST(c, eptr);
1932        switch(c)
1933          {
1934          default: break;
1935          case 0x09:      /* HT */
1936          case 0x20:      /* SPACE */
1937          case 0xa0:      /* NBSP */
1938          case 0x1680:    /* OGHAM SPACE MARK */
1939          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1940          case 0x2000:    /* EN QUAD */
1941          case 0x2001:    /* EM QUAD */
1942          case 0x2002:    /* EN SPACE */
1943          case 0x2003:    /* EM SPACE */
1944          case 0x2004:    /* THREE-PER-EM SPACE */
1945          case 0x2005:    /* FOUR-PER-EM SPACE */
1946          case 0x2006:    /* SIX-PER-EM SPACE */
1947          case 0x2007:    /* FIGURE SPACE */
1948          case 0x2008:    /* PUNCTUATION SPACE */
1949          case 0x2009:    /* THIN SPACE */
1950          case 0x200A:    /* HAIR SPACE */
1951          case 0x202f:    /* NARROW NO-BREAK SPACE */
1952          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1953          case 0x3000:    /* IDEOGRAPHIC SPACE */
1954          MRRETURN(MATCH_NOMATCH);
1955          }
1956        ecode++;
1957        break;
1958    
1959        case OP_HSPACE:
1960        if (eptr >= md->end_subject)
1961          {
1962          SCHECK_PARTIAL();
1963          MRRETURN(MATCH_NOMATCH);
1964          }
1965        GETCHARINCTEST(c, eptr);
1966        switch(c)
1967          {
1968          default: MRRETURN(MATCH_NOMATCH);
1969          case 0x09:      /* HT */
1970          case 0x20:      /* SPACE */
1971          case 0xa0:      /* NBSP */
1972          case 0x1680:    /* OGHAM SPACE MARK */
1973          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1974          case 0x2000:    /* EN QUAD */
1975          case 0x2001:    /* EM QUAD */
1976          case 0x2002:    /* EN SPACE */
1977          case 0x2003:    /* EM SPACE */
1978          case 0x2004:    /* THREE-PER-EM SPACE */
1979          case 0x2005:    /* FOUR-PER-EM SPACE */
1980          case 0x2006:    /* SIX-PER-EM SPACE */
1981          case 0x2007:    /* FIGURE SPACE */
1982          case 0x2008:    /* PUNCTUATION SPACE */
1983          case 0x2009:    /* THIN SPACE */
1984          case 0x200A:    /* HAIR SPACE */
1985          case 0x202f:    /* NARROW NO-BREAK SPACE */
1986          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1987          case 0x3000:    /* IDEOGRAPHIC SPACE */
1988          break;
1989          }
1990        ecode++;
1991        break;
1992    
1993        case OP_NOT_VSPACE:
1994        if (eptr >= md->end_subject)
1995          {
1996          SCHECK_PARTIAL();
1997          MRRETURN(MATCH_NOMATCH);
1998          }
1999        GETCHARINCTEST(c, eptr);
2000        switch(c)
2001          {
2002          default: break;
2003          case 0x0a:      /* LF */
2004          case 0x0b:      /* VT */
2005          case 0x0c:      /* FF */
2006          case 0x0d:      /* CR */
2007          case 0x85:      /* NEL */
2008          case 0x2028:    /* LINE SEPARATOR */
2009          case 0x2029:    /* PARAGRAPH SEPARATOR */
2010          MRRETURN(MATCH_NOMATCH);
2011          }
2012        ecode++;
2013        break;
2014    
2015        case OP_VSPACE:
2016        if (eptr >= md->end_subject)
2017          {
2018          SCHECK_PARTIAL();
2019          MRRETURN(MATCH_NOMATCH);
2020          }
2021        GETCHARINCTEST(c, eptr);
2022        switch(c)
2023          {
2024          default: MRRETURN(MATCH_NOMATCH);
2025          case 0x0a:      /* LF */
2026          case 0x0b:      /* VT */
2027          case 0x0c:      /* FF */
2028          case 0x0d:      /* CR */
2029          case 0x85:      /* NEL */
2030          case 0x2028:    /* LINE SEPARATOR */
2031          case 0x2029:    /* PARAGRAPH SEPARATOR */
2032        break;        break;
2033        }        }
2034      ecode++;      ecode++;
# Line 1474  for (;;) Line 2040  for (;;)
2040    
2041      case OP_PROP:      case OP_PROP:
2042      case OP_NOTPROP:      case OP_NOTPROP:
2043      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2044          {
2045          SCHECK_PARTIAL();
2046          MRRETURN(MATCH_NOMATCH);
2047          }
2048      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2049        {        {
2050        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
2051    
2052        switch(ecode[1])        switch(ecode[1])
2053          {          {
2054          case PT_ANY:          case PT_ANY:
2055          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2056          break;          break;
2057    
2058          case PT_LAMP:          case PT_LAMP:
2059          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2060               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2061               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2062            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2063           break;          break;
2064    
2065          case PT_GC:          case PT_GC:
2066          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2067            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2068          break;          break;
2069    
2070          case PT_PC:          case PT_PC:
2071          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2072            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2073          break;          break;
2074    
2075          case PT_SC:          case PT_SC:
2076          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2077            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2078          break;          break;
2079    
2080            /* These are specials */
2081    
2082            case PT_ALNUM:
2083            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2084                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2085              MRRETURN(MATCH_NOMATCH);
2086            break;
2087    
2088            case PT_SPACE:    /* Perl space */
2089            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2090                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2091                   == (op == OP_NOTPROP))
2092              MRRETURN(MATCH_NOMATCH);
2093            break;
2094    
2095            case PT_PXSPACE:  /* POSIX space */
2096            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2097                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2098                 c == CHAR_FF || c == CHAR_CR)
2099                   == (op == OP_NOTPROP))
2100              MRRETURN(MATCH_NOMATCH);
2101            break;
2102    
2103            case PT_WORD:
2104            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2105                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2106                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2107              MRRETURN(MATCH_NOMATCH);
2108            break;
2109    
2110            /* This should never occur */
2111    
2112          default:          default:
2113          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
# Line 1520  for (;;) Line 2121  for (;;)
2121      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2122    
2123      case OP_EXTUNI:      case OP_EXTUNI:
2124      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2125          {
2126          SCHECK_PARTIAL();
2127          MRRETURN(MATCH_NOMATCH);
2128          }
2129      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2130        {        {
2131        int chartype, script;        int category = UCD_CATEGORY(c);
2132        int category = _pcre_ucp_findprop(c, &chartype, &script);        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2133        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2134          {          {
2135          int len = 1;          int len = 1;
# Line 1533  for (;;) Line 2137  for (;;)
2137            {            {
2138            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2139            }            }
2140          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2141          if (category != ucp_M) break;          if (category != ucp_M) break;
2142          eptr += len;          eptr += len;
2143          }          }
# Line 1554  for (;;) Line 2158  for (;;)
2158      case OP_REF:      case OP_REF:
2159        {        {
2160        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2161        ecode += 3;                                 /* Advance past item */        ecode += 3;
2162    
2163          /* If the reference is unset, there are two possibilities:
2164    
2165        /* If the reference is unset, set the length to be longer than the amount        (a) In the default, Perl-compatible state, set the length to be longer
2166        of subject left; this ensures that every attempt at a match fails. We        than the amount of subject left; this ensures that every attempt at a
2167        can't just fail here, because of the possibility of quantifiers with zero        match fails. We can't just fail here, because of the possibility of
2168        minima. */        quantifiers with zero minima.
2169    
2170        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        (b) If the JavaScript compatibility flag is set, set the length to zero
2171          md->end_subject - eptr + 1 :        so that the back reference matches an empty string.
2172          md->offset_vector[offset+1] - md->offset_vector[offset];  
2173          Otherwise, set the length to the length of what was matched by the
2174          referenced subpattern. */
2175    
2176          if (offset >= offset_top || md->offset_vector[offset] < 0)
2177            length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2178          else
2179            length = md->offset_vector[offset+1] - md->offset_vector[offset];
2180    
2181        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2182    
# Line 1592  for (;;) Line 2205  for (;;)
2205          break;          break;
2206    
2207          default:               /* No repeat follows */          default:               /* No repeat follows */
2208          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2209              {
2210              CHECK_PARTIAL();
2211              MRRETURN(MATCH_NOMATCH);
2212              }
2213          eptr += length;          eptr += length;
2214          continue;              /* With the main loop */          continue;              /* With the main loop */
2215          }          }
# Line 1608  for (;;) Line 2225  for (;;)
2225    
2226        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2227          {          {
2228          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2229              {
2230              CHECK_PARTIAL();
2231              MRRETURN(MATCH_NOMATCH);
2232              }
2233          eptr += length;          eptr += length;
2234          }          }
2235    
# Line 1625  for (;;) Line 2246  for (;;)
2246            {            {
2247            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2248            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2249            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2250              RRETURN(MATCH_NOMATCH);            if (!match_ref(offset, eptr, length, md, ims))
2251                {
2252                CHECK_PARTIAL();
2253                MRRETURN(MATCH_NOMATCH);
2254                }
2255            eptr += length;            eptr += length;
2256            }            }
2257          /* Control never gets here */          /* Control never gets here */
# Line 1639  for (;;) Line 2264  for (;;)
2264          pp = eptr;          pp = eptr;
2265          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2266            {            {
2267            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2268                {
2269                CHECK_PARTIAL();
2270                break;
2271                }
2272            eptr += length;            eptr += length;
2273            }            }
2274          while (eptr >= pp)          while (eptr >= pp)
# Line 1648  for (;;) Line 2277  for (;;)
2277            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2278            eptr -= length;            eptr -= length;
2279            }            }
2280          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2281          }          }
2282        }        }
2283      /* Control never gets here */      /* Control never gets here */
2284    
   
   
2285      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2286      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2287      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1709  for (;;) Line 2336  for (;;)
2336          {          {
2337          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2338            {            {
2339            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2340                {
2341                SCHECK_PARTIAL();
2342                MRRETURN(MATCH_NOMATCH);
2343                }
2344            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2345            if (c > 255)            if (c > 255)
2346              {              {
2347              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2348              }              }
2349            else            else
2350              {              {
2351              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2352              }              }
2353            }            }
2354          }          }
# Line 1727  for (;;) Line 2358  for (;;)
2358          {          {
2359          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2360            {            {
2361            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2362                {
2363                SCHECK_PARTIAL();
2364                MRRETURN(MATCH_NOMATCH);
2365                }
2366            c = *eptr++;            c = *eptr++;
2367            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2368            }            }
2369          }          }
2370    
# Line 1751  for (;;) Line 2386  for (;;)
2386              {              {
2387              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2388              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2389              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2390                if (eptr >= md->end_subject)
2391                  {
2392                  SCHECK_PARTIAL();
2393                  MRRETURN(MATCH_NOMATCH);
2394                  }
2395              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2396              if (c > 255)              if (c > 255)
2397                {                {
2398                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2399                }                }
2400              else              else
2401                {                {
2402                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2403                }                }
2404              }              }
2405            }            }
# Line 1771  for (;;) Line 2411  for (;;)
2411              {              {
2412              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2413              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2414              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2415                if (eptr >= md->end_subject)
2416                  {
2417                  SCHECK_PARTIAL();
2418                  MRRETURN(MATCH_NOMATCH);
2419                  }
2420              c = *eptr++;              c = *eptr++;
2421              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2422              }              }
2423            }            }
2424          /* Control never gets here */          /* Control never gets here */
# Line 1792  for (;;) Line 2437  for (;;)
2437            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2438              {              {
2439              int len = 1;              int len = 1;
2440              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2441                  {
2442                  SCHECK_PARTIAL();
2443                  break;
2444                  }
2445              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2446              if (c > 255)              if (c > 255)
2447                {                {
# Line 1818  for (;;) Line 2467  for (;;)
2467            {            {
2468            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2469              {              {
2470              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2471                  {
2472                  SCHECK_PARTIAL();
2473                  break;
2474                  }
2475              c = *eptr;              c = *eptr;
2476              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2477              eptr++;              eptr++;
# Line 1831  for (;;) Line 2484  for (;;)
2484              }              }
2485            }            }
2486    
2487          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2488          }          }
2489        }        }
2490      /* Control never gets here */      /* Control never gets here */
2491    
2492    
2493      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2494      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2495        mode, because Unicode properties are supported in non-UTF-8 mode. */
2496    
2497  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2498      case OP_XCLASS:      case OP_XCLASS:
# Line 1879  for (;;) Line 2533  for (;;)
2533    
2534        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2535          {          {
2536          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2537          GETCHARINC(c, eptr);            {
2538          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2539              MRRETURN(MATCH_NOMATCH);
2540              }
2541            GETCHARINCTEST(c, eptr);
2542            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2543          }          }
2544    
2545        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1898  for (;;) Line 2556  for (;;)
2556            {            {
2557            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2558            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2559            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2560            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2561            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2562                SCHECK_PARTIAL();
2563                MRRETURN(MATCH_NOMATCH);
2564                }
2565              GETCHARINCTEST(c, eptr);
2566              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2567            }            }
2568          /* Control never gets here */          /* Control never gets here */
2569          }          }
# Line 1913  for (;;) Line 2576  for (;;)
2576          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2577            {            {
2578            int len = 1;            int len = 1;
2579            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2580            GETCHARLEN(c, eptr, len);              {
2581                SCHECK_PARTIAL();
2582                break;
2583                }
2584              GETCHARLENTEST(c, eptr, len);
2585            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2586            eptr += len;            eptr += len;
2587            }            }
# Line 1923  for (;;) Line 2590  for (;;)
2590            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2591            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2592            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2593            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2594            }            }
2595          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2596          }          }
2597    
2598        /* Control never gets here */        /* Control never gets here */
# Line 1941  for (;;) Line 2608  for (;;)
2608        length = 1;        length = 1;
2609        ecode++;        ecode++;
2610        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2611        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2612        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2613            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2614            MRRETURN(MATCH_NOMATCH);
2615            }
2616          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2617        }        }
2618      else      else
2619  #endif  #endif
2620    
2621      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2622        {        {
2623        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2624        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2625            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2626            MRRETURN(MATCH_NOMATCH);
2627            }
2628          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2629        ecode += 2;        ecode += 2;
2630        }        }
2631      break;      break;
# Line 1965  for (;;) Line 2640  for (;;)
2640        ecode++;        ecode++;
2641        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2642    
2643        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2644            {
2645            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2646            MRRETURN(MATCH_NOMATCH);
2647            }
2648    
2649        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2650        can use the fast lookup table. */        can use the fast lookup table. */
2651    
2652        if (fc < 128)        if (fc < 128)
2653          {          {
2654          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2655          }          }
2656    
2657        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
# Line 1989  for (;;) Line 2668  for (;;)
2668          if (fc != dc)          if (fc != dc)
2669            {            {
2670  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2671            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2672  #endif  #endif
2673              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2674            }            }
2675          }          }
2676        }        }
# Line 2000  for (;;) Line 2679  for (;;)
2679    
2680      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2681        {        {
2682        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2683        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2684            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2685            MRRETURN(MATCH_NOMATCH);
2686            }
2687          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2688        ecode += 2;        ecode += 2;
2689        }        }
2690      break;      break;
# Line 2054  for (;;) Line 2737  for (;;)
2737      case OP_MINQUERY:      case OP_MINQUERY:
2738      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2739      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2740    
2741      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2742      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2743      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2744    
2745      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2746    
2747      REPEATCHAR:      REPEATCHAR:
2748  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2069  for (;;) Line 2751  for (;;)
2751        length = 1;        length = 1;
2752        charptr = ecode;        charptr = ecode;
2753        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2754        ecode += length;        ecode += length;
2755    
2756        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 2080  for (;;) Line 2761  for (;;)
2761  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2762          unsigned int othercase;          unsigned int othercase;
2763          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2764              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2765            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2766          else oclength = 0;          else oclength = 0;
2767  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2768    
2769          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2770            {            {
2771            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2772                memcmp(eptr, charptr, length) == 0) eptr += length;
2773  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2774            /* Need braces because of following else */            else if (oclength > 0 &&
2775            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                     eptr <= md->end_subject - oclength &&
2776                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2777    #endif  /* SUPPORT_UCP */
2778            else            else
2779              {              {
2780              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2781              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2782              }              }
 #else   /* without SUPPORT_UCP */  
           else { RRETURN(MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2783            }            }
2784    
2785          if (min == max) continue;          if (min == max) continue;
# Line 2109  for (;;) Line 2790  for (;;)
2790              {              {
2791              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2792              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2793              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2794              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2795                  memcmp(eptr, charptr, length) == 0) eptr += length;
2796  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2797              /* Need braces because of following else */              else if (oclength > 0 &&
2798              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                       eptr <= md->end_subject - oclength &&
2799                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2800    #endif  /* SUPPORT_UCP */
2801              else              else
2802                {                {
2803                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2804                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2805                }                }
 #else   /* without SUPPORT_UCP */  
             else { RRETURN (MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2806              }              }
2807            /* Control never gets here */            /* Control never gets here */
2808            }            }
# Line 2131  for (;;) Line 2812  for (;;)
2812            pp = eptr;            pp = eptr;
2813            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2814              {              {
2815              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2816              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2817  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2818              else if (oclength == 0) break;              else if (oclength > 0 &&
2819                         eptr <= md->end_subject - oclength &&
2820                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2821    #endif  /* SUPPORT_UCP */
2822              else              else
2823                {                {
2824                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2825                eptr += oclength;                break;
2826                }                }
 #else   /* without SUPPORT_UCP */  
             else break;  
 #endif  /* SUPPORT_UCP */  
2827              }              }
2828    
2829            if (possessive) continue;            if (possessive) continue;
2830    
2831            for(;;)            for(;;)
2832             {              {
2833             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2834             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2835             if (eptr == pp) RRETURN(MATCH_NOMATCH);              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2836  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2837             eptr--;              eptr--;
2838             BACKCHAR(eptr);              BACKCHAR(eptr);
2839  #else   /* without SUPPORT_UCP */  #else   /* without SUPPORT_UCP */
2840             eptr -= length;              eptr -= length;
2841  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2842             }              }
2843            }            }
2844          /* Control never gets here */          /* Control never gets here */
2845          }          }
# Line 2170  for (;;) Line 2852  for (;;)
2852  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2853    
2854      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2855        {  
2856        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2857    
2858      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2859      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2191  for (;;) Line 2871  for (;;)
2871        {        {
2872        fc = md->lcc[fc];        fc = md->lcc[fc];
2873        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2874          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2875            if (eptr >= md->end_subject)
2876              {
2877              SCHECK_PARTIAL();
2878              MRRETURN(MATCH_NOMATCH);
2879              }
2880            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2881            }
2882        if (min == max) continue;        if (min == max) continue;
2883        if (minimize)        if (minimize)
2884          {          {
# Line 2199  for (;;) Line 2886  for (;;)
2886            {            {
2887            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2888            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2889            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2890                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2891              RRETURN(MATCH_NOMATCH);              {
2892                SCHECK_PARTIAL();
2893                MRRETURN(MATCH_NOMATCH);
2894                }
2895              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2896            }            }
2897          /* Control never gets here */          /* Control never gets here */
2898          }          }
# Line 2210  for (;;) Line 2901  for (;;)
2901          pp = eptr;          pp = eptr;
2902          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2903            {            {
2904            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2905                {
2906                SCHECK_PARTIAL();
2907                break;
2908                }
2909              if (fc != md->lcc[*eptr]) break;
2910            eptr++;            eptr++;
2911            }            }
2912    
2913          if (possessive) continue;          if (possessive) continue;
2914    
2915          while (eptr >= pp)          while (eptr >= pp)
2916            {            {
2917            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2918            eptr--;            eptr--;
2919            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2920            }            }
2921          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2922          }          }
2923        /* Control never gets here */        /* Control never gets here */
2924        }        }
# Line 2229  for (;;) Line 2927  for (;;)
2927    
2928      else      else
2929        {        {
2930        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2931            {
2932            if (eptr >= md->end_subject)
2933              {
2934              SCHECK_PARTIAL();
2935              MRRETURN(MATCH_NOMATCH);
2936              }
2937            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2938            }
2939    
2940        if (min == max) continue;        if (min == max) continue;
2941    
2942        if (minimize)        if (minimize)
2943          {          {
2944          for (fi = min;; fi++)          for (fi = min;; fi++)
2945            {            {
2946            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2947            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2948            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2949              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2950                {
2951                SCHECK_PARTIAL();
2952                MRRETURN(MATCH_NOMATCH);
2953                }
2954              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2955            }            }
2956          /* Control never gets here */          /* Control never gets here */
2957          }          }
# Line 2247  for (;;) Line 2960  for (;;)
2960          pp = eptr;          pp = eptr;
2961          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2962            {            {
2963            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
2964                {
2965                SCHECK_PARTIAL();
2966                break;
2967                }
2968              if (fc != *eptr) break;
2969            eptr++;            eptr++;
2970            }            }
2971          if (possessive) continue;          if (possessive) continue;
2972    
2973          while (eptr >= pp)          while (eptr >= pp)
2974            {            {
2975            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2976            eptr--;            eptr--;
2977            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2978            }            }
2979          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2980          }          }
2981        }        }
2982      /* Control never gets here */      /* Control never gets here */
# Line 2266  for (;;) Line 2985  for (;;)
2985      checking can be multibyte. */      checking can be multibyte. */
2986    
2987      case OP_NOT:      case OP_NOT:
2988      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2989          {
2990          SCHECK_PARTIAL();
2991          MRRETURN(MATCH_NOMATCH);
2992          }
2993      ecode++;      ecode++;
2994      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2995      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2275  for (;;) Line 2998  for (;;)
2998        if (c < 256)        if (c < 256)
2999  #endif  #endif
3000        c = md->lcc[c];        c = md->lcc[c];
3001        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3002        }        }
3003      else      else
3004        {        {
3005        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3006        }        }
3007      break;      break;
3008    
# Line 2343  for (;;) Line 3066  for (;;)
3066      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3067      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3068    
3069      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3070    
3071      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3072      fc = *ecode++;      fc = *ecode++;
3073    
3074      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2373  for (;;) Line 3093  for (;;)
3093          register unsigned int d;          register unsigned int d;
3094          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3095            {            {
3096              if (eptr >= md->end_subject)
3097                {
3098                SCHECK_PARTIAL();
3099                MRRETURN(MATCH_NOMATCH);
3100                }
3101            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3102            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3103            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3104            }            }
3105          }          }
3106        else        else
# Line 2384  for (;;) Line 3109  for (;;)
3109        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3110          {          {
3111          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3112            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3113              if (eptr >= md->end_subject)
3114                {
3115                SCHECK_PARTIAL();
3116                MRRETURN(MATCH_NOMATCH);
3117                }
3118              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3119              }
3120          }          }
3121    
3122        if (min == max) continue;        if (min == max) continue;
# Line 2400  for (;;) Line 3132  for (;;)
3132              {              {
3133              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3134              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3135                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3136                if (eptr >= md->end_subject)
3137                  {
3138                  SCHECK_PARTIAL();
3139                  MRRETURN(MATCH_NOMATCH);
3140                  }
3141              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3142              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3143              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3144              }              }
3145            }            }
3146          else          else
# Line 2414  for (;;) Line 3151  for (;;)
3151              {              {
3152              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3153              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3154              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3155                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3156                  {
3157                  SCHECK_PARTIAL();
3158                  MRRETURN(MATCH_NOMATCH);
3159                  }
3160                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3161              }              }
3162            }            }
3163          /* Control never gets here */          /* Control never gets here */
# Line 2435  for (;;) Line 3177  for (;;)
3177            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3178              {              {
3179              int len = 1;              int len = 1;
3180              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3181                  {
3182                  SCHECK_PARTIAL();
3183                  break;
3184                  }
3185              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3186              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3187              if (fc == d) break;              if (fc == d) break;
# Line 2456  for (;;) Line 3202  for (;;)
3202            {            {
3203            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3204              {              {
3205              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3206                  {
3207                  SCHECK_PARTIAL();
3208                  break;
3209                  }
3210                if (fc == md->lcc[*eptr]) break;
3211              eptr++;              eptr++;
3212              }              }
3213            if (possessive) continue;            if (possessive) continue;
# Line 2468  for (;;) Line 3219  for (;;)
3219              }              }
3220            }            }
3221    
3222          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3223          }          }
3224        /* Control never gets here */        /* Control never gets here */
3225        }        }
# Line 2484  for (;;) Line 3235  for (;;)
3235          register unsigned int d;          register unsigned int d;
3236          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3237            {            {
3238              if (eptr >= md->end_subject)
3239                {
3240                SCHECK_PARTIAL();
3241                MRRETURN(MATCH_NOMATCH);
3242                }
3243            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3244            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3245            }            }
3246          }          }
3247        else        else
# Line 2493  for (;;) Line 3249  for (;;)
3249        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3250          {          {
3251          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3252            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3253              if (eptr >= md->end_subject)
3254                {
3255                SCHECK_PARTIAL();
3256                MRRETURN(MATCH_NOMATCH);
3257                }
3258              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3259              }
3260          }          }
3261    
3262        if (min == max) continue;        if (min == max) continue;
# Line 2509  for (;;) Line 3272  for (;;)
3272              {              {
3273              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3274              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3275                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3276                if (eptr >= md->end_subject)
3277                  {
3278                  SCHECK_PARTIAL();
3279                  MRRETURN(MATCH_NOMATCH);
3280                  }
3281              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3282              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3283              }              }
3284            }            }
3285          else          else
# Line 2522  for (;;) Line 3290  for (;;)
3290              {              {
3291              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3292              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3293              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3294                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3295                  {
3296                  SCHECK_PARTIAL();
3297                  MRRETURN(MATCH_NOMATCH);
3298                  }
3299                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3300              }              }
3301            }            }
3302          /* Control never gets here */          /* Control never gets here */
# Line 2543  for (;;) Line 3316  for (;;)
3316            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3317              {              {
3318              int len = 1;              int len = 1;
3319              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3320                  {
3321                  SCHECK_PARTIAL();
3322                  break;
3323                  }
3324              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3325              if (fc == d) break;              if (fc == d) break;
3326              eptr += len;              eptr += len;
# Line 2563  for (;;) Line 3340  for (;;)
3340            {            {
3341            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3342              {              {
3343              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3344                  {
3345                  SCHECK_PARTIAL();
3346                  break;
3347                  }
3348                if (fc == *eptr) break;
3349              eptr++;              eptr++;
3350              }              }
3351            if (possessive) continue;            if (possessive) continue;
# Line 2575  for (;;) Line 3357  for (;;)
3357              }              }
3358            }            }
3359    
3360          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3361          }          }
3362        }        }
3363      /* Control never gets here */      /* Control never gets here */
# Line 2657  for (;;) Line 3439  for (;;)
3439    
3440      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3441      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3442      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3443      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3444      and single-bytes. */      and single-bytes. */
3445    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3446      if (min > 0)      if (min > 0)
3447        {        {
3448  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2672  for (;;) Line 3451  for (;;)
3451          switch(prop_type)          switch(prop_type)
3452            {            {
3453            case PT_ANY:            case PT_ANY:
3454            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3455            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3456              {              {
3457              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3458              GETCHARINC(c, eptr);                {
3459                  SCHECK_PARTIAL();
3460                  MRRETURN(MATCH_NOMATCH);
3461                  }
3462                GETCHARINCTEST(c, eptr);
3463              }              }
3464            break;            break;
3465    
3466            case PT_LAMP:            case PT_LAMP:
3467            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3468              {              {
3469              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3470              GETCHARINC(c, eptr);                {
3471              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3472                  MRRETURN(MATCH_NOMATCH);
3473                  }
3474                GETCHARINCTEST(c, eptr);
3475                prop_chartype = UCD_CHARTYPE(c);
3476              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3477                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3478                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3479                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3480              }              }
3481            break;            break;
3482    
3483            case PT_GC:            case PT_GC:
3484            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3485              {              {
3486              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3487              GETCHARINC(c, eptr);                {
3488              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3489                  MRRETURN(MATCH_NOMATCH);
3490                  }
3491                GETCHARINCTEST(c, eptr);
3492                prop_category = UCD_CATEGORY(c);
3493              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3494                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3495              }              }
3496            break;            break;
3497    
3498            case PT_PC:            case PT_PC:
3499            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3500              {              {
3501              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3502              GETCHARINC(c, eptr);                {
3503              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3504                  MRRETURN(MATCH_NOMATCH);
3505                  }
3506                GETCHARINCTEST(c, eptr);
3507                prop_chartype = UCD_CHARTYPE(c);
3508              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3509                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3510              }              }
3511            break;            break;
3512    
3513            case PT_SC:            case PT_SC:
3514            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3515              {              {
3516              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3517              GETCHARINC(c, eptr);                {
3518              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3519                  MRRETURN(MATCH_NOMATCH);
3520                  }
3521                GETCHARINCTEST(c, eptr);
3522                prop_script = UCD_SCRIPT(c);
3523              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3524                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3525                }
3526              break;
3527    
3528              case PT_ALNUM:
3529              for (i = 1; i <= min; i++)
3530                {
3531                if (eptr >= md->end_subject)
3532                  {
3533                  SCHECK_PARTIAL();
3534                  MRRETURN(MATCH_NOMATCH);
3535                  }
3536                GETCHARINCTEST(c, eptr);
3537                prop_category = UCD_CATEGORY(c);
3538                if ((prop_category == ucp_L || prop_category == ucp_N)
3539                       == prop_fail_result)
3540                  MRRETURN(MATCH_NOMATCH);
3541                }
3542              break;
3543    
3544              case PT_SPACE:    /* Perl space */
3545              for (i = 1; i <= min; i++)
3546                {
3547                if (eptr >= md->end_subject)
3548                  {
3549                  SCHECK_PARTIAL();
3550                  MRRETURN(MATCH_NOMATCH);
3551                  }
3552                GETCHARINCTEST(c, eptr);
3553                prop_category = UCD_CATEGORY(c);
3554                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3555                     c == CHAR_FF || c == CHAR_CR)
3556                       == prop_fail_result)
3557                  MRRETURN(MATCH_NOMATCH);
3558                }
3559              break;
3560    
3561              case PT_PXSPACE:  /* POSIX space */
3562              for (i = 1; i <= min; i++)
3563                {
3564                if (eptr >= md->end_subject)
3565                  {
3566                  SCHECK_PARTIAL();
3567                  MRRETURN(MATCH_NOMATCH);
3568                  }
3569                GETCHARINCTEST(c, eptr);
3570                prop_category = UCD_CATEGORY(c);
3571                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3572                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3573                       == prop_fail_result)
3574                  MRRETURN(MATCH_NOMATCH);
3575              }              }
3576            break;            break;
3577    
3578              case PT_WORD:
3579              for (i = 1; i <= min; i++)
3580                {
3581                if (eptr >= md->end_subject)
3582                  {
3583                  SCHECK_PARTIAL();
3584                  MRRETURN(MATCH_NOMATCH);
3585                  }
3586                GETCHARINCTEST(c, eptr);
3587                prop_category = UCD_CATEGORY(c);
3588                if ((prop_category == ucp_L || prop_category == ucp_N ||
3589                     c == CHAR_UNDERSCORE)
3590                       == prop_fail_result)
3591                  MRRETURN(MATCH_NOMATCH);
3592                }
3593              break;
3594    
3595              /* This should not occur */
3596    
3597            default:            default:
3598            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
# Line 2738  for (;;) Line 3606  for (;;)
3606          {          {
3607          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3608            {            {
3609              if (eptr >= md->end_subject)
3610                {
3611                SCHECK_PARTIAL();
3612                MRRETURN(MATCH_NOMATCH);
3613                }
3614            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3615            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3616            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3617            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3618              {              {
3619              int len = 1;              int len = 1;
3620              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3621                {                else { GETCHARLEN(c, eptr, len); }
3622                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3623              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3624              eptr += len;              eptr += len;
3625              }              }
# Line 2766  for (;;) Line 3637  for (;;)
3637          case OP_ANY:          case OP_ANY:
3638          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3639            {            {
3640            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3641                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))              {
3642              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3643                MRRETURN(MATCH_NOMATCH);
3644                }
3645              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3646              eptr++;
3647              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3648              }
3649            break;
3650    
3651            case OP_ALLANY:
3652            for (i = 1; i <= min; i++)
3653              {
3654              if (eptr >= md->end_subject)
3655                {
3656                SCHECK_PARTIAL();
3657                MRRETURN(MATCH_NOMATCH);
3658                }
3659            eptr++;            eptr++;
3660            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3661            }            }
3662          break;          break;
3663    
3664          case OP_ANYBYTE:          case OP_ANYBYTE:
3665            if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3666          eptr += min;          eptr += min;
3667          break;          break;
3668    
3669          case OP_ANYNL:          case OP_ANYNL:
3670          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3671            {            {
3672            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3673                {
3674                SCHECK_PARTIAL();
3675                MRRETURN(MATCH_NOMATCH);
3676                }
3677            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3678            switch(c)            switch(c)
3679              {              {
3680              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3681              case 0x000d:              case 0x000d:
3682              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3683              break;              break;
3684    
3685              case 0x000a:              case 0x000a:
3686                break;
3687    
3688              case 0x000b:              case 0x000b:
3689              case 0x000c:              case 0x000c:
3690              case 0x0085:              case 0x0085:
3691              case 0x2028:              case 0x2028:
3692              case 0x2029:              case 0x2029:
3693                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3694                break;
3695                }
3696              }
3697            break;
3698    
3699            case OP_NOT_HSPACE:
3700            for (i = 1; i <= min; i++)
3701              {
3702              if (eptr >= md->end_subject)
3703                {
3704                SCHECK_PARTIAL();
3705                MRRETURN(MATCH_NOMATCH);
3706                }
3707              GETCHARINC(c, eptr);
3708              switch(c)
3709                {
3710                default: break;
3711                case 0x09:      /* HT */
3712                case 0x20:      /* SPACE */
3713                case 0xa0:      /* NBSP */
3714                case 0x1680:    /* OGHAM SPACE MARK */
3715                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3716                case 0x2000:    /* EN QUAD */
3717                case 0x2001:    /* EM QUAD */
3718                case 0x2002:    /* EN SPACE */
3719                case 0x2003:    /* EM SPACE */
3720                case 0x2004:    /* THREE-PER-EM SPACE */
3721                case 0x2005:    /* FOUR-PER-EM SPACE */
3722                case 0x2006:    /* SIX-PER-EM SPACE */
3723                case 0x2007:    /* FIGURE SPACE */
3724                case 0x2008:    /* PUNCTUATION SPACE */
3725                case 0x2009:    /* THIN SPACE */
3726                case 0x200A:    /* HAIR SPACE */
3727                case 0x202f:    /* NARROW NO-BREAK SPACE */
3728                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3729                case 0x3000:    /* IDEOGRAPHIC SPACE */
3730                MRRETURN(MATCH_NOMATCH);
3731                }
3732              }
3733            break;
3734    
3735            case OP_HSPACE:
3736            for (i = 1; i <= min; i++)
3737              {
3738              if (eptr >= md->end_subject)
3739                {
3740                SCHECK_PARTIAL();
3741                MRRETURN(MATCH_NOMATCH);
3742                }
3743              GETCHARINC(c, eptr);
3744              switch(c)
3745                {
3746                default: MRRETURN(MATCH_NOMATCH);
3747                case 0x09:      /* HT */
3748                case 0x20:      /* SPACE */
3749                case 0xa0:      /* NBSP */
3750                case 0x1680:    /* OGHAM SPACE MARK */
3751                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3752                case 0x2000:    /* EN QUAD */
3753                case 0x2001:    /* EM QUAD */
3754                case 0x2002:    /* EN SPACE */
3755                case 0x2003:    /* EM SPACE */
3756                case 0x2004:    /* THREE-PER-EM SPACE */
3757                case 0x2005:    /* FOUR-PER-EM SPACE */
3758                case 0x2006:    /* SIX-PER-EM SPACE */
3759                case 0x2007:    /* FIGURE SPACE */
3760                case 0x2008:    /* PUNCTUATION SPACE */
3761                case 0x2009:    /* THIN SPACE */
3762                case 0x200A:    /* HAIR SPACE */
3763                case 0x202f:    /* NARROW NO-BREAK SPACE */
3764                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3765                case 0x3000:    /* IDEOGRAPHIC SPACE */
3766                break;
3767                }
3768              }
3769            break;
3770    
3771            case OP_NOT_VSPACE:
3772            for (i = 1; i <= min; i++)
3773              {
3774              if (eptr >= md->end_subject)
3775                {
3776                SCHECK_PARTIAL();
3777                MRRETURN(MATCH_NOMATCH);
3778                }
3779              GETCHARINC(c, eptr);
3780              switch(c)
3781                {
3782                default: break;
3783                case 0x0a:      /* LF */
3784                case 0x0b:      /* VT */
3785                case 0x0c:      /* FF */
3786                case 0x0d:      /* CR */
3787                case 0x85:      /* NEL */
3788                case 0x2028:    /* LINE SEPARATOR */
3789                case 0x2029:    /* PARAGRAPH SEPARATOR */
3790                MRRETURN(MATCH_NOMATCH);
3791                }
3792              }
3793            break;
3794    
3795            case OP_VSPACE:
3796            for (i = 1; i <= min; i++)
3797              {
3798              if (eptr >= md->end_subject)
3799                {
3800                SCHECK_PARTIAL();
3801                MRRETURN(MATCH_NOMATCH);
3802                }
3803              GETCHARINC(c, eptr);
3804              switch(c)
3805                {
3806                default: MRRETURN(MATCH_NOMATCH);
3807                case 0x0a:      /* LF */
3808                case 0x0b:      /* VT */
3809                case 0x0c:      /* FF */
3810                case 0x0d:      /* CR */
3811                case 0x85:      /* NEL */
3812                case 0x2028:    /* LINE SEPARATOR */
3813                case 0x2029:    /* PARAGRAPH SEPARATOR */
3814              break;              break;
3815              }              }
3816            }            }
# Line 2803  for (;;) Line 3819  for (;;)
3819          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3820          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3821            {            {
3822            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3823                {
3824                SCHECK_PARTIAL();
3825                MRRETURN(MATCH_NOMATCH);
3826                }
3827            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3828            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3829              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
3830            }            }
3831          break;          break;
3832    
3833          case OP_DIGIT:          case OP_DIGIT:
3834          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3835            {            {
3836            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3837               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3838              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3839                MRRETURN(MATCH_NOMATCH);
3840                }
3841              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3842                MRRETURN(MATCH_NOMATCH);
3843            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3844            }            }
3845          break;          break;
# Line 2823  for (;;) Line 3847  for (;;)
3847          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3848          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3849            {            {
3850            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3851               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3852              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3853            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3854                }
3855              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3856                MRRETURN(MATCH_NOMATCH);
3857              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3858            }            }
3859          break;          break;
3860    
3861          case OP_WHITESPACE:          case OP_WHITESPACE:
3862          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3863            {            {
3864            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3865               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3866              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3867                MRRETURN(MATCH_NOMATCH);
3868                }
3869              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3870                MRRETURN(MATCH_NOMATCH);
3871            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3872            }            }
3873          break;          break;
# Line 2843  for (;;) Line 3875  for (;;)
3875          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3876          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3877            {            {
3878            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3879               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))              {
3880              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3881            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3882                }
3883              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3884                MRRETURN(MATCH_NOMATCH);
3885              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3886            }            }
3887          break;          break;
3888    
3889          case OP_WORDCHAR:          case OP_WORDCHAR:
3890          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3891            {            {
3892            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3893               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3894              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3895                MRRETURN(MATCH_NOMATCH);
3896                }
3897              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3898                MRRETURN(MATCH_NOMATCH);
3899            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3900            }            }
3901          break;          break;
# Line 2868  for (;;) Line 3908  for (;;)
3908  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
3909    
3910        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
3911        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum        than OP_PROP and OP_NOTPROP. */
       number of bytes present, as this was tested above. */  
3912    
3913        switch(ctype)        switch(ctype)
3914          {          {
3915          case OP_ANY:          case OP_ANY:
3916          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3917            {            {
3918            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
3919              {              {
3920              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3921              eptr++;              MRRETURN(MATCH_NOMATCH);
3922              }              }
3923              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3924              eptr++;
3925            }            }
         else eptr += min;  
3926          break;          break;
3927    
3928          case OP_ANYBYTE:          case OP_ALLANY:
3929            if (eptr > md->end_subject - min)
3930              {
3931              SCHECK_PARTIAL();
3932              MRRETURN(MATCH_NOMATCH);
3933              }
3934          eptr += min;          eptr += min;
3935          break;          break;
3936    
3937          /* Because of the CRLF case, we can't assume the minimum number of          case OP_ANYBYTE:
3938          bytes are present in this case. */          if (eptr > md->end_subject - min)
3939              {
3940              SCHECK_PARTIAL();
3941              MRRETURN(MATCH_NOMATCH);
3942              }
3943            eptr += min;
3944            break;
3945    
3946          case OP_ANYNL:          case OP_ANYNL:
3947          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3948            {            {
3949            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3950                {
3951                SCHECK_PARTIAL();
3952                MRRETURN(MATCH_NOMATCH);
3953                }
3954            switch(*eptr++)            switch(*eptr++)
3955              {              {
3956              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3957              case 0x000d:              case 0x000d:
3958              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3959              break;              break;
3960              case 0x000a:              case 0x000a:
3961                break;
3962    
3963              case 0x000b:              case 0x000b:
3964              case 0x000c:              case 0x000c:
3965              case 0x0085:              case 0x0085:
3966                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3967                break;
3968                }
3969              }
3970            break;
3971    
3972            case OP_NOT_HSPACE:
3973            for (i = 1; i <= min; i++)
3974              {
3975              if (eptr >= md->end_subject)
3976                {
3977                SCHECK_PARTIAL();
3978                MRRETURN(MATCH_NOMATCH);
3979                }
3980              switch(*eptr++)
3981                {
3982                default: break;
3983                case 0x09:      /* HT */
3984                case 0x20:      /* SPACE */
3985                case 0xa0:      /* NBSP */
3986                MRRETURN(MATCH_NOMATCH);
3987                }
3988              }
3989            break;
3990    
3991            case OP_HSPACE:
3992            for (i = 1; i <= min; i++)
3993              {
3994              if (eptr >= md->end_subject)
3995                {
3996                SCHECK_PARTIAL();
3997                MRRETURN(MATCH_NOMATCH);
3998                }
3999              switch(*eptr++)
4000                {
4001                default: MRRETURN(MATCH_NOMATCH);
4002                case 0x09:      /* HT */
4003                case 0x20:      /* SPACE */
4004                case 0xa0:      /* NBSP */
4005                break;
4006                }
4007              }
4008            break;
4009    
4010            case OP_NOT_VSPACE:
4011            for (i = 1; i <= min; i++)
4012              {
4013              if (eptr >= md->end_subject)
4014                {
4015                SCHECK_PARTIAL();
4016                MRRETURN(MATCH_NOMATCH);
4017                }
4018              switch(*eptr++)
4019                {
4020                default: break;
4021                case 0x0a:      /* LF */
4022                case 0x0b:      /* VT */
4023                case 0x0c:      /* FF */
4024                case 0x0d:      /* CR */
4025                case 0x85:      /* NEL */
4026                MRRETURN(MATCH_NOMATCH);
4027                }
4028              }
4029            break;
4030    
4031            case OP_VSPACE:
4032            for (i = 1; i <= min; i++)
4033              {
4034              if (eptr >= md->end_subject)
4035                {
4036                SCHECK_PARTIAL();
4037                MRRETURN(MATCH_NOMATCH);
4038                }
4039              switch(*eptr++)
4040                {
4041                default: MRRETURN(MATCH_NOMATCH);
4042                case 0x0a:      /* LF */
4043                case 0x0b:      /* VT */
4044                case 0x0c:      /* FF */
4045                case 0x0d:      /* CR */
4046                case 0x85:      /* NEL */
4047              break;              break;
4048              }              }
4049            }            }
# Line 2913  for (;;) Line 4051  for (;;)
4051    
4052          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
4053          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4054            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            {
4055              if (eptr >= md->end_subject)
4056                {
4057                SCHECK_PARTIAL();
4058                MRRETURN(MATCH_NOMATCH);
4059                }
4060              if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4061              }
4062          break;          break;
4063    
4064          case OP_DIGIT:          case OP_DIGIT:
4065          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4066            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            {
4067              if (eptr >= md->end_subject)
4068                {
4069                SCHECK_PARTIAL();
4070                MRRETURN(MATCH_NOMATCH);
4071                }
4072              if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4073              }
4074          break;          break;
4075    
4076          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4077          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4078            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            {
4079              if (eptr >= md->end_subject)
4080                {
4081                SCHECK_PARTIAL();
4082                MRRETURN(MATCH_NOMATCH);
4083                }
4084              if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4085              }
4086          break;          break;
4087    
4088          case OP_WHITESPACE:          case OP_WHITESPACE:
4089          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4090            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            {
4091              if (eptr >= md->end_subject)
4092                {
4093                SCHECK_PARTIAL();
4094                MRRETURN(MATCH_NOMATCH);
4095                }
4096              if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4097              }
4098          break;          break;
4099    
4100          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4101          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4102              {
4103              if (eptr >= md->end_subject)
4104                {
4105                SCHECK_PARTIAL();
4106                MRRETURN(MATCH_NOMATCH);
4107                }
4108            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if ((md->ctypes[*eptr++] & ctype_word) != 0)
4109              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4110              }
4111          break;          break;
4112    
4113          case OP_WORDCHAR:          case OP_WORDCHAR:
4114          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4115              {
4116              if (eptr >= md->end_subject)
4117                {
4118                SCHECK_PARTIAL();
4119                MRRETURN(MATCH_NOMATCH);
4120                }
4121            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if ((md->ctypes[*eptr++] & ctype_word) == 0)
4122              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4123              }
4124          break;          break;
4125    
4126          default:          default:
# Line 2968  for (;;) Line 4148  for (;;)
4148              {              {
4149              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
4150              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4151              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4152                if (eptr >= md->end_subject)
4153                  {
4154                  SCHECK_PARTIAL();
4155                  MRRETURN(MATCH_NOMATCH);
4156                  }
4157              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
4158              if (prop_fail_result) RRETURN(MATCH_NOMATCH);              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4159              }              }
4160            /* Control never gets here */            /* Control never gets here */
4161    
# Line 2979  for (;;) Line 4164  for (;;)
4164              {              {
4165              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
4166              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4167              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4168                if (eptr >= md->end_subject)
4169                  {
4170                  SCHECK_PARTIAL();
4171                  MRRETURN(MATCH_NOMATCH);
4172                  }
4173              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
4174              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
4175              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4176                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
4177                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
4178                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4179              }              }
4180            /* Control never gets here */            /* Control never gets here */
4181    
# Line 2994  for (;;) Line 4184  for (;;)
4184              {              {
4185              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
4186              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4187              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4188                if (eptr >= md->end_subject)
4189                  {
4190                  SCHECK_PARTIAL();
4191                  MRRETURN(MATCH_NOMATCH);
4192                  }
4193              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
4194              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4195              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4196                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4197              }              }
4198            /* Control never gets here */            /* Control never gets here */
4199    
# Line 3007  for (;;) Line 4202  for (;;)
4202              {              {
4203              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4204              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4205              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4206                if (eptr >= md->end_subject)
4207                  {
4208                  SCHECK_PARTIAL();
4209                  MRRETURN(MATCH_NOMATCH);
4210                  }
4211              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
4212              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
4213              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4214                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4215              }              }
4216            /* Control never gets here */            /* Control never gets here */
4217    
# Line 3020  for (;;) Line 4220  for (;;)
4220              {              {
4221              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
4222              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4223              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4224                if (eptr >= md->end_subject)
4225                  {
4226                  SCHECK_PARTIAL();
4227                  MRRETURN(MATCH_NOMATCH);
4228                  }
4229              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
4230              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
4231              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4232                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4233              }              }
4234            /* Control never gets here */            /* Control never gets here */
4235    
4236              case PT_ALNUM:
4237              for (fi = min;; fi++)
4238                {
4239                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4240                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4241                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4242                if (eptr >= md->end_subject)
4243                  {
4244                  SCHECK_PARTIAL();
4245                  MRRETURN(MATCH_NOMATCH);
4246                  }
4247                GETCHARINC(c, eptr);
4248                prop_category = UCD_CATEGORY(c);
4249                if ((prop_category == ucp_L || prop_category == ucp_N)
4250                       == prop_fail_result)
4251                  MRRETURN(MATCH_NOMATCH);
4252                }
4253              /* Control never gets here */
4254    
4255              case PT_SPACE:    /* Perl space */
4256              for (fi = min;; fi++)
4257                {
4258                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4259                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4260                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4261                if (eptr >= md->end_subject)
4262                  {
4263                  SCHECK_PARTIAL();
4264                  MRRETURN(MATCH_NOMATCH);
4265                  }
4266                GETCHARINC(c, eptr);
4267                prop_category = UCD_CATEGORY(c);
4268                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4269                     c == CHAR_FF || c == CHAR_CR)
4270                       == prop_fail_result)
4271                  MRRETURN(MATCH_NOMATCH);
4272                }
4273              /* Control never gets here */
4274    
4275              case PT_PXSPACE:  /* POSIX space */
4276              for (fi = min;; fi++)
4277                {
4278                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4279                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4280                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4281                if (eptr >= md->end_subject)
4282                  {
4283                  SCHECK_PARTIAL();
4284                  MRRETURN(MATCH_NOMATCH);
4285                  }
4286                GETCHARINC(c, eptr);
4287                prop_category = UCD_CATEGORY(c);
4288                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4289                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4290                       == prop_fail_result)
4291                  MRRETURN(MATCH_NOMATCH);
4292                }
4293              /* Control never gets here */
4294    
4295              case PT_WORD:
4296              for (fi = min;; fi++)
4297                {
4298                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4299                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4300                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4301                if (eptr >= md->end_subject)
4302                  {
4303                  SCHECK_PARTIAL();
4304                  MRRETURN(MATCH_NOMATCH);
4305                  }
4306                GETCHARINC(c, eptr);
4307                prop_category = UCD_CATEGORY(c);
4308                if ((prop_category == ucp_L ||
4309                     prop_category == ucp_N ||
4310                     c == CHAR_UNDERSCORE)
4311                       == prop_fail_result)
4312                  MRRETURN(MATCH_NOMATCH);
4313                }
4314              /* Control never gets here */
4315    
4316              /* This should never occur */
4317    
4318            default:            default:
4319            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
4320            }            }
# Line 3042  for (;;) Line 4329  for (;;)
4329            {            {
4330            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
4331            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4332            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4333              if (eptr >= md->end_subject)
4334                {
4335                SCHECK_PARTIAL();
4336                MRRETURN(MATCH_NOMATCH);
4337                }
4338            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4339            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
4340            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
4341            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4342              {              {
4343              int len = 1;              int len = 1;
4344              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
4345                {                else { GETCHARLEN(c, eptr, len); }
4346                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
4347              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
4348              eptr += len;              eptr += len;
4349              }              }
# Line 3071  for (;;) Line 4361  for (;;)
4361            {            {
4362            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
4363            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4364            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4365                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&            if (eptr >= md->end_subject)
4366                  IS_NEWLINE(eptr)))              {
4367              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
4368                MRRETURN(MATCH_NOMATCH);
4369                }
4370              if (ctype == OP_ANY && IS_NEWLINE(eptr))
4371                MRRETURN(MATCH_NOMATCH);
4372            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4373            switch(ctype)            switch(ctype)
4374              {              {
4375              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
4376              break;              case OP_ALLANY:
   
4377              case OP_ANYBYTE:              case OP_ANYBYTE:
4378              break;              break;
4379    
4380              case OP_ANYNL:              case OP_ANYNL:
4381              switch(c)              switch(c)
4382                {                {
4383                default: RRETURN(MATCH_NOMATCH);                default: MRRETURN(MATCH_NOMATCH);
4384                case 0x000d:                case 0x000d:
4385                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4386                break;                break;
4387                case 0x000a:                case 0x000a:
4388                  break;
4389    
4390                case 0x000b:                case 0x000b:
4391                case 0x000c:                case 0x000c:
4392                case 0x0085:                case 0x0085:
4393                case 0x2028:                case 0x2028:
4394                case 0x2029:                case 0x2029:
4395                  if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4396                  break;
4397                  }
4398                break;
4399    
4400                case OP_NOT_HSPACE:
4401                switch(c)
4402                  {
4403                  default: break;
4404                  case 0x09:      /* HT */
4405                  case 0x20:      /* SPACE */
4406                  case 0xa0:      /* NBSP */
4407                  case 0x1680:    /* OGHAM SPACE MARK */
4408                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4409                  case 0x2000:    /* EN QUAD */
4410                  case 0x2001:    /* EM QUAD */
4411                  case 0x2002:    /* EN SPACE */
4412                  case 0x2003:    /* EM SPACE */
4413                  case 0x2004:    /* THREE-PER-EM SPACE */
4414                  case 0x2005:    /* FOUR-PER-EM SPACE */
4415                  case 0x2006:    /* SIX-PER-EM SPACE */
4416                  case 0x2007:    /* FIGURE SPACE */
4417                  case 0x2008:    /* PUNCTUATION SPACE */
4418                  case 0x2009:    /* THIN SPACE */
4419                  case 0x200A:    /* HAIR SPACE */
4420                  case 0x202f:    /* NARROW NO-BREAK SPACE */
4421                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4422                  case 0x3000:    /* IDEOGRAPHIC SPACE */
4423                  MRRETURN(MATCH_NOMATCH);
4424                  }
4425                break;
4426    
4427                case OP_HSPACE:
4428                switch(c)
4429                  {
4430                  default: MRRETURN(MATCH_NOMATCH);
4431                  case 0x09:      /* HT */
4432                  case 0x20:      /* SPACE */
4433                  case 0xa0:      /* NBSP */
4434                  case 0x1680:    /* OGHAM SPACE MARK */
4435                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4436                  case 0x2000:    /* EN QUAD */
4437                  case 0x2001:    /* EM QUAD */
4438                  case 0x2002:    /* EN SPACE */
4439                  case 0x2003:    /* EM SPACE */
4440                  case 0x2004:    /* THREE-PER-EM SPACE */
4441                  case 0x2005:    /* FOUR-PER-EM SPACE */
4442                  case 0x2006:    /* SIX-PER-EM SPACE */
4443                  case 0x2007:    /* FIGURE SPACE */
4444                  case 0x2008:    /* PUNCTUATION SPACE */
4445                  case 0x2009:    /* THIN SPACE */
4446                  case 0x200A:    /* HAIR SPACE */
4447                  case 0x202f:    /* NARROW NO-BREAK SPACE */
4448                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4449                  case 0x3000:    /* IDEOGRAPHIC SPACE */
4450                  break;
4451                  }
4452                break;
4453    
4454                case OP_NOT_VSPACE:
4455                switch(c)
4456                  {
4457                  default: break;
4458                  case 0x0a:      /* LF */
4459                  case 0x0b:      /* VT */
4460                  case 0x0c:      /* FF */
4461                  case 0x0d:      /* CR */
4462                  case 0x85:      /* NEL */
4463                  case 0x2028:    /* LINE SEPARATOR */
4464                  case 0x2029:    /* PARAGRAPH SEPARATOR */
4465                  MRRETURN(MATCH_NOMATCH);
4466                  }
4467                break;
4468    
4469                case OP_VSPACE:
4470                switch(c)
4471                  {
4472                  default: MRRETURN(MATCH_NOMATCH);
4473                  case 0x0a:      /* LF */
4474                  case 0x0b:      /* VT */
4475                  case 0x0c:      /* FF */
4476                  case 0x0d:      /* CR */
4477                  case 0x85:      /* NEL */
4478                  case 0x2028:    /* LINE SEPARATOR */
4479                  case 0x2029:    /* PARAGRAPH SEPARATOR */
4480                break;                break;
4481                }                }
4482              break;              break;
4483    
4484              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
4485              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
4486                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4487              break;              break;
4488    
4489              case OP_DIGIT:              case OP_DIGIT:
4490              if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)              if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
4491                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4492              break;              break;
4493    
4494              case OP_NOT_WHITESPACE:              case OP_NOT_WHITESPACE:
4495              if (c < 256 && (md->ctypes[c] & ctype_space) != 0)              if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
4496                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4497              break;              break;
4498    
4499              case OP_WHITESPACE:              case OP_WHITESPACE:
4500              if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)              if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
4501                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4502              break;              break;
4503    
4504              case OP_NOT_WORDCHAR:              case OP_NOT_WORDCHAR:
4505              if (c < 256 && (md->ctypes[c] & ctype_word) != 0)              if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
4506                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4507              break;              break;
4508    
4509              case OP_WORDCHAR:              case OP_WORDCHAR:
4510              if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)              if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
4511                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4512              break;              break;
4513    
4514              default:              default:
# Line 3145  for (;;) Line 4524  for (;;)
4524            {            {
4525            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4526            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4527            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4528                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))            if (eptr >= md->end_subject)
4529              RRETURN(MATCH_NOMATCH);              {
4530                SCHECK_PARTIAL();
4531                MRRETURN(MATCH_NOMATCH);
4532                }
4533              if (ctype == OP_ANY && IS_NEWLINE(eptr))
4534                MRRETURN(MATCH_NOMATCH);
4535            c = *eptr++;            c = *eptr++;
4536            switch(ctype)            switch(ctype)
4537              {              {
4538              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
4539              break;              case OP_ALLANY:
   
4540              case OP_ANYBYTE:              case OP_ANYBYTE:
4541              break;         &nbs