/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 150 by ph10, Tue Apr 17 08:22:40 2007 UTC revision 530 by ph10, Tue Jun 1 13:42:06 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 53  possible. There are also some static sup Line 57  possible. There are also some static sup
57  #undef min  #undef min
58  #undef max  #undef max
59    
 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
   
 #define EPTR_WORK_SIZE (1000)  
   
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 70  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_ACCEPT       (-999)
75    #define MATCH_COMMIT       (-998)
76    #define MATCH_PRUNE        (-997)
77    #define MATCH_SKIP         (-996)
78    #define MATCH_SKIP_ARG     (-995)
79    #define MATCH_THEN         (-994)
80    
81    /* This is a convenience macro for code that occurs many times. */
82    
83    #define MRRETURN(ra) \
84      { \
85      md->mark = markptr; \
86      RRETURN(ra); \
87      }
88    
89  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
90  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 83  static const char rep_max[] = { 0, 0, 0, Line 99  static const char rep_max[] = { 0, 0, 0,
99    
100    
101    
102  #ifdef DEBUG  #ifdef PCRE_DEBUG
103  /*************************************************  /*************************************************
104  *        Debugging function to print chars       *  *        Debugging function to print chars       *
105  *************************************************/  *************************************************/
# Line 135  match_ref(int offset, register USPTR ept Line 151  match_ref(int offset, register USPTR ept
151  {  {
152  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
153    
154  #ifdef DEBUG  #ifdef PCRE_DEBUG
155  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
156    printf("matching subject <null>");    printf("matching subject <null>");
157  else  else
# Line 152  printf("\n"); Line 168  printf("\n");
168    
169  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
170    
171  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172    properly if Unicode properties are supported. Otherwise, we can check only
173    ASCII characters. */
174    
175  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
176    {    {
177    #ifdef SUPPORT_UTF8
178    #ifdef SUPPORT_UCP
179      if (md->utf8)
180        {
181        USPTR endptr = eptr + length;
182        while (eptr < endptr)
183          {
184          int c, d;
185          GETCHARINC(c, eptr);
186          GETCHARINC(d, p);
187          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188          }
189        }
190      else
191    #endif
192    #endif
193    
194      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195      is no UCP support. */
196    
197    while (length-- > 0)    while (length-- > 0)
198      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199    }    }
200    
201    /* In the caseful case, we can just compare the bytes, whether or not we
202    are in UTF-8 mode. */
203    
204  else  else
205    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
# Line 188  calls by keeping local variables that ne Line 230  calls by keeping local variables that ne
230  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
231  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
232  always used to.  always used to.
233    
234    The original heap-recursive code used longjmp(). However, it seems that this
235    can be very slow on some operating systems. Following a suggestion from Stan
236    Switzer, the use of longjmp() has been abolished, at the cost of having to
237    provide a unique number for each call to RMATCH. There is no way of generating
238    a sequence of numbers at compile time in C. I have given them names, to make
239    them stand out more clearly.
240    
241    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243    tests. Furthermore, not using longjmp() means that local dynamic variables
244    don't have indeterminate values; this has meant that the frame size can be
245    reduced because the result can be "passed back" by straight setting of the
246    variable instead of being passed in the frame.
247  ****************************************************************************  ****************************************************************************
248  ***************************************************************************/  ***************************************************************************/
249    
250    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251    below must be updated in sync.  */
252    
253    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
254           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
255           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258           RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
259           RM61,  RM62 };
260    
261  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
262  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
263    actually used in this definition. */
264    
265  #ifndef NO_RECURSE  #ifndef NO_RECURSE
266  #define REGISTER register  #define REGISTER register
267  #ifdef DEBUG  
268  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #ifdef PCRE_DEBUG
269    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
270    { \    { \
271    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
272    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
273    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
274    }    }
275  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 210  versions and production versions. */ Line 278  versions and production versions. */
278    return ra; \    return ra; \
279    }    }
280  #else  #else
281  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
282    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
283  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
284  #endif  #endif
285    
286  #else  #else
287    
288    
289  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
290  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
291  match(), which never changes. */  argument of match(), which never changes. */
292    
293  #define REGISTER  #define REGISTER
294    
295  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
296    {\    {\
297    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
298    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
299      {\    newframe->Xeptr = ra;\
300      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
301      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
302      newframe->Xoffset_top = rc;\    newframe->Xmarkptr = markptr;\
303      newframe->Xims = re;\    newframe->Xoffset_top = rc;\
304      newframe->Xeptrb = rf;\    newframe->Xims = re;\
305      newframe->Xflags = rg;\    newframe->Xeptrb = rf;\
306      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xflags = rg;\
307      newframe->Xprevframe = frame;\    newframe->Xrdepth = frame->Xrdepth + 1;\
308      frame = newframe;\    newframe->Xprevframe = frame;\
309      DPRINTF(("restarting from line %d\n", __LINE__));\    frame = newframe;\
310      goto HEAP_RECURSE;\    DPRINTF(("restarting from line %d\n", __LINE__));\
311      }\    goto HEAP_RECURSE;\
312    else\    L_##rw:\
313      {\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
314    }    }
315    
316  #define RRETURN(ra)\  #define RRETURN(ra)\
317    {\    {\
318    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
319    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
320    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
321    if (frame != NULL)\    if (frame != NULL)\
322      {\      {\
323      frame->Xresult = ra;\      rrc = ra;\
324      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
325      }\      }\
326    return ra;\    return ra;\
327    }    }
# Line 271  typedef struct heapframe { Line 334  typedef struct heapframe {
334    
335    /* Function arguments that may change */    /* Function arguments that may change */
336    
337    const uschar *Xeptr;    USPTR Xeptr;
338    const uschar *Xecode;    const uschar *Xecode;
339      USPTR Xmstart;
340      USPTR Xmarkptr;
341    int Xoffset_top;    int Xoffset_top;
342    long int Xims;    long int Xims;
343    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 281  typedef struct heapframe { Line 346  typedef struct heapframe {
346    
347    /* Function local variables */    /* Function local variables */
348    
349    const uschar *Xcallpat;    USPTR Xcallpat;
350    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
351    const uschar *Xdata;    USPTR Xcharptr;
352    const uschar *Xnext;  #endif
353    const uschar *Xpp;    USPTR Xdata;
354    const uschar *Xprev;    USPTR Xnext;
355    const uschar *Xsaved_eptr;    USPTR Xpp;
356      USPTR Xprev;
357      USPTR Xsaved_eptr;
358    
359    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
360    
# Line 308  typedef struct heapframe { Line 375  typedef struct heapframe {
375    uschar Xocchars[8];    uschar Xocchars[8];
376  #endif  #endif
377    
378      int Xcodelink;
379    int Xctype;    int Xctype;
380    unsigned int Xfc;    unsigned int Xfc;
381    int Xfi;    int Xfi;
# Line 323  typedef struct heapframe { Line 391  typedef struct heapframe {
391    
392    eptrblock Xnewptrb;    eptrblock Xnewptrb;
393    
394    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
395    
396    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
397    
398  } heapframe;  } heapframe;
399    
# Line 344  typedef struct heapframe { Line 411  typedef struct heapframe {
411    
412  /* This function is called recursively in many circumstances. Whenever it  /* This function is called recursively in many circumstances. Whenever it
413  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
414  same response.  same response. */
415    
416  Performance note: It might be tempting to extract commonly used fields from the  /* These macros pack up tests that are used for partial matching, and which
417  md structure (e.g. utf8, end_subject) into individual variables to improve  appear several times in the code. We set the "hit end" flag if the pointer is
418    at the end of the subject and also past the start of the subject (i.e.
419    something has been matched). For hard partial matching, we then return
420    immediately. The second one is used when we already know we are past the end of
421    the subject. */
422    
423    #define CHECK_PARTIAL()\
424      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
425        {\
426        md->hitend = TRUE;\
427        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
428        }
429    
430    #define SCHECK_PARTIAL()\
431      if (md->partial != 0 && eptr > mstart)\
432        {\
433        md->hitend = TRUE;\
434        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
435        }
436    
437    
438    /* Performance note: It might be tempting to extract commonly used fields from
439    the md structure (e.g. utf8, end_subject) into individual variables to improve
440  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
441  made performance worse.  made performance worse.
442    
443  Arguments:  Arguments:
444     eptr        pointer to current character in subject     eptr        pointer to current character in subject
445     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
446       mstart      pointer to the current match start position (can be modified
447                     by encountering \K)
448       markptr     pointer to the most recent MARK name, or NULL
449     offset_top  current top pointer     offset_top  current top pointer
450     md          pointer to "static" info for the match     md          pointer to "static" info for the match
451     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 363  Arguments: Line 455  Arguments:
455                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
456                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
457                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
458     rdepth      the recursion depth     rdepth      the recursion depth
459    
460  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
461                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
462                   a negative MATCH_xxx value for PRUNE, SKIP, etc
463                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
464                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
465  */  */
466    
467  static int  static int
468  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
469    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
470    int flags, unsigned int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
471  {  {
472  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
473  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 387  register unsigned int c;   /* Character Line 479  register unsigned int c;   /* Character
479  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
480    
481  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
482    int condcode;
483    
484  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
485  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 401  frame->Xprevframe = NULL;            /* Line 494  frame->Xprevframe = NULL;            /*
494    
495  frame->Xeptr = eptr;  frame->Xeptr = eptr;
496  frame->Xecode = ecode;  frame->Xecode = ecode;
497    frame->Xmstart = mstart;
498    frame->Xmarkptr = markptr;
499  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
500  frame->Xims = ims;  frame->Xims = ims;
501  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 415  HEAP_RECURSE: Line 510  HEAP_RECURSE:
510    
511  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
512  #define ecode              frame->Xecode  #define ecode              frame->Xecode
513    #define mstart             frame->Xmstart
514    #define markptr            frame->Xmarkptr
515  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
516  #define ims                frame->Xims  #define ims                frame->Xims
517  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 427  HEAP_RECURSE: Line 524  HEAP_RECURSE:
524  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
525  #endif  #endif
526  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
527    #define codelink           frame->Xcodelink
528  #define data               frame->Xdata  #define data               frame->Xdata
529  #define next               frame->Xnext  #define next               frame->Xnext
530  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 507  int oclength; Line 605  int oclength;
605  uschar occhars[8];  uschar occhars[8];
606  #endif  #endif
607    
608    int codelink;
609  int ctype;  int ctype;
610  int length;  int length;
611  int max;  int max;
# Line 540  TAIL_RECURSE: Line 639  TAIL_RECURSE:
639  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
640  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
641  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
642  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
643  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
644  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
645  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
646    
647    #ifdef SUPPORT_UTF8
648    utf8 = md->utf8;       /* Local copy of the flag */
649    #else
650    utf8 = FALSE;
651    #endif
652    
653  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
654  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
655    
# Line 553  if (rdepth >= md->match_limit_recursion) Line 658  if (rdepth >= md->match_limit_recursion)
658    
659  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
660    
 #ifdef SUPPORT_UTF8  
 utf8 = md->utf8;       /* Local copy of the flag */  
 #else  
 utf8 = FALSE;  
 #endif  
   
661  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
662  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
663  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
664  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
665  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
666  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
667  already used. */  block that is used is on the stack, so a new one may be required for each
668    match(). */
669    
670  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
671    {    {
672    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
673    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
674      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
675    }    }
676    
677  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 588  for (;;) Line 681  for (;;)
681    minimize = possessive = FALSE;    minimize = possessive = FALSE;
682    op = *ecode;    op = *ecode;
683    
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
684    switch(op)    switch(op)
685      {      {
686        case OP_MARK:
687        markptr = ecode + 2;
688        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
689          ims, eptrb, flags, RM55);
690    
691        /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
692        argument, and we must check whether that argument matches this MARK's
693        argument. It is passed back in md->start_match_ptr (an overloading of that
694        variable). If it does match, we reset that variable to the current subject
695        position and return MATCH_SKIP. Otherwise, pass back the return code
696        unaltered. */
697    
698        if (rrc == MATCH_SKIP_ARG &&
699            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
700          {
701          md->start_match_ptr = eptr;
702          RRETURN(MATCH_SKIP);
703          }
704    
705        if (md->mark == NULL) md->mark = markptr;
706        RRETURN(rrc);
707    
708        case OP_FAIL:
709        MRRETURN(MATCH_NOMATCH);
710    
711        case OP_COMMIT:
712        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
713          ims, eptrb, flags, RM52);
714        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
715        MRRETURN(MATCH_COMMIT);
716    
717        case OP_PRUNE:
718        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
719          ims, eptrb, flags, RM51);
720        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
721        MRRETURN(MATCH_PRUNE);
722    
723        case OP_PRUNE_ARG:
724        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
725          ims, eptrb, flags, RM56);
726        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
727        md->mark = ecode + 2;
728        RRETURN(MATCH_PRUNE);
729    
730        case OP_SKIP:
731        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
732          ims, eptrb, flags, RM53);
733        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
734        md->start_match_ptr = eptr;   /* Pass back current position */
735        MRRETURN(MATCH_SKIP);
736    
737        case OP_SKIP_ARG:
738        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
739          ims, eptrb, flags, RM57);
740        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
741    
742        /* Pass back the current skip name by overloading md->start_match_ptr and
743        returning the special MATCH_SKIP_ARG return code. This will either be
744        caught by a matching MARK, or get to the top, where it is treated the same
745        as PRUNE. */
746    
747        md->start_match_ptr = ecode + 2;
748        RRETURN(MATCH_SKIP_ARG);
749    
750        case OP_THEN:
751        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
752          ims, eptrb, flags, RM54);
753        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
754        MRRETURN(MATCH_THEN);
755    
756        case OP_THEN_ARG:
757        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
758          ims, eptrb, flags, RM58);
759        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
760        md->mark = ecode + 2;
761        RRETURN(MATCH_THEN);
762    
763      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
764      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
765      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 617  for (;;) Line 779  for (;;)
779      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
780      offset = number << 1;      offset = number << 1;
781    
782  #ifdef DEBUG  #ifdef PCRE_DEBUG
783      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
784      printf("subject=");      printf("subject=");
785      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
# Line 632  for (;;) Line 794  for (;;)
794        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
795    
796        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
797        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
798            (int)(eptr - md->start_subject);
799    
800        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
801        do        do
802          {          {
803          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
804            ims, eptrb, flags);            ims, eptrb, flags, RM1);
805          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
806          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
807          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
808          }          }
# Line 651  for (;;) Line 814  for (;;)
814        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
815        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
816    
817          if (rrc != MATCH_THEN) md->mark = markptr;
818        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
819        }        }
820    
821      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
822      bracket. */      as a non-capturing bracket. */
823    
824        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
825        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
826    
827      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
828    
829        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
830        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
831    
832      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
833      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
834      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
835      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
836        is set. */
837    
838      case OP_BRA:      case OP_BRA:
839      case OP_SBRA:      case OP_SBRA:
# Line 670  for (;;) Line 841  for (;;)
841      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
842      for (;;)      for (;;)
843        {        {
844        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
845          {          {
846          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
847          flags |= match_tail_recursed;            {
848          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
849          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
850              goto TAIL_RECURSE;
851              }
852    
853            /* Possibly empty group; can't use tail recursion. */
854    
855            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
856              eptrb, flags, RM48);
857            if (rrc == MATCH_NOMATCH) md->mark = markptr;
858            RRETURN(rrc);
859          }          }
860    
861        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
862        otherwise return. */        otherwise return. */
863    
864        RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
865          eptrb, flags);          eptrb, flags, RM2);
866        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
867        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
868        }        }
869      /* Control never reaches here. */      /* Control never reaches here. */
# Line 696  for (;;) Line 876  for (;;)
876    
877      case OP_COND:      case OP_COND:
878      case OP_SCOND:      case OP_SCOND:
879      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
880    
881        /* Because of the way auto-callout works during compile, a callout item is
882        inserted between OP_COND and an assertion condition. */
883    
884        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
885        {        {
886        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        if (pcre_callout != NULL)
887        condition = md->recursive != NULL &&          {
888          (offset == RREF_ANY || offset == md->recursive->group_num);          pcre_callout_block cb;
889        ecode += condition? 3 : GET(ecode, 1);          cb.version          = 1;   /* Version 1 of the callout block */
890            cb.callout_number   = ecode[LINK_SIZE+2];
891            cb.offset_vector    = md->offset_vector;
892            cb.subject          = (PCRE_SPTR)md->start_subject;
893            cb.subject_length   = (int)(md->end_subject - md->start_subject);
894            cb.start_match      = (int)(mstart - md->start_subject);
895            cb.current_position = (int)(eptr - md->start_subject);
896            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
897            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
898            cb.capture_top      = offset_top/2;
899            cb.capture_last     = md->capture_last;
900            cb.callout_data     = md->callout_data;
901            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
902            if (rrc < 0) RRETURN(rrc);
903            }
904          ecode += _pcre_OP_lengths[OP_CALLOUT];
905          }
906    
907        condcode = ecode[LINK_SIZE+1];
908    
909        /* Now see what the actual condition is */
910    
911        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
912          {
913          if (md->recursive == NULL)                /* Not recursing => FALSE */
914            {
915            condition = FALSE;
916            ecode += GET(ecode, 1);
917            }
918          else
919            {
920            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
921            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
922    
923            /* If the test is for recursion into a specific subpattern, and it is
924            false, but the test was set up by name, scan the table to see if the
925            name refers to any other numbers, and test them. The condition is true
926            if any one is set. */
927    
928            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
929              {
930              uschar *slotA = md->name_table;
931              for (i = 0; i < md->name_count; i++)
932                {
933                if (GET2(slotA, 0) == recno) break;
934                slotA += md->name_entry_size;
935                }
936    
937              /* Found a name for the number - there can be only one; duplicate
938              names for different numbers are allowed, but not vice versa. First
939              scan down for duplicates. */
940    
941              if (i < md->name_count)
942                {
943                uschar *slotB = slotA;
944                while (slotB > md->name_table)
945                  {
946                  slotB -= md->name_entry_size;
947                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
948                    {
949                    condition = GET2(slotB, 0) == md->recursive->group_num;
950                    if (condition) break;
951                    }
952                  else break;
953                  }
954    
955                /* Scan up for duplicates */
956    
957                if (!condition)
958                  {
959                  slotB = slotA;
960                  for (i++; i < md->name_count; i++)
961                    {
962                    slotB += md->name_entry_size;
963                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
964                      {
965                      condition = GET2(slotB, 0) == md->recursive->group_num;
966                      if (condition) break;
967                      }
968                    else break;
969                    }
970                  }
971                }
972              }
973    
974            /* Choose branch according to the condition */
975    
976            ecode += condition? 3 : GET(ecode, 1);
977            }
978        }        }
979    
980      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
981        {        {
982        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
983        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
984    
985          /* If the numbered capture is unset, but the reference was by name,
986          scan the table to see if the name refers to any other numbers, and test
987          them. The condition is true if any one is set. This is tediously similar
988          to the code above, but not close enough to try to amalgamate. */
989    
990          if (!condition && condcode == OP_NCREF)
991            {
992            int refno = offset >> 1;
993            uschar *slotA = md->name_table;
994    
995            for (i = 0; i < md->name_count; i++)
996              {
997              if (GET2(slotA, 0) == refno) break;
998              slotA += md->name_entry_size;
999              }
1000    
1001            /* Found a name for the number - there can be only one; duplicate names
1002            for different numbers are allowed, but not vice versa. First scan down
1003            for duplicates. */
1004    
1005            if (i < md->name_count)
1006              {
1007              uschar *slotB = slotA;
1008              while (slotB > md->name_table)
1009                {
1010                slotB -= md->name_entry_size;
1011                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1012                  {
1013                  offset = GET2(slotB, 0) << 1;
1014                  condition = offset < offset_top &&
1015                    md->offset_vector[offset] >= 0;
1016                  if (condition) break;
1017                  }
1018                else break;
1019                }
1020    
1021              /* Scan up for duplicates */
1022    
1023              if (!condition)
1024                {
1025                slotB = slotA;
1026                for (i++; i < md->name_count; i++)
1027                  {
1028                  slotB += md->name_entry_size;
1029                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1030                    {
1031                    offset = GET2(slotB, 0) << 1;
1032                    condition = offset < offset_top &&
1033                      md->offset_vector[offset] >= 0;
1034                    if (condition) break;
1035                    }
1036                  else break;
1037                  }
1038                }
1039              }
1040            }
1041    
1042          /* Choose branch according to the condition */
1043    
1044        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
1045        }        }
1046    
1047      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
1048        {        {
1049        condition = FALSE;        condition = FALSE;
1050        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 723  for (;;) Line 1056  for (;;)
1056    
1057      else      else
1058        {        {
1059        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1060            match_condassert);            match_condassert, RM3);
1061        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1062          {          {
1063          condition = TRUE;          condition = TRUE;
1064          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1065          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1066          }          }
1067        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1068          {          {
1069          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1070          }          }
1071        else        else
1072          {          {
1073          condition = FALSE;          condition = FALSE;
1074          ecode += GET(ecode, 1);          ecode += codelink;
1075          }          }
1076        }        }
1077    
1078      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1079      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
1080      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
1081        group. If the second alternative doesn't exist, we can just plough on. */
1082    
1083      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1084        {        {
1085        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1086        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
1087        goto TAIL_RECURSE;          {
1088            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1089            RRETURN(rrc);
1090            }
1091          else                       /* Group must match something */
1092            {
1093            flags = 0;
1094            goto TAIL_RECURSE;
1095            }
1096        }        }
1097      else      else                         /* Condition false & no alternative */
1098        {        {
1099        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1100        }        }
1101      break;      break;
1102    
1103    
1104      /* End of the pattern. If we are in a top-level recursion, we should      /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1105      restore the offsets appropriately and continue from after the call. */      to close any currently open capturing brackets. */
1106    
1107        case OP_CLOSE:
1108        number = GET2(ecode, 1);
1109        offset = number << 1;
1110    
1111    #ifdef PCRE_DEBUG
1112          printf("end bracket %d at *ACCEPT", number);
1113          printf("\n");
1114    #endif
1115    
1116        md->capture_last = number;
1117        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1118          {
1119          md->offset_vector[offset] =
1120            md->offset_vector[md->offset_end - number];
1121          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1122          if (offset_top <= offset) offset_top = offset + 2;
1123          }
1124        ecode += 3;
1125        break;
1126    
1127    
1128        /* End of the pattern, either real or forced. If we are in a top-level
1129        recursion, we should restore the offsets appropriately and continue from
1130        after the call. */
1131    
1132        case OP_ACCEPT:
1133      case OP_END:      case OP_END:
1134      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1135        {        {
# Line 770  for (;;) Line 1138  for (;;)
1138        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1139        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1140          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1141        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1142        ims = original_ims;        ims = original_ims;
1143        ecode = rec->after_call;        ecode = rec->after_call;
1144        break;        break;
1145        }        }
1146    
1147      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1148      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1149        the subject. In both cases, backtracking will then try other alternatives,
1150        if any. */
1151    
1152        if (eptr == mstart &&
1153            (md->notempty ||
1154              (md->notempty_atstart &&
1155                mstart == md->start_subject + md->start_offset)))
1156          MRRETURN(MATCH_NOMATCH);
1157    
1158        /* Otherwise, we have a match. */
1159    
1160        md->end_match_ptr = eptr;           /* Record where we ended */
1161        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1162        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1163    
1164      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      /* For some reason, the macros don't work properly if an expression is
1165      md->end_match_ptr = eptr;          /* Record where we ended */      given as the argument to MRRETURN when the heap is in use. */
1166      md->end_offset_top = offset_top;   /* and how many extracts were taken */  
1167      RRETURN(MATCH_MATCH);      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1168        MRRETURN(rrc);
1169    
1170      /* Change option settings */      /* Change option settings */
1171    
# Line 802  for (;;) Line 1185  for (;;)
1185      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1186      do      do
1187        {        {
1188        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1189        if (rrc == MATCH_MATCH) break;          RM4);
1190        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1191            {
1192            mstart = md->start_match_ptr;   /* In case \K reset it */
1193            break;
1194            }
1195          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1196        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1197        }        }
1198      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1199      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1200    
1201      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1202    
# Line 822  for (;;) Line 1210  for (;;)
1210      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1211      continue;      continue;
1212    
1213      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1214        PRUNE, or COMMIT means we must assume failure without checking subsequent
1215        branches. */
1216    
1217      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1218      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1219      do      do
1220        {        {
1221        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1222        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);          RM5);
1223        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1224          if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1225            {
1226            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1227            break;
1228            }
1229          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1230        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1231        }        }
1232      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 853  for (;;) Line 1249  for (;;)
1249        while (i-- > 0)        while (i-- > 0)
1250          {          {
1251          eptr--;          eptr--;
1252          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1253          BACKCHAR(eptr)          BACKCHAR(eptr);
1254          }          }
1255        }        }
1256      else      else
# Line 864  for (;;) Line 1260  for (;;)
1260    
1261        {        {
1262        eptr -= GET(ecode, 1);        eptr -= GET(ecode, 1);
1263        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1264        }        }
1265    
1266      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1267    
1268        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1269      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1270      break;      break;
1271    
# Line 884  for (;;) Line 1281  for (;;)
1281        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1282        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1283        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1284        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1285        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1286        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1287        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1288        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1289        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1290        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1291        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1292        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1293        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1294        }        }
1295      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 947  for (;;) Line 1344  for (;;)
1344    
1345        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1346              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1347        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1348    
1349        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1350        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 957  for (;;) Line 1353  for (;;)
1353        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1354        do        do
1355          {          {
1356          RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1357            md, ims, eptrb, flags);            md, ims, eptrb, flags, RM6);
1358          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1359            {            {
1360            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1361            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1362            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1363              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1364            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1365            }            }
1366          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1367            {            {
1368            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1369              if (new_recursive.offset_save != stacksave)
1370                (pcre_free)(new_recursive.offset_save);
1371            RRETURN(rrc);            RRETURN(rrc);
1372            }            }
1373    
# Line 984  for (;;) Line 1382  for (;;)
1382        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1383        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1384          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1385        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1386        }        }
1387      /* Control never reaches here */      /* Control never reaches here */
1388    
# Line 993  for (;;) Line 1391  for (;;)
1391      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1392      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1393      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1394      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1395        the start-of-match value in case it was changed by \K. */
1396    
1397      case OP_ONCE:      case OP_ONCE:
1398      prev = ecode;      prev = ecode;
# Line 1001  for (;;) Line 1400  for (;;)
1400    
1401      do      do
1402        {        {
1403        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1404          eptrb, 0);        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1405        if (rrc == MATCH_MATCH) break;          {
1406        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          mstart = md->start_match_ptr;
1407            break;
1408            }
1409          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1410        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1411        }        }
1412      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1047  for (;;) Line 1449  for (;;)
1449    
1450      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1451        {        {
1452        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1453        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1454        ecode = prev;        ecode = prev;
1455        flags = match_tail_recursed;        flags = 0;
1456        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1457        }        }
1458      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1459        {        {
1460        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1461        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1462        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1463        flags = match_tail_recursed;        flags = 0;
1464        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1465        }        }
1466      /* Control never gets here */      /* Control never gets here */
# Line 1070  for (;;) Line 1472  for (;;)
1472      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1473      break;      break;
1474    
1475      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1476      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1477      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1478      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1479      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1480    
1481      case OP_BRAZERO:      case OP_BRAZERO:
1482        {        {
1483        next = ecode+1;        next = ecode+1;
1484        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1485        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1486        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1487        ecode = next + 1 + LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
# Line 1090  for (;;) Line 1492  for (;;)
1492        {        {
1493        next = ecode+1;        next = ecode+1;
1494        do next += GET(next, 1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1495        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1496        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1497        ecode++;        ecode++;
1498        }        }
1499      break;      break;
1500    
1501        case OP_SKIPZERO:
1502          {
1503          next = ecode+1;
1504          do next += GET(next,1); while (*next == OP_ALT);
1505          ecode = next + 1 + LINK_SIZE;
1506          }
1507        break;
1508    
1509      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1510    
1511      case OP_KET:      case OP_KET:
# Line 1114  for (;;) Line 1524  for (;;)
1524        }        }
1525      else saved_eptr = NULL;      else saved_eptr = NULL;
1526    
1527      /* If we are at the end of an assertion group, stop matching and return      /* If we are at the end of an assertion group or an atomic group, stop
1528      MATCH_MATCH, but record the current high water mark for use by positive      matching and return MATCH_MATCH, but record the current high water mark for
1529      assertions. Do this also for the "once" (atomic) groups. */      use by positive assertions. We also need to record the match start in case
1530        it was changed by \K. */
1531    
1532      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1533          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1124  for (;;) Line 1535  for (;;)
1535        {        {
1536        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1537        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1538        RRETURN(MATCH_MATCH);        md->start_match_ptr = mstart;
1539          MRRETURN(MATCH_MATCH);
1540        }        }
1541    
1542      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
# Line 1138  for (;;) Line 1550  for (;;)
1550        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1551        offset = number << 1;        offset = number << 1;
1552    
1553  #ifdef DEBUG  #ifdef PCRE_DEBUG
1554        printf("end bracket %d", number);        printf("end bracket %d", number);
1555        printf("\n");        printf("\n");
1556  #endif  #endif
# Line 1148  for (;;) Line 1560  for (;;)
1560          {          {
1561          md->offset_vector[offset] =          md->offset_vector[offset] =
1562            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1563          md->offset_vector[offset+1] = eptr - md->start_subject;          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1564          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1565          }          }
1566    
# Line 1160  for (;;) Line 1572  for (;;)
1572          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1573          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1574          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
         md->start_match = rec->save_start;  
1575          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1576            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1577            offset_top = rec->save_offset_top;
1578          ecode = rec->after_call;          ecode = rec->after_call;
1579          ims = original_ims;          ims = original_ims;
1580          break;          break;
# Line 1189  for (;;) Line 1601  for (;;)
1601    
1602      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1603      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1604      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1605        unlimited repeat of a group that can match an empty string. */
1606    
1607      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1608    
1609      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1610        {        {
1611        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1612        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1613          if (flags != 0)    /* Could match an empty string */
1614            {
1615            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1616            RRETURN(rrc);
1617            }
1618        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1619        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1620        }        }
1621      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1622        {        {
1623        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1624        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1625        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1626        flags = match_tail_recursed;        flags = 0;
1627        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1628        }        }
1629      /* Control never gets here */      /* Control never gets here */
# Line 1214  for (;;) Line 1631  for (;;)
1631      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1632    
1633      case OP_CIRC:      case OP_CIRC:
1634      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1635      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1636        {        {
1637        if (eptr != md->start_subject &&        if (eptr != md->start_subject &&
1638            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1639          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1640        ecode++;        ecode++;
1641        break;        break;
1642        }        }
# Line 1228  for (;;) Line 1645  for (;;)
1645      /* Start of subject assertion */      /* Start of subject assertion */
1646    
1647      case OP_SOD:      case OP_SOD:
1648      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1649      ecode++;      ecode++;
1650      break;      break;
1651    
1652      /* Start of match assertion */      /* Start of match assertion */
1653    
1654      case OP_SOM:      case OP_SOM:
1655      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1656        ecode++;
1657        break;
1658    
1659        /* Reset the start of match point */
1660    
1661        case OP_SET_SOM:
1662        mstart = eptr;
1663      ecode++;      ecode++;
1664      break;      break;
1665    
# Line 1246  for (;;) Line 1670  for (;;)
1670      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1671        {        {
1672        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1673          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1674        else        else
1675          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1676        ecode++;        ecode++;
1677        break;        break;
1678        }        }
1679      else      else
1680        {        {
1681        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1682        if (!md->endonly)        if (!md->endonly)
1683          {          {
1684          if (eptr != md->end_subject &&          if (eptr != md->end_subject &&
1685              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1686            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
1687          ecode++;          ecode++;
1688          break;          break;
1689          }          }
# Line 1269  for (;;) Line 1693  for (;;)
1693      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1694    
1695      case OP_EOD:      case OP_EOD:
1696      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1697      ecode++;      ecode++;
1698      break;      break;
1699    
# Line 1278  for (;;) Line 1702  for (;;)
1702      case OP_EODN:      case OP_EODN:
1703      if (eptr != md->end_subject &&      if (eptr != md->end_subject &&
1704          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1705        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1706      ecode++;      ecode++;
1707      break;      break;
1708    
# Line 1290  for (;;) Line 1714  for (;;)
1714    
1715        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1716        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1717        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1718          partial matching. */
1719    
1720  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1721        if (utf8)        if (utf8)
1722          {          {
1723            /* Get status of previous character */
1724    
1725          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1726            {            {
1727            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1728            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1729              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1730            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1731    #ifdef SUPPORT_UCP
1732              if (md->use_ucp)
1733                {
1734                if (c == '_') prev_is_word = TRUE; else
1735                  {
1736                  int cat = UCD_CATEGORY(c);
1737                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1738                  }
1739                }
1740              else
1741    #endif
1742            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1743            }            }
1744          if (eptr >= md->end_subject) cur_is_word = FALSE; else  
1745            /* Get status of next character */
1746    
1747            if (eptr >= md->end_subject)
1748              {
1749              SCHECK_PARTIAL();
1750              cur_is_word = FALSE;
1751              }
1752            else
1753            {            {
1754            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1755    #ifdef SUPPORT_UCP
1756              if (md->use_ucp)
1757                {
1758                if (c == '_') cur_is_word = TRUE; else
1759                  {
1760                  int cat = UCD_CATEGORY(c);
1761                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1762                  }
1763                }
1764              else
1765    #endif
1766            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1767            }            }
1768          }          }
1769        else        else
1770  #endif  #endif
1771    
1772        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1773          consistency with the behaviour of \w we do use it in this case. */
1774    
1775          {          {
1776          prev_is_word = (eptr != md->start_subject) &&          /* Get status of previous character */
1777            ((md->ctypes[eptr[-1]] & ctype_word) != 0);  
1778          cur_is_word = (eptr < md->end_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1779            ((md->ctypes[*eptr] & ctype_word) != 0);            {
1780              if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1781    #ifdef SUPPORT_UCP
1782              if (md->use_ucp)
1783                {
1784                c = eptr[-1];
1785                if (c == '_') prev_is_word = TRUE; else
1786                  {
1787                  int cat = UCD_CATEGORY(c);
1788                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1789                  }
1790                }
1791              else
1792    #endif
1793              prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1794              }
1795    
1796            /* Get status of next character */
1797    
1798            if (eptr >= md->end_subject)
1799              {
1800              SCHECK_PARTIAL();
1801              cur_is_word = FALSE;
1802              }
1803            else
1804    #ifdef SUPPORT_UCP
1805            if (md->use_ucp)
1806              {
1807              c = *eptr;
1808              if (c == '_') cur_is_word = TRUE; else
1809                {
1810                int cat = UCD_CATEGORY(c);
1811                cur_is_word = (cat == ucp_L || cat == ucp_N);
1812                }
1813              }
1814            else
1815    #endif
1816            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1817          }          }
1818    
1819        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
1820    
1821        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
1822             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1823          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1824        }        }
1825      break;      break;
1826    
1827      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1828    
1829      case OP_ANY:      case OP_ANY:
1830      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1831        /* Fall through */
1832    
1833        case OP_ALLANY:
1834        if (eptr++ >= md->end_subject)
1835        {        {
1836        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);        SCHECK_PARTIAL();
1837          MRRETURN(MATCH_NOMATCH);
1838        }        }
1839      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1840      ecode++;      ecode++;
1841      break;      break;
1842    
# Line 1345  for (;;) Line 1844  for (;;)
1844      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1845    
1846      case OP_ANYBYTE:      case OP_ANYBYTE:
1847      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1848          {
1849          SCHECK_PARTIAL();
1850          MRRETURN(MATCH_NOMATCH);
1851          }
1852      ecode++;      ecode++;
1853      break;      break;
1854    
1855      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1856      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1857          {
1858          SCHECK_PARTIAL();
1859          MRRETURN(MATCH_NOMATCH);
1860          }
1861      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1862      if (      if (
1863  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1358  for (;;) Line 1865  for (;;)
1865  #endif  #endif
1866         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1867         )         )
1868        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1869      ecode++;      ecode++;
1870      break;      break;
1871    
1872      case OP_DIGIT:      case OP_DIGIT:
1873      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1874          {
1875          SCHECK_PARTIAL();
1876          MRRETURN(MATCH_NOMATCH);
1877          }
1878      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1879      if (      if (
1880  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1371  for (;;) Line 1882  for (;;)
1882  #endif  #endif
1883         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1884         )         )
1885        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1886      ecode++;      ecode++;
1887      break;      break;
1888    
1889      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1890      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1891          {
1892          SCHECK_PARTIAL();
1893          MRRETURN(MATCH_NOMATCH);
1894          }
1895      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1896      if (      if (
1897  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1384  for (;;) Line 1899  for (;;)
1899  #endif  #endif
1900         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1901         )         )
1902        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1903      ecode++;      ecode++;
1904      break;      break;
1905    
1906      case OP_WHITESPACE:      case OP_WHITESPACE:
1907      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1908          {
1909          SCHECK_PARTIAL();
1910          MRRETURN(MATCH_NOMATCH);
1911          }
1912      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1913      if (      if (
1914  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1397  for (;;) Line 1916  for (;;)
1916  #endif  #endif
1917         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1918         )         )
1919        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1920      ecode++;      ecode++;
1921      break;      break;
1922    
1923      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1924      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1925          {
1926          SCHECK_PARTIAL();
1927          MRRETURN(MATCH_NOMATCH);
1928          }
1929      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1930      if (      if (
1931  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1410  for (;;) Line 1933  for (;;)
1933  #endif  #endif
1934         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1935         )         )
1936        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1937      ecode++;      ecode++;
1938      break;      break;
1939    
1940      case OP_WORDCHAR:      case OP_WORDCHAR:
1941      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1942          {
1943          SCHECK_PARTIAL();
1944          MRRETURN(MATCH_NOMATCH);
1945          }
1946      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1947      if (      if (
1948  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1423  for (;;) Line 1950  for (;;)
1950  #endif  #endif
1951         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1952         )         )
1953        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1954      ecode++;      ecode++;
1955      break;      break;
1956    
1957      case OP_ANYNL:      case OP_ANYNL:
1958      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1959          {
1960          SCHECK_PARTIAL();
1961          MRRETURN(MATCH_NOMATCH);
1962          }
1963      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1964      switch(c)      switch(c)
1965        {        {
1966        default: RRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
1967        case 0x000d:        case 0x000d:
1968        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1969        break;        break;
1970    
1971        case 0x000a:        case 0x000a:
1972          break;
1973    
1974        case 0x000b:        case 0x000b:
1975        case 0x000c:        case 0x000c:
1976        case 0x0085:        case 0x0085:
1977        case 0x2028:        case 0x2028:
1978        case 0x2029:        case 0x2029:
1979          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1980        break;        break;
1981        }        }
1982      ecode++;      ecode++;
1983      break;      break;
1984    
1985  #ifdef SUPPORT_UCP      case OP_NOT_HSPACE:
1986      /* Check the next character by Unicode property. We will get here only      if (eptr >= md->end_subject)
1987      if the support is in the binary; otherwise a compile-time error occurs. */        {
1988          SCHECK_PARTIAL();
1989      case OP_PROP:        MRRETURN(MATCH_NOMATCH);
1990          }
1991        GETCHARINCTEST(c, eptr);
1992        switch(c)
1993          {
1994          default: break;
1995          case 0x09:      /* HT */
1996          case 0x20:      /* SPACE */
1997          case 0xa0:      /* NBSP */
1998          case 0x1680:    /* OGHAM SPACE MARK */
1999          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2000          case 0x2000:    /* EN QUAD */
2001          case 0x2001:    /* EM QUAD */
2002          case 0x2002:    /* EN SPACE */
2003          case 0x2003:    /* EM SPACE */
2004          case 0x2004:    /* THREE-PER-EM SPACE */
2005          case 0x2005:    /* FOUR-PER-EM SPACE */
2006          case 0x2006:    /* SIX-PER-EM SPACE */
2007          case 0x2007:    /* FIGURE SPACE */
2008          case 0x2008:    /* PUNCTUATION SPACE */
2009          case 0x2009:    /* THIN SPACE */
2010          case 0x200A:    /* HAIR SPACE */
2011          case 0x202f:    /* NARROW NO-BREAK SPACE */
2012          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2013          case 0x3000:    /* IDEOGRAPHIC SPACE */
2014          MRRETURN(MATCH_NOMATCH);
2015          }
2016        ecode++;
2017        break;
2018    
2019        case OP_HSPACE:
2020        if (eptr >= md->end_subject)
2021          {
2022          SCHECK_PARTIAL();
2023          MRRETURN(MATCH_NOMATCH);
2024          }
2025        GETCHARINCTEST(c, eptr);
2026        switch(c)
2027          {
2028          default: MRRETURN(MATCH_NOMATCH);
2029          case 0x09:      /* HT */
2030          case 0x20:      /* SPACE */
2031          case 0xa0:      /* NBSP */
2032          case 0x1680:    /* OGHAM SPACE MARK */
2033          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2034          case 0x2000:    /* EN QUAD */
2035          case 0x2001:    /* EM QUAD */
2036          case 0x2002:    /* EN SPACE */
2037          case 0x2003:    /* EM SPACE */
2038          case 0x2004:    /* THREE-PER-EM SPACE */
2039          case 0x2005:    /* FOUR-PER-EM SPACE */
2040          case 0x2006:    /* SIX-PER-EM SPACE */
2041          case 0x2007:    /* FIGURE SPACE */
2042          case 0x2008:    /* PUNCTUATION SPACE */
2043          case 0x2009:    /* THIN SPACE */
2044          case 0x200A:    /* HAIR SPACE */
2045          case 0x202f:    /* NARROW NO-BREAK SPACE */
2046          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2047          case 0x3000:    /* IDEOGRAPHIC SPACE */
2048          break;
2049          }
2050        ecode++;
2051        break;
2052    
2053        case OP_NOT_VSPACE:
2054        if (eptr >= md->end_subject)
2055          {
2056          SCHECK_PARTIAL();
2057          MRRETURN(MATCH_NOMATCH);
2058          }
2059        GETCHARINCTEST(c, eptr);
2060        switch(c)
2061          {
2062          default: break;
2063          case 0x0a:      /* LF */
2064          case 0x0b:      /* VT */
2065          case 0x0c:      /* FF */
2066          case 0x0d:      /* CR */
2067          case 0x85:      /* NEL */
2068          case 0x2028:    /* LINE SEPARATOR */
2069          case 0x2029:    /* PARAGRAPH SEPARATOR */
2070          MRRETURN(MATCH_NOMATCH);
2071          }
2072        ecode++;
2073        break;
2074    
2075        case OP_VSPACE:
2076        if (eptr >= md->end_subject)
2077          {
2078          SCHECK_PARTIAL();
2079          MRRETURN(MATCH_NOMATCH);
2080          }
2081        GETCHARINCTEST(c, eptr);
2082        switch(c)
2083          {
2084          default: MRRETURN(MATCH_NOMATCH);
2085          case 0x0a:      /* LF */
2086          case 0x0b:      /* VT */
2087          case 0x0c:      /* FF */
2088          case 0x0d:      /* CR */
2089          case 0x85:      /* NEL */
2090          case 0x2028:    /* LINE SEPARATOR */
2091          case 0x2029:    /* PARAGRAPH SEPARATOR */
2092          break;
2093          }
2094        ecode++;
2095        break;
2096    
2097    #ifdef SUPPORT_UCP
2098        /* Check the next character by Unicode property. We will get here only
2099        if the support is in the binary; otherwise a compile-time error occurs. */
2100    
2101        case OP_PROP:
2102      case OP_NOTPROP:      case OP_NOTPROP:
2103      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2104          {
2105          SCHECK_PARTIAL();
2106          MRRETURN(MATCH_NOMATCH);
2107          }
2108      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2109        {        {
2110        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
2111    
2112        switch(ecode[1])        switch(ecode[1])
2113          {          {
2114          case PT_ANY:          case PT_ANY:
2115          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2116          break;          break;
2117    
2118          case PT_LAMP:          case PT_LAMP:
2119          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2120               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2121               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2122            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2123           break;          break;
2124    
2125          case PT_GC:          case PT_GC:
2126          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2127            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2128          break;          break;
2129    
2130          case PT_PC:          case PT_PC:
2131          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2132            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2133          break;          break;
2134    
2135          case PT_SC:          case PT_SC:
2136          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2137            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2138            break;
2139    
2140            /* These are specials */
2141    
2142            case PT_ALNUM:
2143            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2144                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2145              MRRETURN(MATCH_NOMATCH);
2146            break;
2147    
2148            case PT_SPACE:    /* Perl space */
2149            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2150                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2151                   == (op == OP_NOTPROP))
2152              MRRETURN(MATCH_NOMATCH);
2153            break;
2154    
2155            case PT_PXSPACE:  /* POSIX space */
2156            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2157                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2158                 c == CHAR_FF || c == CHAR_CR)
2159                   == (op == OP_NOTPROP))
2160              MRRETURN(MATCH_NOMATCH);
2161            break;
2162    
2163            case PT_WORD:
2164            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2165                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2166                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2167              MRRETURN(MATCH_NOMATCH);
2168          break;          break;
2169    
2170            /* This should never occur */
2171    
2172          default:          default:
2173          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
2174          }          }
# Line 1499  for (;;) Line 2181  for (;;)
2181      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2182    
2183      case OP_EXTUNI:      case OP_EXTUNI:
2184      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2185          {
2186          SCHECK_PARTIAL();
2187          MRRETURN(MATCH_NOMATCH);
2188          }
2189      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2190        {        {
2191        int chartype, script;        int category = UCD_CATEGORY(c);
2192        int category = _pcre_ucp_findprop(c, &chartype, &script);        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2193        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2194          {          {
2195          int len = 1;          int len = 1;
# Line 1512  for (;;) Line 2197  for (;;)
2197            {            {
2198            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2199            }            }
2200          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2201          if (category != ucp_M) break;          if (category != ucp_M) break;
2202          eptr += len;          eptr += len;
2203          }          }
# Line 1533  for (;;) Line 2218  for (;;)
2218      case OP_REF:      case OP_REF:
2219        {        {
2220        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2221        ecode += 3;                                 /* Advance past item */        ecode += 3;
2222    
2223          /* If the reference is unset, there are two possibilities:
2224    
2225        /* If the reference is unset, set the length to be longer than the amount        (a) In the default, Perl-compatible state, set the length to be longer
2226        of subject left; this ensures that every attempt at a match fails. We        than the amount of subject left; this ensures that every attempt at a
2227        can't just fail here, because of the possibility of quantifiers with zero        match fails. We can't just fail here, because of the possibility of
2228        minima. */        quantifiers with zero minima.
2229    
2230        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        (b) If the JavaScript compatibility flag is set, set the length to zero
2231          md->end_subject - eptr + 1 :        so that the back reference matches an empty string.
2232          md->offset_vector[offset+1] - md->offset_vector[offset];  
2233          Otherwise, set the length to the length of what was matched by the
2234          referenced subpattern. */
2235    
2236          if (offset >= offset_top || md->offset_vector[offset] < 0)
2237            length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2238          else
2239            length = md->offset_vector[offset+1] - md->offset_vector[offset];
2240    
2241        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2242    
# Line 1571  for (;;) Line 2265  for (;;)
2265          break;          break;
2266    
2267          default:               /* No repeat follows */          default:               /* No repeat follows */
2268          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2269              {
2270              CHECK_PARTIAL();
2271              MRRETURN(MATCH_NOMATCH);
2272              }
2273          eptr += length;          eptr += length;
2274          continue;              /* With the main loop */          continue;              /* With the main loop */
2275          }          }
# Line 1587  for (;;) Line 2285  for (;;)
2285    
2286        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2287          {          {
2288          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2289              {
2290              CHECK_PARTIAL();
2291              MRRETURN(MATCH_NOMATCH);
2292              }
2293          eptr += length;          eptr += length;
2294          }          }
2295    
# Line 1602  for (;;) Line 2304  for (;;)
2304          {          {
2305          for (fi = min;; fi++)          for (fi = min;; fi++)
2306            {            {
2307            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2308            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2309            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2310              RRETURN(MATCH_NOMATCH);            if (!match_ref(offset, eptr, length, md, ims))
2311                {
2312                CHECK_PARTIAL();
2313                MRRETURN(MATCH_NOMATCH);
2314                }
2315            eptr += length;            eptr += length;
2316            }            }
2317          /* Control never gets here */          /* Control never gets here */
# Line 1618  for (;;) Line 2324  for (;;)
2324          pp = eptr;          pp = eptr;
2325          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2326            {            {
2327            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2328                {
2329                CHECK_PARTIAL();
2330                break;
2331                }
2332            eptr += length;            eptr += length;
2333            }            }
2334          while (eptr >= pp)          while (eptr >= pp)
2335            {            {
2336            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2337            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2338            eptr -= length;            eptr -= length;
2339            }            }
2340          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2341          }          }
2342        }        }
2343      /* Control never gets here */      /* Control never gets here */
2344    
   
   
2345      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2346      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2347      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1688  for (;;) Line 2396  for (;;)
2396          {          {
2397          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2398            {            {
2399            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2400                {
2401                SCHECK_PARTIAL();
2402                MRRETURN(MATCH_NOMATCH);
2403                }
2404            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2405            if (c > 255)            if (c > 255)
2406              {              {
2407              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2408              }              }
2409            else            else
2410              {              {
2411              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2412              }              }
2413            }            }
2414          }          }
# Line 1706  for (;;) Line 2418  for (;;)
2418          {          {
2419          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2420            {            {
2421            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2422                {
2423                SCHECK_PARTIAL();
2424                MRRETURN(MATCH_NOMATCH);
2425                }
2426            c = *eptr++;            c = *eptr++;
2427            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2428            }            }
2429          }          }
2430    
# Line 1728  for (;;) Line 2444  for (;;)
2444            {            {
2445            for (fi = min;; fi++)            for (fi = min;; fi++)
2446              {              {
2447              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2448              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2449              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2450                if (eptr >= md->end_subject)
2451                  {
2452                  SCHECK_PARTIAL();
2453                  MRRETURN(MATCH_NOMATCH);
2454                  }
2455              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2456              if (c > 255)              if (c > 255)
2457                {                {
2458                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2459                }                }
2460              else              else
2461                {                {
2462                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2463                }                }
2464              }              }
2465            }            }
# Line 1748  for (;;) Line 2469  for (;;)
2469            {            {
2470            for (fi = min;; fi++)            for (fi = min;; fi++)
2471              {              {
2472              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2473              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2474              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2475                if (eptr >= md->end_subject)
2476                  {
2477                  SCHECK_PARTIAL();
2478                  MRRETURN(MATCH_NOMATCH);
2479                  }
2480              c = *eptr++;              c = *eptr++;
2481              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2482              }              }
2483            }            }
2484          /* Control never gets here */          /* Control never gets here */
# Line 1771  for (;;) Line 2497  for (;;)
2497            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2498              {              {
2499              int len = 1;              int len = 1;
2500              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2501                  {
2502                  SCHECK_PARTIAL();
2503                  break;
2504                  }
2505              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2506              if (c > 255)              if (c > 255)
2507                {                {
# Line 1785  for (;;) Line 2515  for (;;)
2515              }              }
2516            for (;;)            for (;;)
2517              {              {
2518              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2519              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2520              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2521              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1797  for (;;) Line 2527  for (;;)
2527            {            {
2528            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2529              {              {
2530              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2531                  {
2532                  SCHECK_PARTIAL();
2533                  break;
2534                  }
2535              c = *eptr;              c = *eptr;
2536              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2537              eptr++;              eptr++;
2538              }              }
2539            while (eptr >= pp)            while (eptr >= pp)
2540              {              {
2541              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2542              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2543              eptr--;              eptr--;
2544              }              }
2545            }            }
2546    
2547          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2548          }          }
2549        }        }
2550      /* Control never gets here */      /* Control never gets here */
2551    
2552    
2553      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2554      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2555        mode, because Unicode properties are supported in non-UTF-8 mode. */
2556    
2557  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2558      case OP_XCLASS:      case OP_XCLASS:
# Line 1858  for (;;) Line 2593  for (;;)
2593    
2594        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2595          {          {
2596          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2597          GETCHARINC(c, eptr);            {
2598          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2599              MRRETURN(MATCH_NOMATCH);
2600              }
2601            GETCHARINCTEST(c, eptr);
2602            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2603          }          }
2604    
2605        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1875  for (;;) Line 2614  for (;;)
2614          {          {
2615          for (fi = min;; fi++)          for (fi = min;; fi++)
2616            {            {
2617            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2618            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2619            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2620            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2621            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2622                SCHECK_PARTIAL();
2623                MRRETURN(MATCH_NOMATCH);
2624                }
2625              GETCHARINCTEST(c, eptr);
2626              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2627            }            }
2628          /* Control never gets here */          /* Control never gets here */
2629          }          }
# Line 1892  for (;;) Line 2636  for (;;)
2636          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2637            {            {
2638            int len = 1;            int len = 1;
2639            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2640            GETCHARLEN(c, eptr, len);              {
2641                SCHECK_PARTIAL();
2642                break;
2643                }
2644              GETCHARLENTEST(c, eptr, len);
2645            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2646            eptr += len;            eptr += len;
2647            }            }
2648          for(;;)          for(;;)
2649            {            {
2650            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2651            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2652            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2653            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2654            }            }
2655          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2656          }          }
2657    
2658        /* Control never gets here */        /* Control never gets here */
# Line 1920  for (;;) Line 2668  for (;;)
2668        length = 1;        length = 1;
2669        ecode++;        ecode++;
2670        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2671        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2672        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2673            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2674            MRRETURN(MATCH_NOMATCH);
2675            }
2676          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2677        }        }
2678      else      else
2679  #endif  #endif
2680    
2681      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2682        {        {
2683        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2684        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2685            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2686            MRRETURN(MATCH_NOMATCH);
2687            }
2688          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2689        ecode += 2;        ecode += 2;
2690        }        }
2691      break;      break;
# Line 1944  for (;;) Line 2700  for (;;)
2700        ecode++;        ecode++;
2701        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2702    
2703        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2704            {
2705            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2706            MRRETURN(MATCH_NOMATCH);
2707            }
2708    
2709        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2710        can use the fast lookup table. */        can use the fast lookup table. */
2711    
2712        if (fc < 128)        if (fc < 128)
2713          {          {
2714          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2715          }          }
2716    
2717        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
# Line 1968  for (;;) Line 2728  for (;;)
2728          if (fc != dc)          if (fc != dc)
2729            {            {
2730  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2731            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2732  #endif  #endif
2733              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2734            }            }
2735          }          }
2736        }        }
# Line 1979  for (;;) Line 2739  for (;;)
2739    
2740      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2741        {        {
2742        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2743        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2744            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2745            MRRETURN(MATCH_NOMATCH);
2746            }
2747          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2748        ecode += 2;        ecode += 2;
2749        }        }
2750      break;      break;
# Line 2033  for (;;) Line 2797  for (;;)
2797      case OP_MINQUERY:      case OP_MINQUERY:
2798      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2799      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2800    
2801      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2802      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2803      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2804    
2805      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2806    
2807      REPEATCHAR:      REPEATCHAR:
2808  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2048  for (;;) Line 2811  for (;;)
2811        length = 1;        length = 1;
2812        charptr = ecode;        charptr = ecode;
2813        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2814        ecode += length;        ecode += length;
2815    
2816        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 2059  for (;;) Line 2821  for (;;)
2821  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2822          unsigned int othercase;          unsigned int othercase;
2823          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2824              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2825            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2826          else oclength = 0;          else oclength = 0;
2827  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2828    
2829          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2830            {            {
2831            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2832                memcmp(eptr, charptr, length) == 0) eptr += length;
2833  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2834            /* Need braces because of following else */            else if (oclength > 0 &&
2835            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                     eptr <= md->end_subject - oclength &&
2836                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2837    #endif  /* SUPPORT_UCP */
2838            else            else
2839              {              {
2840              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2841              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2842              }              }
 #else   /* without SUPPORT_UCP */  
           else { RRETURN(MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2843            }            }
2844    
2845          if (min == max) continue;          if (min == max) continue;
# Line 2086  for (;;) Line 2848  for (;;)
2848            {            {
2849            for (fi = min;; fi++)            for (fi = min;; fi++)
2850              {              {
2851              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2852              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2853              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2854              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2855                  memcmp(eptr, charptr, length) == 0) eptr += length;
2856  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2857              /* Need braces because of following else */              else if (oclength > 0 &&
2858              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                       eptr <= md->end_subject - oclength &&
2859                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2860    #endif  /* SUPPORT_UCP */
2861              else              else
2862                {                {
2863                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2864                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2865                }                }
 #else   /* without SUPPORT_UCP */  
             else { RRETURN (MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2866              }              }
2867            /* Control never gets here */            /* Control never gets here */
2868            }            }
# Line 2110  for (;;) Line 2872  for (;;)
2872            pp = eptr;            pp = eptr;
2873            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2874              {              {
2875              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2876              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2877  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2878              else if (oclength == 0) break;              else if (oclength > 0 &&
2879                         eptr <= md->end_subject - oclength &&
2880                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2881    #endif  /* SUPPORT_UCP */
2882              else              else
2883                {                {
2884                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2885                eptr += oclength;                break;
2886                }                }
 #else   /* without SUPPORT_UCP */  
             else break;  
 #endif  /* SUPPORT_UCP */  
2887              }              }
2888    
2889            if (possessive) continue;            if (possessive) continue;
2890    
2891            for(;;)            for(;;)
2892             {              {
2893             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2894             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2895             if (eptr == pp) RRETURN(MATCH_NOMATCH);              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2896  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2897             eptr--;              eptr--;
2898             BACKCHAR(eptr);              BACKCHAR(eptr);
2899  #else   /* without SUPPORT_UCP */  #else   /* without SUPPORT_UCP */
2900             eptr -= length;              eptr -= length;
2901  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2902             }              }
2903            }            }
2904          /* Control never gets here */          /* Control never gets here */
2905          }          }
# Line 2149  for (;;) Line 2912  for (;;)
2912  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2913    
2914      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2915        {  
2916        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2917    
2918      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2919      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2170  for (;;) Line 2931  for (;;)
2931        {        {
2932        fc = md->lcc[fc];        fc = md->lcc[fc];
2933        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2934          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2935            if (eptr >= md->end_subject)
2936              {
2937              SCHECK_PARTIAL();
2938              MRRETURN(MATCH_NOMATCH);
2939              }
2940            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2941            }
2942        if (min == max) continue;        if (min == max) continue;
2943        if (minimize)        if (minimize)
2944          {          {
2945          for (fi = min;; fi++)          for (fi = min;; fi++)
2946            {            {
2947            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2948            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2949            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2950                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2951              RRETURN(MATCH_NOMATCH);              {
2952                SCHECK_PARTIAL();
2953                MRRETURN(MATCH_NOMATCH);
2954                }
2955              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2956            }            }
2957          /* Control never gets here */          /* Control never gets here */
2958          }          }
# Line 2189  for (;;) Line 2961  for (;;)
2961          pp = eptr;          pp = eptr;
2962          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2963            {            {
2964            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2965                {
2966                SCHECK_PARTIAL();
2967                break;
2968                }
2969              if (fc != md->lcc[*eptr]) break;
2970            eptr++;            eptr++;
2971            }            }
2972    
2973          if (possessive) continue;          if (possessive) continue;
2974    
2975          while (eptr >= pp)          while (eptr >= pp)
2976            {            {
2977            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2978            eptr--;            eptr--;
2979            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2980            }            }
2981          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2982          }          }
2983        /* Control never gets here */        /* Control never gets here */
2984        }        }
# Line 2208  for (;;) Line 2987  for (;;)
2987    
2988      else      else
2989        {        {
2990        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2991            {
2992            if (eptr >= md->end_subject)
2993              {
2994              SCHECK_PARTIAL();
2995              MRRETURN(MATCH_NOMATCH);
2996              }
2997            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2998            }
2999    
3000        if (min == max) continue;        if (min == max) continue;
3001    
3002        if (minimize)        if (minimize)
3003          {          {
3004          for (fi = min;; fi++)          for (fi = min;; fi++)
3005            {            {
3006            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3007            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3008            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3009              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3010                {
3011                SCHECK_PARTIAL();
3012                MRRETURN(MATCH_NOMATCH);
3013                }
3014              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3015            }            }
3016          /* Control never gets here */          /* Control never gets here */
3017          }          }
# Line 2226  for (;;) Line 3020  for (;;)
3020          pp = eptr;          pp = eptr;
3021          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3022            {            {
3023            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
3024                {
3025                SCHECK_PARTIAL();
3026                break;
3027                }
3028              if (fc != *eptr) break;
3029            eptr++;            eptr++;
3030            }            }
3031          if (possessive) continue;          if (possessive) continue;
3032    
3033          while (eptr >= pp)          while (eptr >= pp)
3034            {            {
3035            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3036            eptr--;            eptr--;
3037            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3038            }            }
3039          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3040          }          }
3041        }        }
3042      /* Control never gets here */      /* Control never gets here */
# Line 2245  for (;;) Line 3045  for (;;)
3045      checking can be multibyte. */      checking can be multibyte. */
3046    
3047      case OP_NOT:      case OP_NOT:
3048      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
3049          {
3050          SCHECK_PARTIAL();
3051          MRRETURN(MATCH_NOMATCH);
3052          }
3053      ecode++;      ecode++;
3054      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3055      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2254  for (;;) Line 3058  for (;;)
3058        if (c < 256)        if (c < 256)
3059  #endif  #endif
3060        c = md->lcc[c];        c = md->lcc[c];
3061        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3062        }        }
3063      else      else
3064        {        {
3065        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3066        }        }
3067      break;      break;
3068    
# Line 2322  for (;;) Line 3126  for (;;)
3126      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3127      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3128    
3129      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3130    
3131      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3132      fc = *ecode++;      fc = *ecode++;
3133    
3134      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2352  for (;;) Line 3153  for (;;)
3153          register unsigned int d;          register unsigned int d;
3154          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3155            {            {
3156              if (eptr >= md->end_subject)
3157                {
3158                SCHECK_PARTIAL();
3159                MRRETURN(MATCH_NOMATCH);
3160                }
3161            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3162            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3163            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3164            }            }
3165          }          }
3166        else        else
# Line 2363  for (;;) Line 3169  for (;;)
3169        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3170          {          {
3171          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3172            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3173              if (eptr >= md->end_subject)
3174                {
3175                SCHECK_PARTIAL();
3176                MRRETURN(MATCH_NOMATCH);
3177                }
3178              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3179              }
3180          }          }
3181    
3182        if (min == max) continue;        if (min == max) continue;
# Line 2377  for (;;) Line 3190  for (;;)
3190            register unsigned int d;            register unsigned int d;
3191            for (fi = min;; fi++)            for (fi = min;; fi++)
3192              {              {
3193              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3194              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3195                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3196                if (eptr >= md->end_subject)
3197                  {
3198                  SCHECK_PARTIAL();
3199                  MRRETURN(MATCH_NOMATCH);
3200                  }
3201              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3202              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3203              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3204              }              }
3205            }            }
3206          else          else
# Line 2391  for (;;) Line 3209  for (;;)
3209            {            {
3210            for (fi = min;; fi++)            for (fi = min;; fi++)
3211              {              {
3212              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3213              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3214              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3215                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3216                  {
3217                  SCHECK_PARTIAL();
3218                  MRRETURN(MATCH_NOMATCH);
3219                  }
3220                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3221              }              }
3222            }            }
3223          /* Control never gets here */          /* Control never gets here */
# Line 2414  for (;;) Line 3237  for (;;)
3237            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3238              {              {
3239              int len = 1;              int len = 1;
3240              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3241                  {
3242                  SCHECK_PARTIAL();
3243                  break;
3244                  }
3245              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3246              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3247              if (fc == d) break;              if (fc == d) break;
# Line 2423  for (;;) Line 3250  for (;;)
3250          if (possessive) continue;          if (possessive) continue;
3251          for(;;)          for(;;)
3252              {              {
3253              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3254              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3255              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3256              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2435  for (;;) Line 3262  for (;;)
3262            {            {
3263            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3264              {              {
3265              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3266                  {
3267                  SCHECK_PARTIAL();
3268                  break;
3269                  }
3270                if (fc == md->lcc[*eptr]) break;
3271              eptr++;              eptr++;
3272              }              }
3273            if (possessive) continue;            if (possessive) continue;
3274            while (eptr >= pp)            while (eptr >= pp)
3275              {              {
3276              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3277              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3278              eptr--;              eptr--;
3279              }              }
3280            }            }
3281    
3282          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3283          }          }
3284        /* Control never gets here */        /* Control never gets here */
3285        }        }
# Line 2463  for (;;) Line 3295  for (;;)
3295          register unsigned int d;          register unsigned int d;
3296          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3297            {            {
3298              if (eptr >= md->end_subject)
3299                {
3300                SCHECK_PARTIAL();
3301                MRRETURN(MATCH_NOMATCH);
3302                }
3303            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3304            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3305            }            }
3306          }          }
3307        else        else
# Line 2472  for (;;) Line 3309  for (;;)
3309        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3310          {          {
3311          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3312            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3313              if (eptr >= md->end_subject)
3314                {
3315                SCHECK_PARTIAL();
3316                MRRETURN(MATCH_NOMATCH);
3317                }
3318              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3319              }
3320          }          }
3321    
3322        if (min == max) continue;        if (min == max) continue;
# Line 2486  for (;;) Line 3330  for (;;)
3330            register unsigned int d;            register unsigned int d;
3331            for (fi = min;; fi++)            for (fi = min;; fi++)
3332              {              {
3333              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3334              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3335                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3336                if (eptr >= md->end_subject)
3337                  {
3338                  SCHECK_PARTIAL();
3339                  MRRETURN(MATCH_NOMATCH);
3340                  }
3341              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3342              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3343              }              }
3344            }            }
3345          else          else
# Line 2499  for (;;) Line 3348  for (;;)
3348            {            {
3349            for (fi = min;; fi++)            for (fi = min;; fi++)
3350              {              {
3351              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3352              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3353              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3354                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3355                  {
3356                  SCHECK_PARTIAL();
3357                  MRRETURN(MATCH_NOMATCH);
3358                  }
3359                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3360              }              }
3361            }            }
3362          /* Control never gets here */          /* Control never gets here */
# Line 2522  for (;;) Line 3376  for (;;)
3376            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3377              {              {
3378              int len = 1;              int len = 1;
3379              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3380                  {
3381                  SCHECK_PARTIAL();
3382                  break;
3383                  }
3384              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3385              if (fc == d) break;              if (fc == d) break;
3386              eptr += len;              eptr += len;
# Line 2530  for (;;) Line 3388  for (;;)
3388            if (possessive) continue;            if (possessive) continue;
3389            for(;;)            for(;;)
3390              {              {
3391              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3392              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3393              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3394              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2542  for (;;) Line 3400  for (;;)
3400            {            {
3401            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3402              {              {
3403              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3404                  {
3405                  SCHECK_PARTIAL();
3406                  break;
3407                  }
3408                if (fc == *eptr) break;
3409              eptr++;              eptr++;
3410              }              }
3411            if (possessive) continue;            if (possessive) continue;
3412            while (eptr >= pp)            while (eptr >= pp)
3413              {              {
3414              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3415              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3416              eptr--;              eptr--;
3417              }              }
3418            }            }
3419    
3420          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3421          }          }
3422        }        }
3423      /* Control never gets here */      /* Control never gets here */
# Line 2636  for (;;) Line 3499  for (;;)
3499    
3500      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3501      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3502      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3503      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3504      and single-bytes. */      and single-bytes. */
3505    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3506      if (min > 0)      if (min > 0)
3507        {        {
3508  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2651  for (;;) Line 3511  for (;;)
3511          switch(prop_type)          switch(prop_type)
3512            {            {
3513            case PT_ANY:            case PT_ANY:
3514            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3515            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3516              {              {
3517              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3518              GETCHARINC(c, eptr);                {
3519                  SCHECK_PARTIAL();
3520                  MRRETURN(MATCH_NOMATCH);
3521                  }
3522                GETCHARINCTEST(c, eptr);
3523              }              }
3524            break;            break;
3525    
3526            case PT_LAMP:            case PT_LAMP:
3527            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3528              {              {
3529              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3530              GETCHARINC(c, eptr);                {
3531              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3532                  MRRETURN(MATCH_NOMATCH);
3533                  }
3534                GETCHARINCTEST(c, eptr);
3535                prop_chartype = UCD_CHARTYPE(c);
3536              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3537                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3538                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3539                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3540              }              }
3541            break;            break;
3542    
3543            case PT_GC:            case PT_GC:
3544            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3545              {              {
3546              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3547              GETCHARINC(c, eptr);                {
3548              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3549                  MRRETURN(MATCH_NOMATCH);
3550                  }
3551                GETCHARINCTEST(c, eptr);
3552                prop_category = UCD_CATEGORY(c);
3553              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3554                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3555              }              }
3556            break;            break;
3557    
3558            case PT_PC:            case PT_PC:
3559            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3560              {              {
3561              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3562              GETCHARINC(c, eptr);                {
3563              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3564                  MRRETURN(MATCH_NOMATCH);
3565                  }
3566                GETCHARINCTEST(c, eptr);
3567                prop_chartype = UCD_CHARTYPE(c);
3568              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3569                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3570              }              }
3571            break;            break;
3572    
3573            case PT_SC:            case PT_SC:
3574            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3575              {              {
3576              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3577              GETCHARINC(c, eptr);                {
3578              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3579                  MRRETURN(MATCH_NOMATCH);
3580                  }
3581                GETCHARINCTEST(c, eptr);
3582                prop_script = UCD_SCRIPT(c);
3583              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3584                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3585                }
3586              break;
3587    
3588              case PT_ALNUM:
3589              for (i = 1; i <= min; i++)
3590                {
3591                if (eptr >= md->end_subject)
3592                  {
3593                  SCHECK_PARTIAL();
3594                  MRRETURN(MATCH_NOMATCH);
3595                  }
3596                GETCHARINCTEST(c, eptr);
3597                prop_category = UCD_CATEGORY(c);
3598                if ((prop_category == ucp_L || prop_category == ucp_N)
3599                       == prop_fail_result)
3600                  MRRETURN(MATCH_NOMATCH);
3601                }
3602              break;
3603    
3604              case PT_SPACE:    /* Perl space */
3605              for (i = 1; i <= min; i++)
3606                {
3607                if (eptr >= md->end_subject)
3608                  {
3609                  SCHECK_PARTIAL();
3610                  MRRETURN(MATCH_NOMATCH);
3611                  }
3612                GETCHARINCTEST(c, eptr);
3613                prop_category = UCD_CATEGORY(c);
3614                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3615                     c == CHAR_FF || c == CHAR_CR)
3616                       == prop_fail_result)
3617                  MRRETURN(MATCH_NOMATCH);
3618                }
3619              break;
3620    
3621              case PT_PXSPACE:  /* POSIX space */
3622              for (i = 1; i <= min; i++)
3623                {
3624                if (eptr >= md->end_subject)
3625                  {
3626                  SCHECK_PARTIAL();
3627                  MRRETURN(MATCH_NOMATCH);
3628                  }
3629                GETCHARINCTEST(c, eptr);
3630                prop_category = UCD_CATEGORY(c);
3631                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3632                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3633                       == prop_fail_result)
3634                  MRRETURN(MATCH_NOMATCH);
3635                }
3636              break;
3637    
3638              case PT_WORD:
3639              for (i = 1; i <= min; i++)
3640                {
3641                if (eptr >= md->end_subject)
3642                  {
3643                  SCHECK_PARTIAL();
3644                  MRRETURN(MATCH_NOMATCH);
3645                  }
3646                GETCHARINCTEST(c, eptr);
3647                prop_category = UCD_CATEGORY(c);
3648                if ((prop_category == ucp_L || prop_category == ucp_N ||
3649                     c == CHAR_UNDERSCORE)
3650                       == prop_fail_result)
3651                  MRRETURN(MATCH_NOMATCH);
3652              }              }
3653            break;            break;
3654    
3655              /* This should not occur */
3656    
3657            default:            default:
3658            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
3659            }            }
# Line 2717  for (;;) Line 3666  for (;;)
3666          {          {
3667          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3668            {            {
3669              if (eptr >= md->end_subject)
3670                {
3671                SCHECK_PARTIAL();
3672                MRRETURN(MATCH_NOMATCH);
3673                }
3674            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3675            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3676            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3677            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3678              {              {
3679              int len = 1;              int len = 1;
3680              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3681                {                else { GETCHARLEN(c, eptr, len); }
3682                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3683              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3684              eptr += len;              eptr += len;
3685              }              }
# Line 2745  for (;;) Line 3697  for (;;)
3697          case OP_ANY:          case OP_ANY:
3698          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3699            {            {
3700            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3701                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))              {
3702              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3703                MRRETURN(MATCH_NOMATCH);
3704                }
3705              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3706              eptr++;
3707              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3708              }
3709            break;
3710    
3711            case OP_ALLANY:
3712            for (i = 1; i <= min; i++)
3713              {
3714              if (eptr >= md->end_subject)
3715                {
3716                SCHECK_PARTIAL();
3717                MRRETURN(MATCH_NOMATCH);
3718                }
3719            eptr++;            eptr++;
3720            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3721            }            }
3722          break;          break;
3723    
3724          case OP_ANYBYTE:          case OP_ANYBYTE:
3725            if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3726          eptr += min;          eptr += min;
3727          break;          break;
3728    
3729          case OP_ANYNL:          case OP_ANYNL:
3730          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3731            {            {
3732            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3733                {
3734                SCHECK_PARTIAL();
3735                MRRETURN(MATCH_NOMATCH);
3736                }
3737            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3738            switch(c)            switch(c)
3739              {              {
3740              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3741              case 0x000d:              case 0x000d:
3742              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3743              break;              break;
3744    
3745              case 0x000a:              case 0x000a:
3746                break;
3747    
3748              case 0x000b:              case 0x000b:
3749              case 0x000c:              case 0x000c:
3750              case 0x0085:              case 0x0085:
3751              case 0x2028:              case 0x2028:
3752              case 0x2029:              case 0x2029:
3753                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3754              break;              break;
3755              }              }
3756            }            }
3757          break;          break;
3758    
3759          case OP_NOT_DIGIT:          case OP_NOT_HSPACE:
3760          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3761            {            {
3762            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3763                {
3764                SCHECK_PARTIAL();
3765                MRRETURN(MATCH_NOMATCH);
3766                }
3767              GETCHARINC(c, eptr);
3768              switch(c)
3769                {
3770                default: break;
3771                case 0x09:      /* HT */
3772                case 0x20:      /* SPACE */
3773                case 0xa0:      /* NBSP */
3774                case 0x1680:    /* OGHAM SPACE MARK */
3775                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3776                case 0x2000:    /* EN QUAD */
3777                case 0x2001:    /* EM QUAD */
3778                case 0x2002:    /* EN SPACE */
3779                case 0x2003:    /* EM SPACE */
3780                case 0x2004:    /* THREE-PER-EM SPACE */
3781                case 0x2005:    /* FOUR-PER-EM SPACE */
3782                case 0x2006:    /* SIX-PER-EM SPACE */
3783                case 0x2007:    /* FIGURE SPACE */
3784                case 0x2008:    /* PUNCTUATION SPACE */
3785                case 0x2009:    /* THIN SPACE */
3786                case 0x200A:    /* HAIR SPACE */
3787                case 0x202f:    /* NARROW NO-BREAK SPACE */
3788                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3789                case 0x3000:    /* IDEOGRAPHIC SPACE */
3790                MRRETURN(MATCH_NOMATCH);
3791                }
3792              }
3793            break;
3794    
3795            case OP_HSPACE:
3796            for (i = 1; i <= min; i++)
3797              {
3798              if (eptr >= md->end_subject)
3799                {
3800                SCHECK_PARTIAL();
3801                MRRETURN(MATCH_NOMATCH);
3802                }
3803              GETCHARINC(c, eptr);
3804              switch(c)
3805                {
3806                default: MRRETURN(MATCH_NOMATCH);
3807                case 0x09:      /* HT */
3808                case 0x20:      /* SPACE */
3809                case 0xa0:      /* NBSP */
3810                case 0x1680:    /* OGHAM SPACE MARK */
3811                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3812                case 0x2000:    /* EN QUAD */
3813                case 0x2001:    /* EM QUAD */
3814                case 0x2002:    /* EN SPACE */
3815                case 0x2003:    /* EM SPACE */
3816                case 0x2004:    /* THREE-PER-EM SPACE */
3817                case 0x2005:    /* FOUR-PER-EM SPACE */
3818                case 0x2006:    /* SIX-PER-EM SPACE */
3819                case 0x2007:    /* FIGURE SPACE */
3820                case 0x2008:    /* PUNCTUATION SPACE */
3821                case 0x2009:    /* THIN SPACE */
3822                case 0x200A:    /* HAIR SPACE */
3823                case 0x202f:    /* NARROW NO-BREAK SPACE */
3824                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3825                case 0x3000:    /* IDEOGRAPHIC SPACE */
3826                break;
3827                }
3828              }
3829            break;
3830    
3831            case OP_NOT_VSPACE:
3832            for (i = 1; i <= min; i++)
3833              {
3834              if (eptr >= md->end_subject)
3835                {
3836                SCHECK_PARTIAL();
3837                MRRETURN(MATCH_NOMATCH);
3838                }
3839              GETCHARINC(c, eptr);
3840              switch(c)
3841                {
3842                default: break;
3843                case 0x0a:      /* LF */
3844                case 0x0b:      /* VT */
3845                case 0x0c:      /* FF */
3846                case 0x0d:      /* CR */
3847                case 0x85:      /* NEL */
3848                case 0x2028:    /* LINE SEPARATOR */
3849                case 0x2029:    /* PARAGRAPH SEPARATOR */
3850                MRRETURN(MATCH_NOMATCH);
3851                }
3852              }
3853            break;
3854    
3855            case OP_VSPACE:
3856            for (i = 1; i <= min; i++)
3857              {
3858              if (eptr >= md->end_subject)
3859                {
3860                SCHECK_PARTIAL();
3861                MRRETURN(MATCH_NOMATCH);
3862                }
3863              GETCHARINC(c, eptr);
3864              switch(c)
3865                {
3866                default: MRRETURN(MATCH_NOMATCH);
3867                case 0x0a:      /* LF */
3868                case 0x0b:      /* VT */
3869                case 0x0c:      /* FF */
3870                case 0x0d:      /* CR */
3871                case 0x85:      /* NEL */
3872                case 0x2028:    /* LINE SEPARATOR */
3873                case 0x2029:    /* PARAGRAPH SEPARATOR */
3874                break;
3875                }
3876              }
3877            break;
3878    
3879            case OP_NOT_DIGIT:
3880            for (i = 1; i <= min; i++)
3881              {
3882              if (eptr >= md->end_subject)
3883                {
3884                SCHECK_PARTIAL();
3885                MRRETURN(MATCH_NOMATCH);
3886                }
3887            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3888            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3889              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
3890            }            }
3891          break;          break;
3892    
3893          case OP_DIGIT:          case OP_DIGIT:
3894          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3895            {            {
3896            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3897               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3898              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3899                MRRETURN(MATCH_NOMATCH);
3900                }
3901              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3902                MRRETURN(MATCH_NOMATCH);
3903            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3904            }            }
3905          break;          break;
# Line 2802  for (;;) Line 3907  for (;;)
3907          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3908          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3909            {            {
3910            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3911               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3912              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3913            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3914                }
3915              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3916                MRRETURN(MATCH_NOMATCH);
3917              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3918            }            }
3919          break;          break;
3920    
3921          case OP_WHITESPACE:          case OP_WHITESPACE:
3922          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3923            {            {
3924            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3925               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3926              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3927                MRRETURN(MATCH_NOMATCH);
3928                }
3929              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3930                MRRETURN(MATCH_NOMATCH);
3931            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3932            }            }
3933          break;          break;
# Line 2822  for (;;) Line 3935  for (;;)
3935          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3936          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3937            {            {
3938            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3939               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))              {
3940              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3941            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3942                }
3943              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3944                MRRETURN(MATCH_NOMATCH);
3945              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3946            }            }
3947          break;          break;
3948    
3949          case OP_WORDCHAR:          case OP_WORDCHAR:
3950          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3951            {            {
3952            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3953               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3954              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3955                MRRETURN(MATCH_NOMATCH);
3956                }
3957              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3958                MRRETURN(MATCH_NOMATCH);
3959            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3960            }            }
3961          break;          break;
# Line 2847  for (;;) Line 3968  for (;;)
3968  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
3969    
3970        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
3971        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum        than OP_PROP and OP_NOTPROP. */
       number of bytes present, as this was tested above. */  
3972    
3973        switch(ctype)        switch(ctype)
3974          {          {
3975          case OP_ANY:          case OP_ANY:
3976          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3977            {            {
3978            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
3979              {              {
3980              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3981              eptr++;              MRRETURN(MATCH_NOMATCH);
3982              }              }
3983              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3984              eptr++;
3985            }            }
         else eptr += min;  
3986          break;          break;
3987    
3988          case OP_ANYBYTE:          case OP_ALLANY:
3989            if (eptr > md->end_subject - min)
3990              {
3991              SCHECK_PARTIAL();
3992              MRRETURN(MATCH_NOMATCH);
3993              }
3994          eptr += min;          eptr += min;
3995          break;          break;
3996    
3997          /* Because of the CRLF case, we can't assume the minimum number of          case OP_ANYBYTE:
3998          bytes are present in this case. */          if (eptr > md->end_subject - min)
3999              {
4000              SCHECK_PARTIAL();
4001              MRRETURN(MATCH_NOMATCH);
4002              }
4003            eptr += min;
4004            break;
4005    
4006          case OP_ANYNL:          case OP_ANYNL:
4007          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4008            {            {
4009            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
4010                {
4011                SCHECK_PARTIAL();
4012                MRRETURN(MATCH_NOMATCH);
4013                }
4014            switch(*eptr++)            switch(*eptr++)
4015              {              {
4016              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4017              case 0x000d:              case 0x000d:
4018              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4019              break;              break;
4020              case 0x000a:              case 0x000a:
4021                break;
4022    
4023              case 0x000b:              case 0x000b:
4024              case 0x000c:              case 0x000c:
4025              case 0x0085:              case 0x0085:
4026                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4027                break;
4028                }
4029              }
4030            break;
4031    
4032            case OP_NOT_HSPACE:
4033            for (i = 1; i <= min; i++)
4034              {
4035              if (eptr >= md->end_subject)
4036                {
4037                SCHECK_PARTIAL();
4038                MRRETURN(MATCH_NOMATCH);
4039                }
4040              switch(*eptr++)
4041                {
4042                default: break;
4043                case 0x09:      /* HT */
4044                case 0x20:      /* SPACE */
4045                case 0xa0:      /* NBSP */
4046                MRRETURN(MATCH_NOMATCH);
4047                }
4048              }
4049            break;
4050    
4051            case OP_HSPACE:
4052            for (i = 1; i <= min; i++)
4053              {
4054              if (eptr >= md->end_subject)
4055                {
4056                SCHECK_PARTIAL();
4057                MRRETURN(MATCH_NOMATCH);
4058                }
4059              switch(*eptr++)
4060                {
4061                default: MRRETURN(MATCH_NOMATCH);
4062                case 0x09:      /* HT */
4063                case 0x20:      /* SPACE */
4064                case 0xa0:      /* NBSP */
4065                break;
4066                }
4067              }
4068            break;
4069    
4070            case OP_NOT_VSPACE:
4071            for (i = 1; i <= min; i++)
4072              {
4073              if (eptr >= md->end_subject)
4074                {
4075                SCHECK_PARTIAL();
4076                MRRETURN(MATCH_NOMATCH);
4077                }
4078              switch(*eptr++)
4079                {
4080                default: break;
4081                case 0x0a:      /* LF */
4082                case 0x0b:      /* VT */
4083                case 0x0c:      /* FF */
4084                case 0x0d:      /* CR */
4085                case 0x85:      /* NEL */
4086                MRRETURN(MATCH_NOMATCH);
4087                }
4088              }
4089            break;
4090    
4091            case OP_VSPACE:
4092            for (i = 1; i <= min; i++)
4093              {
4094              if (eptr >= md->end_subject)
4095                {
4096                SCHECK_PARTIAL();
4097                MRRETURN(MATCH_NOMATCH);
4098                }
4099              switch(*eptr++)
4100                {
4101                default: MRRETURN(MATCH_NOMATCH);
4102                case 0x0a:      /* LF */
4103                case 0x0b:      /* VT */
4104                case 0x0c:      /* FF */
4105                case 0x0d:      /* CR */
4106                case 0x85:      /* NEL */
4107              break;              break;
4108              }              }
4109            }            }
# Line 2892  for (;;) Line 4111  for (;;)
4111    
4112          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
4113          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4114            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            {
4115              if (eptr >= md->end_subject)
4116                {
4117                SCHECK_PARTIAL();
4118                MRRETURN(MATCH_NOMATCH);
4119                }
4120              if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4121              }
4122          break;          break;
4123    
4124          case OP_DIGIT:          case OP_DIGIT:
4125          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4126            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            {
4127              if (eptr >= md->end_subject)
4128                {
4129                SCHECK_PARTIAL();
4130                MRRETURN(MATCH_NOMATCH);
4131                }
4132              if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4133              }
4134          break;          break;
4135    
4136          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4137          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4138            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            {
4139              if (eptr >= md->end_subject)
4140                {
4141                SCHECK_PARTIAL();
4142                MRRETURN(MATCH_NOMATCH);
4143                }
4144              if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4145              }
4146          break;          break;
4147    
4148          case OP_WHITESPACE:          case OP_WHITESPACE:
4149          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4150            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            {
4151              if (eptr >= md->end_subject)
4152                {
4153                SCHECK_PARTIAL();
4154                MRRETURN(MATCH_NOMATCH);
4155                }
4156              if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4157              }
4158          break;          break;
4159    
4160          case OP_NOT_WORDCHAR: