/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 164 by ph10, Fri May 4 15:11:14 2007 UTC revision 595 by ph10, Mon May 2 10:33:29 2011 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 53  possible. There are also some static sup Line 57  possible. There are also some static sup
57  #undef min  #undef min
58  #undef max  #undef max
59    
 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
   
 #define EPTR_WORK_SIZE (1000)  
   
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 70  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_ACCEPT       (-999)
75    #define MATCH_COMMIT       (-998)
76    #define MATCH_PRUNE        (-997)
77    #define MATCH_SKIP         (-996)
78    #define MATCH_SKIP_ARG     (-995)
79    #define MATCH_THEN         (-994)
80    
81    /* This is a convenience macro for code that occurs many times. */
82    
83    #define MRRETURN(ra) \
84      { \
85      md->mark = markptr; \
86      RRETURN(ra); \
87      }
88    
89  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
90  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 83  static const char rep_max[] = { 0, 0, 0, Line 99  static const char rep_max[] = { 0, 0, 0,
99    
100    
101    
102  #ifdef DEBUG  #ifdef PCRE_DEBUG
103  /*************************************************  /*************************************************
104  *        Debugging function to print chars       *  *        Debugging function to print chars       *
105  *************************************************/  *************************************************/
# Line 116  while (length-- > 0) Line 132  while (length-- > 0)
132  *          Match a back-reference                *  *          Match a back-reference                *
133  *************************************************/  *************************************************/
134    
135  /* If a back reference hasn't been set, the length that is passed is greater  /* Normally, if a back reference hasn't been set, the length that is passed is
136  than the number of characters left in the string, so the match fails.  negative, so the match always fails. However, in JavaScript compatibility mode,
137    the length passed is zero. Note that in caseless UTF-8 mode, the number of
138    subject bytes matched may be different to the number of reference bytes.
139    
140  Arguments:  Arguments:
141    offset      index into the offset vector    offset      index into the offset vector
142    eptr        points into the subject    eptr        pointer into the subject
143    length      length to be matched    length      length of reference to be matched (number of bytes)
144    md          points to match data block    md          points to match data block
145    ims         the ims flags    ims         the ims flags
146    
147  Returns:      TRUE if matched  Returns:      < 0 if not matched, otherwise the number of subject bytes matched
148  */  */
149    
150  static BOOL  static int
151  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
152    unsigned long int ims)    unsigned long int ims)
153  {  {
154  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR eptr_start = eptr;
155    register USPTR p = md->start_subject + md->offset_vector[offset];
156    
157  #ifdef DEBUG  #ifdef PCRE_DEBUG
158  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
159    printf("matching subject <null>");    printf("matching subject <null>");
160  else  else
# Line 148  pchars(p, length, FALSE, md); Line 167  pchars(p, length, FALSE, md);
167  printf("\n");  printf("\n");
168  #endif  #endif
169    
170  /* Always fail if not enough characters left */  /* Always fail if reference not set (and not JavaScript compatible). */
171    
172  if (length > md->end_subject - eptr) return FALSE;  if (length < 0) return -1;
173    
174  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175    properly if Unicode properties are supported. Otherwise, we can check only
176    ASCII characters. */
177    
178  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
179    {    {
180    #ifdef SUPPORT_UTF8
181    #ifdef SUPPORT_UCP
182      if (md->utf8)
183        {
184        /* Match characters up to the end of the reference. NOTE: the number of
185        bytes matched may differ, because there are some characters whose upper and
186        lower case versions code as different numbers of bytes. For example, U+023A
187        (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188        a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189        the latter. It is important, therefore, to check the length along the
190        reference, not along the subject (earlier code did this wrong). */
191    
192        USPTR endptr = p + length;
193        while (p < endptr)
194          {
195          int c, d;
196          GETCHARINC(c, eptr);
197          GETCHARINC(d, p);
198          if (c != d && c != UCD_OTHERCASE(d)) return -1;
199          }
200        }
201      else
202    #endif
203    #endif
204    
205      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
206      is no UCP support. */
207    
208    while (length-- > 0)    while (length-- > 0)
209      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
210    }    }
211    
212    /* In the caseful case, we can just compare the bytes, whether or not we
213    are in UTF-8 mode. */
214    
215  else  else
216    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return -1; }
217    
218  return TRUE;  return eptr - eptr_start;
219  }  }
220    
221    
# Line 189  obtained from malloc() instead instead o Line 242  obtained from malloc() instead instead o
242  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
243  always used to.  always used to.
244    
245  The original heap-recursive code used longjmp(). However, it seems that this  The original heap-recursive code used longjmp(). However, it seems that this
246  can be very slow on some operating systems. Following a suggestion from Stan  can be very slow on some operating systems. Following a suggestion from Stan
247  Switzer, the use of longjmp() has been abolished, at the cost of having to  Switzer, the use of longjmp() has been abolished, at the cost of having to
248  provide a unique number for each call to RMATCH. There is no way of generating  provide a unique number for each call to RMATCH. There is no way of generating
# Line 198  them stand out more clearly. Line 251  them stand out more clearly.
251    
252  Crude tests on x86 Linux show a small speedup of around 5-8%. However, on  Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
253  FreeBSD, avoiding longjmp() more than halves the time taken to run the standard  FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
254  tests. Furthermore, not using longjmp() means that local dynamic variables  tests. Furthermore, not using longjmp() means that local dynamic variables
255  don't have indeterminate values; this has meant that the frame size can be  don't have indeterminate values; this has meant that the frame size can be
256  reduced because the result can be "passed back" by straight setting of the  reduced because the result can be "passed back" by straight setting of the
257  variable instead of being passed in the frame.  variable instead of being passed in the frame.
258  ****************************************************************************  ****************************************************************************
259  ***************************************************************************/  ***************************************************************************/
260    
261    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
262  /* Numbers for RMATCH calls */  below must be updated in sync.  */
263    
264  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
265         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
266         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
267         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
268         RM41,  RM42, RM43, RM44, RM45, RM46, RM47 };         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
269           RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
270           RM61,  RM62 };
271    
272  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
273  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
274  actuall used in this definition. */  actually used in this definition. */
275    
276  #ifndef NO_RECURSE  #ifndef NO_RECURSE
277  #define REGISTER register  #define REGISTER register
278    
279  #ifdef DEBUG  #ifdef PCRE_DEBUG
280  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
281    { \    { \
282    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
283    rrc = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
284    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
285    }    }
286  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 236  actuall used in this definition. */ Line 290  actuall used in this definition. */
290    }    }
291  #else  #else
292  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
293    rrc = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
294  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
295  #endif  #endif
296    
# Line 251  argument of match(), which never changes Line 305  argument of match(), which never changes
305    
306  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
307    {\    {\
308    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
309      if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
310    frame->Xwhere = rw; \    frame->Xwhere = rw; \
311    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
312    newframe->Xecode = rb;\    newframe->Xecode = rb;\
313      newframe->Xmstart = mstart;\
314      newframe->Xmarkptr = markptr;\
315    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
316    newframe->Xims = re;\    newframe->Xims = re;\
317    newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
# Line 270  argument of match(), which never changes Line 327  argument of match(), which never changes
327    
328  #define RRETURN(ra)\  #define RRETURN(ra)\
329    {\    {\
330    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
331    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
332    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
333    if (frame != NULL)\    if (frame != NULL)\
334      {\      {\
335      rrc = ra;\      rrc = ra;\
# Line 289  typedef struct heapframe { Line 346  typedef struct heapframe {
346    
347    /* Function arguments that may change */    /* Function arguments that may change */
348    
349    const uschar *Xeptr;    USPTR Xeptr;
350    const uschar *Xecode;    const uschar *Xecode;
351      USPTR Xmstart;
352      USPTR Xmarkptr;
353    int Xoffset_top;    int Xoffset_top;
354    long int Xims;    long int Xims;
355    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 299  typedef struct heapframe { Line 358  typedef struct heapframe {
358    
359    /* Function local variables */    /* Function local variables */
360    
361    const uschar *Xcallpat;    USPTR Xcallpat;
362    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
363    const uschar *Xdata;    USPTR Xcharptr;
364    const uschar *Xnext;  #endif
365    const uschar *Xpp;    USPTR Xdata;
366    const uschar *Xprev;    USPTR Xnext;
367    const uschar *Xsaved_eptr;    USPTR Xpp;
368      USPTR Xprev;
369      USPTR Xsaved_eptr;
370    
371    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
372    
# Line 326  typedef struct heapframe { Line 387  typedef struct heapframe {
387    uschar Xocchars[8];    uschar Xocchars[8];
388  #endif  #endif
389    
390      int Xcodelink;
391    int Xctype;    int Xctype;
392    unsigned int Xfc;    unsigned int Xfc;
393    int Xfi;    int Xfi;
# Line 344  typedef struct heapframe { Line 406  typedef struct heapframe {
406    /* Where to jump back to */    /* Where to jump back to */
407    
408    int Xwhere;    int Xwhere;
409    
410  } heapframe;  } heapframe;
411    
412  #endif  #endif
# Line 361  typedef struct heapframe { Line 423  typedef struct heapframe {
423    
424  /* This function is called recursively in many circumstances. Whenever it  /* This function is called recursively in many circumstances. Whenever it
425  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
426  same response.  same response. */
427    
428    /* These macros pack up tests that are used for partial matching, and which
429    appears several times in the code. We set the "hit end" flag if the pointer is
430    at the end of the subject and also past the start of the subject (i.e.
431    something has been matched). For hard partial matching, we then return
432    immediately. The second one is used when we already know we are past the end of
433    the subject. */
434    
435    #define CHECK_PARTIAL()\
436      if (md->partial != 0 && eptr >= md->end_subject && \
437          eptr > md->start_used_ptr) \
438        { \
439        md->hitend = TRUE; \
440        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
441        }
442    
443    #define SCHECK_PARTIAL()\
444      if (md->partial != 0 && eptr > md->start_used_ptr) \
445        { \
446        md->hitend = TRUE; \
447        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
448        }
449    
450    
451  Performance note: It might be tempting to extract commonly used fields from the  /* Performance note: It might be tempting to extract commonly used fields from
452  md structure (e.g. utf8, end_subject) into individual variables to improve  the md structure (e.g. utf8, end_subject) into individual variables to improve
453  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
454  made performance worse.  made performance worse.
455    
456  Arguments:  Arguments:
457     eptr        pointer to current character in subject     eptr        pointer to current character in subject
458     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
459       mstart      pointer to the current match start position (can be modified
460                     by encountering \K)
461       markptr     pointer to the most recent MARK name, or NULL
462     offset_top  current top pointer     offset_top  current top pointer
463     md          pointer to "static" info for the match     md          pointer to "static" info for the match
464     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 380  Arguments: Line 468  Arguments:
468                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
469                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
470                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
471     rdepth      the recursion depth     rdepth      the recursion depth
472    
473  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
474                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
475                   a negative MATCH_xxx value for PRUNE, SKIP, etc
476                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
477                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
478  */  */
479    
480  static int  static int
481  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
482    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
483    int flags, unsigned int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
484  {  {
485  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
486  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 404  register unsigned int c;   /* Character Line 492  register unsigned int c;   /* Character
492  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
493    
494  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
495    int condcode;
496    
497  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
498  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 411  heap storage. Set up the top-level frame Line 500  heap storage. Set up the top-level frame
500  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  heap whenever RMATCH() does a "recursion". See the macro definitions above. */
501    
502  #ifdef NO_RECURSE  #ifdef NO_RECURSE
503  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
504    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
505  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
506    
507  /* Copy in the original argument variables */  /* Copy in the original argument variables */
508    
509  frame->Xeptr = eptr;  frame->Xeptr = eptr;
510  frame->Xecode = ecode;  frame->Xecode = ecode;
511    frame->Xmstart = mstart;
512    frame->Xmarkptr = markptr;
513  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
514  frame->Xims = ims;  frame->Xims = ims;
515  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 432  HEAP_RECURSE: Line 524  HEAP_RECURSE:
524    
525  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
526  #define ecode              frame->Xecode  #define ecode              frame->Xecode
527    #define mstart             frame->Xmstart
528    #define markptr            frame->Xmarkptr
529  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
530  #define ims                frame->Xims  #define ims                frame->Xims
531  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 444  HEAP_RECURSE: Line 538  HEAP_RECURSE:
538  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
539  #endif  #endif
540  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
541    #define codelink           frame->Xcodelink
542  #define data               frame->Xdata  #define data               frame->Xdata
543  #define next               frame->Xnext  #define next               frame->Xnext
544  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 524  int oclength; Line 619  int oclength;
619  uschar occhars[8];  uschar occhars[8];
620  #endif  #endif
621    
622    int codelink;
623  int ctype;  int ctype;
624  int length;  int length;
625  int max;  int max;
# Line 557  TAIL_RECURSE: Line 653  TAIL_RECURSE:
653  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
654  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
655  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
656  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
657  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
658  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
659  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
# Line 580  original_ims = ims;    /* Save for reset Line 676  original_ims = ims;    /* Save for reset
676  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
677  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
678  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
679  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
680  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
681  already used. */  block that is used is on the stack, so a new one may be required for each
682    match(). */
683    
684  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
685    {    {
686    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
687    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
688      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
689    }    }
690    
691  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 605  for (;;) Line 695  for (;;)
695    minimize = possessive = FALSE;    minimize = possessive = FALSE;
696    op = *ecode;    op = *ecode;
697    
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
698    switch(op)    switch(op)
699      {      {
700        case OP_MARK:
701        markptr = ecode + 2;
702        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
703          ims, eptrb, flags, RM55);
704    
705        /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
706        argument, and we must check whether that argument matches this MARK's
707        argument. It is passed back in md->start_match_ptr (an overloading of that
708        variable). If it does match, we reset that variable to the current subject
709        position and return MATCH_SKIP. Otherwise, pass back the return code
710        unaltered. */
711    
712        if (rrc == MATCH_SKIP_ARG &&
713            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
714          {
715          md->start_match_ptr = eptr;
716          RRETURN(MATCH_SKIP);
717          }
718    
719        if (md->mark == NULL) md->mark = markptr;
720        RRETURN(rrc);
721    
722        case OP_FAIL:
723        MRRETURN(MATCH_NOMATCH);
724    
725        /* COMMIT overrides PRUNE, SKIP, and THEN */
726    
727        case OP_COMMIT:
728        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
729          ims, eptrb, flags, RM52);
730        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
731            rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
732            rrc != MATCH_THEN)
733          RRETURN(rrc);
734        MRRETURN(MATCH_COMMIT);
735    
736        /* PRUNE overrides THEN */
737    
738        case OP_PRUNE:
739        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
740          ims, eptrb, flags, RM51);
741        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
742        MRRETURN(MATCH_PRUNE);
743    
744        case OP_PRUNE_ARG:
745        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
746          ims, eptrb, flags, RM56);
747        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
748        md->mark = ecode + 2;
749        RRETURN(MATCH_PRUNE);
750    
751        /* SKIP overrides PRUNE and THEN */
752    
753        case OP_SKIP:
754        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
755          ims, eptrb, flags, RM53);
756        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
757          RRETURN(rrc);
758        md->start_match_ptr = eptr;   /* Pass back current position */
759        MRRETURN(MATCH_SKIP);
760    
761        case OP_SKIP_ARG:
762        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
763          ims, eptrb, flags, RM57);
764        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
765          RRETURN(rrc);
766    
767        /* Pass back the current skip name by overloading md->start_match_ptr and
768        returning the special MATCH_SKIP_ARG return code. This will either be
769        caught by a matching MARK, or get to the top, where it is treated the same
770        as PRUNE. */
771    
772        md->start_match_ptr = ecode + 2;
773        RRETURN(MATCH_SKIP_ARG);
774    
775        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
776        the alt that is at the start of the current branch. This makes it possible
777        to skip back past alternatives that precede the THEN within the current
778        branch. */
779    
780        case OP_THEN:
781        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
782          ims, eptrb, flags, RM54);
783        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
784        md->start_match_ptr = ecode - GET(ecode, 1);
785        MRRETURN(MATCH_THEN);
786    
787        case OP_THEN_ARG:
788        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
789          offset_top, md, ims, eptrb, flags, RM58);
790        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
791        md->start_match_ptr = ecode - GET(ecode, 1);
792        md->mark = ecode + LINK_SIZE + 2;
793        RRETURN(MATCH_THEN);
794    
795      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
796      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
797      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 634  for (;;) Line 811  for (;;)
811      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
812      offset = number << 1;      offset = number << 1;
813    
814  #ifdef DEBUG  #ifdef PCRE_DEBUG
815      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
816      printf("subject=");      printf("subject=");
817      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
# Line 649  for (;;) Line 826  for (;;)
826        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
827    
828        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
829        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
830            (int)(eptr - md->start_subject);
831    
832        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
833        do        do
834          {          {
835          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
836            ims, eptrb, flags, RM1);            ims, eptrb, flags, RM1);
837          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
838                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
839              RRETURN(rrc);
840          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
841          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
842          }          }
# Line 668  for (;;) Line 848  for (;;)
848        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
849        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
850    
851          if (rrc != MATCH_THEN) md->mark = markptr;
852        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
853        }        }
854    
855      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
856      bracket. */      as a non-capturing bracket. */
857    
858        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
859        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
860    
861      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
862    
863        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
864        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
865    
866      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
867      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
868      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
869      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
870        is set. */
871    
872      case OP_BRA:      case OP_BRA:
873      case OP_SBRA:      case OP_SBRA:
# Line 687  for (;;) Line 875  for (;;)
875      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
876      for (;;)      for (;;)
877        {        {
878        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
879          {          {
880          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
881          flags |= match_tail_recursed;            {
882          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
883          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
884              goto TAIL_RECURSE;
885              }
886    
887            /* Possibly empty group; can't use tail recursion. */
888    
889            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
890              eptrb, flags, RM48);
891            if (rrc == MATCH_NOMATCH) md->mark = markptr;
892            RRETURN(rrc);
893          }          }
894    
895        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
# Line 700  for (;;) Line 897  for (;;)
897    
898        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
899          eptrb, flags, RM2);          eptrb, flags, RM2);
900        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
901              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
902            RRETURN(rrc);
903        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
904        }        }
905      /* Control never reaches here. */      /* Control never reaches here. */
# Line 713  for (;;) Line 912  for (;;)
912    
913      case OP_COND:      case OP_COND:
914      case OP_SCOND:      case OP_SCOND:
915      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
916    
917        /* Because of the way auto-callout works during compile, a callout item is
918        inserted between OP_COND and an assertion condition. */
919    
920        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
921        {        {
922        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        if (pcre_callout != NULL)
923        condition = md->recursive != NULL &&          {
924          (offset == RREF_ANY || offset == md->recursive->group_num);          pcre_callout_block cb;
925        ecode += condition? 3 : GET(ecode, 1);          cb.version          = 1;   /* Version 1 of the callout block */
926            cb.callout_number   = ecode[LINK_SIZE+2];
927            cb.offset_vector    = md->offset_vector;
928            cb.subject          = (PCRE_SPTR)md->start_subject;
929            cb.subject_length   = (int)(md->end_subject - md->start_subject);
930            cb.start_match      = (int)(mstart - md->start_subject);
931            cb.current_position = (int)(eptr - md->start_subject);
932            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
933            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
934            cb.capture_top      = offset_top/2;
935            cb.capture_last     = md->capture_last;
936            cb.callout_data     = md->callout_data;
937            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
938            if (rrc < 0) RRETURN(rrc);
939            }
940          ecode += _pcre_OP_lengths[OP_CALLOUT];
941          }
942    
943        condcode = ecode[LINK_SIZE+1];
944    
945        /* Now see what the actual condition is */
946    
947        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
948          {
949          if (md->recursive == NULL)                /* Not recursing => FALSE */
950            {
951            condition = FALSE;
952            ecode += GET(ecode, 1);
953            }
954          else
955            {
956            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
957            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
958    
959            /* If the test is for recursion into a specific subpattern, and it is
960            false, but the test was set up by name, scan the table to see if the
961            name refers to any other numbers, and test them. The condition is true
962            if any one is set. */
963    
964            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
965              {
966              uschar *slotA = md->name_table;
967              for (i = 0; i < md->name_count; i++)
968                {
969                if (GET2(slotA, 0) == recno) break;
970                slotA += md->name_entry_size;
971                }
972    
973              /* Found a name for the number - there can be only one; duplicate
974              names for different numbers are allowed, but not vice versa. First
975              scan down for duplicates. */
976    
977              if (i < md->name_count)
978                {
979                uschar *slotB = slotA;
980                while (slotB > md->name_table)
981                  {
982                  slotB -= md->name_entry_size;
983                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
984                    {
985                    condition = GET2(slotB, 0) == md->recursive->group_num;
986                    if (condition) break;
987                    }
988                  else break;
989                  }
990    
991                /* Scan up for duplicates */
992    
993                if (!condition)
994                  {
995                  slotB = slotA;
996                  for (i++; i < md->name_count; i++)
997                    {
998                    slotB += md->name_entry_size;
999                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1000                      {
1001                      condition = GET2(slotB, 0) == md->recursive->group_num;
1002                      if (condition) break;
1003                      }
1004                    else break;
1005                    }
1006                  }
1007                }
1008              }
1009    
1010            /* Choose branch according to the condition */
1011    
1012            ecode += condition? 3 : GET(ecode, 1);
1013            }
1014        }        }
1015    
1016      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1017        {        {
1018        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
1019        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
1020    
1021          /* If the numbered capture is unset, but the reference was by name,
1022          scan the table to see if the name refers to any other numbers, and test
1023          them. The condition is true if any one is set. This is tediously similar
1024          to the code above, but not close enough to try to amalgamate. */
1025    
1026          if (!condition && condcode == OP_NCREF)
1027            {
1028            int refno = offset >> 1;
1029            uschar *slotA = md->name_table;
1030    
1031            for (i = 0; i < md->name_count; i++)
1032              {
1033              if (GET2(slotA, 0) == refno) break;
1034              slotA += md->name_entry_size;
1035              }
1036    
1037            /* Found a name for the number - there can be only one; duplicate names
1038            for different numbers are allowed, but not vice versa. First scan down
1039            for duplicates. */
1040    
1041            if (i < md->name_count)
1042              {
1043              uschar *slotB = slotA;
1044              while (slotB > md->name_table)
1045                {
1046                slotB -= md->name_entry_size;
1047                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1048                  {
1049                  offset = GET2(slotB, 0) << 1;
1050                  condition = offset < offset_top &&
1051                    md->offset_vector[offset] >= 0;
1052                  if (condition) break;
1053                  }
1054                else break;
1055                }
1056    
1057              /* Scan up for duplicates */
1058    
1059              if (!condition)
1060                {
1061                slotB = slotA;
1062                for (i++; i < md->name_count; i++)
1063                  {
1064                  slotB += md->name_entry_size;
1065                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1066                    {
1067                    offset = GET2(slotB, 0) << 1;
1068                    condition = offset < offset_top &&
1069                      md->offset_vector[offset] >= 0;
1070                    if (condition) break;
1071                    }
1072                  else break;
1073                  }
1074                }
1075              }
1076            }
1077    
1078          /* Choose branch according to the condition */
1079    
1080        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
1081        }        }
1082    
1083      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
1084        {        {
1085        condition = FALSE;        condition = FALSE;
1086        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 748  for (;;) Line 1100  for (;;)
1100          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1101          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1102          }          }
1103        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH &&
1104                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1105          {          {
1106          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1107          }          }
1108        else        else
1109          {          {
1110          condition = FALSE;          condition = FALSE;
1111          ecode += GET(ecode, 1);          ecode += codelink;
1112          }          }
1113        }        }
1114    
1115      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1116      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
1117      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
1118        group. If the second alternative doesn't exist, we can just plough on. */
1119    
1120      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1121        {        {
1122        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1123        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
1124        goto TAIL_RECURSE;          {
1125            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1126            RRETURN(rrc);
1127            }
1128          else                       /* Group must match something */
1129            {
1130            flags = 0;
1131            goto TAIL_RECURSE;
1132            }
1133        }        }
1134      else      else                         /* Condition false & no alternative */
1135        {        {
1136        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1137        }        }
1138      break;      break;
1139    
1140    
1141      /* End of the pattern. If we are in a top-level recursion, we should      /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1142      restore the offsets appropriately and continue from after the call. */      to close any currently open capturing brackets. */
1143    
1144        case OP_CLOSE:
1145        number = GET2(ecode, 1);
1146        offset = number << 1;
1147    
1148    #ifdef PCRE_DEBUG
1149          printf("end bracket %d at *ACCEPT", number);
1150          printf("\n");
1151    #endif
1152    
1153        md->capture_last = number;
1154        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1155          {
1156          md->offset_vector[offset] =
1157            md->offset_vector[md->offset_end - number];
1158          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1159          if (offset_top <= offset) offset_top = offset + 2;
1160          }
1161        ecode += 3;
1162        break;
1163    
1164    
1165        /* End of the pattern, either real or forced. If we are in a top-level
1166        recursion, we should restore the offsets appropriately and continue from
1167        after the call. */
1168    
1169        case OP_ACCEPT:
1170      case OP_END:      case OP_END:
1171      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1172        {        {
# Line 787  for (;;) Line 1175  for (;;)
1175        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1176        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1177          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1178        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1179        ims = original_ims;        ims = original_ims;
1180        ecode = rec->after_call;        ecode = rec->after_call;
1181        break;        break;
1182        }        }
1183    
1184      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1185      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1186        the subject. In both cases, backtracking will then try other alternatives,
1187        if any. */
1188    
1189        if (eptr == mstart &&
1190            (md->notempty ||
1191              (md->notempty_atstart &&
1192                mstart == md->start_subject + md->start_offset)))
1193          MRRETURN(MATCH_NOMATCH);
1194    
1195        /* Otherwise, we have a match. */
1196    
1197        md->end_match_ptr = eptr;           /* Record where we ended */
1198        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1199        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1200    
1201      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      /* For some reason, the macros don't work properly if an expression is
1202      md->end_match_ptr = eptr;          /* Record where we ended */      given as the argument to MRRETURN when the heap is in use. */
1203      md->end_offset_top = offset_top;   /* and how many extracts were taken */  
1204      RRETURN(MATCH_MATCH);      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1205        MRRETURN(rrc);
1206    
1207      /* Change option settings */      /* Change option settings */
1208    
# Line 821  for (;;) Line 1224  for (;;)
1224        {        {
1225        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1226          RM4);          RM4);
1227        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1228        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          {
1229            mstart = md->start_match_ptr;   /* In case \K reset it */
1230            break;
1231            }
1232          if (rrc != MATCH_NOMATCH &&
1233              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1234            RRETURN(rrc);
1235        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1236        }        }
1237      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1238      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1239    
1240      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1241    
# Line 840  for (;;) Line 1249  for (;;)
1249      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1250      continue;      continue;
1251    
1252      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1253        PRUNE, or COMMIT means we must assume failure without checking subsequent
1254        branches. */
1255    
1256      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1257      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
# Line 848  for (;;) Line 1259  for (;;)
1259        {        {
1260        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1261          RM5);          RM5);
1262        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1263        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1264            {
1265            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1266            break;
1267            }
1268          if (rrc != MATCH_NOMATCH &&
1269              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1270            RRETURN(rrc);
1271        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1272        }        }
1273      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 872  for (;;) Line 1290  for (;;)
1290        while (i-- > 0)        while (i-- > 0)
1291          {          {
1292          eptr--;          eptr--;
1293          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1294          BACKCHAR(eptr)          BACKCHAR(eptr);
1295          }          }
1296        }        }
1297      else      else
# Line 883  for (;;) Line 1301  for (;;)
1301    
1302        {        {
1303        eptr -= GET(ecode, 1);        eptr -= GET(ecode, 1);
1304        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1305        }        }
1306    
1307      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1308    
1309        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1310      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1311      break;      break;
1312    
# Line 903  for (;;) Line 1322  for (;;)
1322        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1323        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1324        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1325        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1326        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1327        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1328        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1329        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1330        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1331        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1332        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1333        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1334        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1335        }        }
1336      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 966  for (;;) Line 1385  for (;;)
1385    
1386        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1387              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1388        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1389    
1390        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1391        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 978  for (;;) Line 1396  for (;;)
1396          {          {
1397          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1398            md, ims, eptrb, flags, RM6);            md, ims, eptrb, flags, RM6);
1399          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1400            {            {
1401            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1402            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1403            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1404              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1405            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1406            }            }
1407          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH &&
1408                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1409            {            {
1410            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1411              if (new_recursive.offset_save != stacksave)
1412                (pcre_free)(new_recursive.offset_save);
1413            RRETURN(rrc);            RRETURN(rrc);
1414            }            }
1415    
# Line 1003  for (;;) Line 1424  for (;;)
1424        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1425        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1426          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1427        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1428        }        }
1429      /* Control never reaches here */      /* Control never reaches here */
1430    
# Line 1012  for (;;) Line 1433  for (;;)
1433      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1434      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1435      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1436      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1437        the start-of-match value in case it was changed by \K. */
1438    
1439      case OP_ONCE:      case OP_ONCE:
1440      prev = ecode;      prev = ecode;
# Line 1020  for (;;) Line 1442  for (;;)
1442    
1443      do      do
1444        {        {
1445        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1446          eptrb, 0, RM7);        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1447        if (rrc == MATCH_MATCH) break;          {
1448        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          mstart = md->start_match_ptr;
1449            break;
1450            }
1451          if (rrc != MATCH_NOMATCH &&
1452              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1453            RRETURN(rrc);
1454        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1455        }        }
1456      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1066  for (;;) Line 1493  for (;;)
1493    
1494      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1495        {        {
1496        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
         RM8);  
1497        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1498        ecode = prev;        ecode = prev;
1499        flags = match_tail_recursed;        flags = 0;
1500        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1501        }        }
1502      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
# Line 1078  for (;;) Line 1504  for (;;)
1504        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1505        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1506        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1507        flags = match_tail_recursed;        flags = 0;
1508        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1509        }        }
1510      /* Control never gets here */      /* Control never gets here */
# Line 1090  for (;;) Line 1516  for (;;)
1516      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1517      break;      break;
1518    
1519      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1520      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1521      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1522      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1523      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1524    
1525      case OP_BRAZERO:      case OP_BRAZERO:
1526        {        {
# Line 1116  for (;;) Line 1542  for (;;)
1542        }        }
1543      break;      break;
1544    
1545        case OP_SKIPZERO:
1546          {
1547          next = ecode+1;
1548          do next += GET(next,1); while (*next == OP_ALT);
1549          ecode = next + 1 + LINK_SIZE;
1550          }
1551        break;
1552    
1553      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1554    
1555      case OP_KET:      case OP_KET:
# Line 1134  for (;;) Line 1568  for (;;)
1568        }        }
1569      else saved_eptr = NULL;      else saved_eptr = NULL;
1570    
1571      /* If we are at the end of an assertion group, stop matching and return      /* If we are at the end of an assertion group or an atomic group, stop
1572      MATCH_MATCH, but record the current high water mark for use by positive      matching and return MATCH_MATCH, but record the current high water mark for
1573      assertions. Do this also for the "once" (atomic) groups. */      use by positive assertions. We also need to record the match start in case
1574        it was changed by \K. */
1575    
1576      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1577          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1144  for (;;) Line 1579  for (;;)
1579        {        {
1580        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1581        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1582        RRETURN(MATCH_MATCH);        md->start_match_ptr = mstart;
1583          MRRETURN(MATCH_MATCH);
1584        }        }
1585    
1586      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
# Line 1158  for (;;) Line 1594  for (;;)
1594        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1595        offset = number << 1;        offset = number << 1;
1596    
1597  #ifdef DEBUG  #ifdef PCRE_DEBUG
1598        printf("end bracket %d", number);        printf("end bracket %d", number);
1599        printf("\n");        printf("\n");
1600  #endif  #endif
# Line 1168  for (;;) Line 1604  for (;;)
1604          {          {
1605          md->offset_vector[offset] =          md->offset_vector[offset] =
1606            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1607          md->offset_vector[offset+1] = eptr - md->start_subject;          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1608          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1609          }          }
1610    
# Line 1180  for (;;) Line 1616  for (;;)
1616          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1617          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1618          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
         md->start_match = rec->save_start;  
1619          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1620            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1621            offset_top = rec->save_offset_top;
1622          ecode = rec->after_call;          ecode = rec->after_call;
1623          ims = original_ims;          ims = original_ims;
1624          break;          break;
# Line 1209  for (;;) Line 1645  for (;;)
1645    
1646      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1647      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1648      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1649        unlimited repeat of a group that can match an empty string. */
1650    
1651      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1652    
1653      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1654        {        {
1655        RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
         RM12);  
1656        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1657          if (flags != 0)    /* Could match an empty string */
1658            {
1659            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1660            RRETURN(rrc);
1661            }
1662        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1663        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1664        }        }
1665      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
# Line 1227  for (;;) Line 1667  for (;;)
1667        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1668        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1669        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1670        flags = match_tail_recursed;        flags = 0;
1671        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1672        }        }
1673      /* Control never gets here */      /* Control never gets here */
# Line 1235  for (;;) Line 1675  for (;;)
1675      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1676    
1677      case OP_CIRC:      case OP_CIRC:
1678      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1679      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1680        {        {
1681        if (eptr != md->start_subject &&        if (eptr != md->start_subject &&
1682            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1683          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1684        ecode++;        ecode++;
1685        break;        break;
1686        }        }
# Line 1249  for (;;) Line 1689  for (;;)
1689      /* Start of subject assertion */      /* Start of subject assertion */
1690    
1691      case OP_SOD:      case OP_SOD:
1692      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1693      ecode++;      ecode++;
1694      break;      break;
1695    
1696      /* Start of match assertion */      /* Start of match assertion */
1697    
1698      case OP_SOM:      case OP_SOM:
1699      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1700        ecode++;
1701        break;
1702    
1703        /* Reset the start of match point */
1704    
1705        case OP_SET_SOM:
1706        mstart = eptr;
1707      ecode++;      ecode++;
1708      break;      break;
1709    
# Line 1267  for (;;) Line 1714  for (;;)
1714      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1715        {        {
1716        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1717          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1718        else        else
1719          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          {
1720            if (md->noteol) MRRETURN(MATCH_NOMATCH);
1721            SCHECK_PARTIAL();
1722            }
1723        ecode++;        ecode++;
1724        break;        break;
1725        }        }
1726      else      else  /* Not multiline */
1727        {        {
1728        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1729        if (!md->endonly)        if (!md->endonly) goto ASSERT_NL_OR_EOS;
         {  
         if (eptr != md->end_subject &&  
             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))  
           RRETURN(MATCH_NOMATCH);  
         ecode++;  
         break;  
         }  
1730        }        }
1731    
1732      /* ... else fall through for endonly */      /* ... else fall through for endonly */
1733    
1734      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1735    
1736      case OP_EOD:      case OP_EOD:
1737      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1738        SCHECK_PARTIAL();
1739      ecode++;      ecode++;
1740      break;      break;
1741    
1742      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1743    
1744      case OP_EODN:      case OP_EODN:
1745      if (eptr != md->end_subject &&      ASSERT_NL_OR_EOS:
1746        if (eptr < md->end_subject &&
1747          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1748        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1749    
1750        /* Either at end of string or \n before end. */
1751    
1752        SCHECK_PARTIAL();
1753      ecode++;      ecode++;
1754      break;      break;
1755    
# Line 1311  for (;;) Line 1761  for (;;)
1761    
1762        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1763        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1764        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1765          partial matching. */
1766    
1767  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1768        if (utf8)        if (utf8)
1769          {          {
1770            /* Get status of previous character */
1771    
1772          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1773            {            {
1774            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1775            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1776              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1777            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1778    #ifdef SUPPORT_UCP
1779              if (md->use_ucp)
1780                {
1781                if (c == '_') prev_is_word = TRUE; else
1782                  {
1783                  int cat = UCD_CATEGORY(c);
1784                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1785                  }
1786                }
1787              else
1788    #endif
1789            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1790            }            }
1791          if (eptr >= md->end_subject) cur_is_word = FALSE; else  
1792            /* Get status of next character */
1793    
1794            if (eptr >= md->end_subject)
1795              {
1796              SCHECK_PARTIAL();
1797              cur_is_word = FALSE;
1798              }
1799            else
1800            {            {
1801            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1802    #ifdef SUPPORT_UCP
1803              if (md->use_ucp)
1804                {
1805                if (c == '_') cur_is_word = TRUE; else
1806                  {
1807                  int cat = UCD_CATEGORY(c);
1808                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1809                  }
1810                }
1811              else
1812    #endif
1813            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1814            }            }
1815          }          }
1816        else        else
1817  #endif  #endif
1818    
1819        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1820          consistency with the behaviour of \w we do use it in this case. */
1821    
1822          {          {
1823          prev_is_word = (eptr != md->start_subject) &&          /* Get status of previous character */
1824            ((md->ctypes[eptr[-1]] & ctype_word) != 0);  
1825          cur_is_word = (eptr < md->end_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1826            ((md->ctypes[*eptr] & ctype_word) != 0);            {
1827              if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1828    #ifdef SUPPORT_UCP
1829              if (md->use_ucp)
1830                {
1831                c = eptr[-1];
1832                if (c == '_') prev_is_word = TRUE; else
1833                  {
1834                  int cat = UCD_CATEGORY(c);
1835                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1836                  }
1837                }
1838              else
1839    #endif
1840              prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1841              }
1842    
1843            /* Get status of next character */
1844    
1845            if (eptr >= md->end_subject)
1846              {
1847              SCHECK_PARTIAL();
1848              cur_is_word = FALSE;
1849              }
1850            else
1851    #ifdef SUPPORT_UCP
1852            if (md->use_ucp)
1853              {
1854              c = *eptr;
1855              if (c == '_') cur_is_word = TRUE; else
1856                {
1857                int cat = UCD_CATEGORY(c);
1858                cur_is_word = (cat == ucp_L || cat == ucp_N);
1859                }
1860              }
1861            else
1862    #endif
1863            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1864          }          }
1865    
1866        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
1867    
1868        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
1869             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1870          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1871        }        }
1872      break;      break;
1873    
1874      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1875    
1876      case OP_ANY:      case OP_ANY:
1877      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1878        /* Fall through */
1879    
1880        case OP_ALLANY:
1881        if (eptr++ >= md->end_subject)
1882        {        {
1883        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);        SCHECK_PARTIAL();
1884          MRRETURN(MATCH_NOMATCH);
1885        }        }
1886      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1887      ecode++;      ecode++;
1888      break;      break;
1889    
# Line 1366  for (;;) Line 1891  for (;;)
1891      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1892    
1893      case OP_ANYBYTE:      case OP_ANYBYTE:
1894      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1895          {
1896          SCHECK_PARTIAL();
1897          MRRETURN(MATCH_NOMATCH);
1898          }
1899      ecode++;      ecode++;
1900      break;      break;
1901    
1902      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1903      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1904          {
1905          SCHECK_PARTIAL();
1906          MRRETURN(MATCH_NOMATCH);
1907          }
1908      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1909      if (      if (
1910  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1379  for (;;) Line 1912  for (;;)
1912  #endif  #endif
1913         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1914         )         )
1915        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1916      ecode++;      ecode++;
1917      break;      break;
1918    
1919      case OP_DIGIT:      case OP_DIGIT:
1920      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1921          {
1922          SCHECK_PARTIAL();
1923          MRRETURN(MATCH_NOMATCH);
1924          }
1925      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1926      if (      if (
1927  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1392  for (;;) Line 1929  for (;;)
1929  #endif  #endif
1930         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1931         )         )
1932        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1933      ecode++;      ecode++;
1934      break;      break;
1935    
1936      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1937      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1938          {
1939          SCHECK_PARTIAL();
1940          MRRETURN(MATCH_NOMATCH);
1941          }
1942      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1943      if (      if (
1944  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1405  for (;;) Line 1946  for (;;)
1946  #endif  #endif
1947         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1948         )         )
1949        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1950      ecode++;      ecode++;
1951      break;      break;
1952    
1953      case OP_WHITESPACE:      case OP_WHITESPACE:
1954      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1955          {
1956          SCHECK_PARTIAL();
1957          MRRETURN(MATCH_NOMATCH);
1958          }
1959      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1960      if (      if (
1961  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1418  for (;;) Line 1963  for (;;)
1963  #endif  #endif
1964         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1965         )         )
1966        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1967      ecode++;      ecode++;
1968      break;      break;
1969    
1970      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1971      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1972          {
1973          SCHECK_PARTIAL();
1974          MRRETURN(MATCH_NOMATCH);
1975          }
1976      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1977      if (      if (
1978  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1431  for (;;) Line 1980  for (;;)
1980  #endif  #endif
1981         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1982         )         )
1983        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1984      ecode++;      ecode++;
1985      break;      break;
1986    
1987      case OP_WORDCHAR:      case OP_WORDCHAR:
1988      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1989          {
1990          SCHECK_PARTIAL();
1991          MRRETURN(MATCH_NOMATCH);
1992          }
1993      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1994      if (      if (
1995  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1444  for (;;) Line 1997  for (;;)
1997  #endif  #endif
1998         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1999         )         )
2000        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2001      ecode++;      ecode++;
2002      break;      break;
2003    
2004      case OP_ANYNL:      case OP_ANYNL:
2005      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2006          {
2007          SCHECK_PARTIAL();
2008          MRRETURN(MATCH_NOMATCH);
2009          }
2010      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2011      switch(c)      switch(c)
2012        {        {
2013        default: RRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
2014        case 0x000d:        case 0x000d:
2015        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2016        break;        break;
2017    
2018        case 0x000a:        case 0x000a:
2019          break;
2020    
2021        case 0x000b:        case 0x000b:
2022        case 0x000c:        case 0x000c:
2023        case 0x0085:        case 0x0085:
2024        case 0x2028:        case 0x2028:
2025        case 0x2029:        case 0x2029:
2026          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2027        break;        break;
2028        }        }
2029      ecode++;      ecode++;
2030      break;      break;
2031    
2032  #ifdef SUPPORT_UCP      case OP_NOT_HSPACE:
2033      /* Check the next character by Unicode property. We will get here only      if (eptr >= md->end_subject)
2034      if the support is in the binary; otherwise a compile-time error occurs. */        {
2035          SCHECK_PARTIAL();
2036      case OP_PROP:        MRRETURN(MATCH_NOMATCH);
2037      case OP_NOTPROP:        }
     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);  
2038      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2039        switch(c)
2040        {        {
2041        int chartype, script;        default: break;
2042        int category = _pcre_ucp_findprop(c, &chartype, &script);        case 0x09:      /* HT */
2043          case 0x20:      /* SPACE */
2044          case 0xa0:      /* NBSP */
2045          case 0x1680:    /* OGHAM SPACE MARK */
2046          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2047          case 0x2000:    /* EN QUAD */
2048          case 0x2001:    /* EM QUAD */
2049          case 0x2002:    /* EN SPACE */
2050          case 0x2003:    /* EM SPACE */
2051          case 0x2004:    /* THREE-PER-EM SPACE */
2052          case 0x2005:    /* FOUR-PER-EM SPACE */
2053          case 0x2006:    /* SIX-PER-EM SPACE */
2054          case 0x2007:    /* FIGURE SPACE */
2055          case 0x2008:    /* PUNCTUATION SPACE */
2056          case 0x2009:    /* THIN SPACE */
2057          case 0x200A:    /* HAIR SPACE */
2058          case 0x202f:    /* NARROW NO-BREAK SPACE */
2059          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2060          case 0x3000:    /* IDEOGRAPHIC SPACE */
2061          MRRETURN(MATCH_NOMATCH);
2062          }
2063        ecode++;
2064        break;
2065    
2066        switch(ecode[1])      case OP_HSPACE:
2067          {      if (eptr >= md->end_subject)
2068          {
2069          SCHECK_PARTIAL();
2070          MRRETURN(MATCH_NOMATCH);
2071          }
2072        GETCHARINCTEST(c, eptr);
2073        switch(c)
2074          {
2075          default: MRRETURN(MATCH_NOMATCH);
2076          case 0x09:      /* HT */
2077          case 0x20:      /* SPACE */
2078          case 0xa0:      /* NBSP */
2079          case 0x1680:    /* OGHAM SPACE MARK */
2080          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2081          case 0x2000:    /* EN QUAD */
2082          case 0x2001:    /* EM QUAD */
2083          case 0x2002:    /* EN SPACE */
2084          case 0x2003:    /* EM SPACE */
2085          case 0x2004:    /* THREE-PER-EM SPACE */
2086          case 0x2005:    /* FOUR-PER-EM SPACE */
2087          case 0x2006:    /* SIX-PER-EM SPACE */
2088          case 0x2007:    /* FIGURE SPACE */
2089          case 0x2008:    /* PUNCTUATION SPACE */
2090          case 0x2009:    /* THIN SPACE */
2091          case 0x200A:    /* HAIR SPACE */
2092          case 0x202f:    /* NARROW NO-BREAK SPACE */
2093          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2094          case 0x3000:    /* IDEOGRAPHIC SPACE */
2095          break;
2096          }
2097        ecode++;
2098        break;
2099    
2100        case OP_NOT_VSPACE:
2101        if (eptr >= md->end_subject)
2102          {
2103          SCHECK_PARTIAL();
2104          MRRETURN(MATCH_NOMATCH);
2105          }
2106        GETCHARINCTEST(c, eptr);
2107        switch(c)
2108          {
2109          default: break;
2110          case 0x0a:      /* LF */
2111          case 0x0b:      /* VT */
2112          case 0x0c:      /* FF */
2113          case 0x0d:      /* CR */
2114          case 0x85:      /* NEL */
2115          case 0x2028:    /* LINE SEPARATOR */
2116          case 0x2029:    /* PARAGRAPH SEPARATOR */
2117          MRRETURN(MATCH_NOMATCH);
2118          }
2119        ecode++;
2120        break;
2121    
2122        case OP_VSPACE:
2123        if (eptr >= md->end_subject)
2124          {
2125          SCHECK_PARTIAL();
2126          MRRETURN(MATCH_NOMATCH);
2127          }
2128        GETCHARINCTEST(c, eptr);
2129        switch(c)
2130          {
2131          default: MRRETURN(MATCH_NOMATCH);
2132          case 0x0a:      /* LF */
2133          case 0x0b:      /* VT */
2134          case 0x0c:      /* FF */
2135          case 0x0d:      /* CR */
2136          case 0x85:      /* NEL */
2137          case 0x2028:    /* LINE SEPARATOR */
2138          case 0x2029:    /* PARAGRAPH SEPARATOR */
2139          break;
2140          }
2141        ecode++;
2142        break;
2143    
2144    #ifdef SUPPORT_UCP
2145        /* Check the next character by Unicode property. We will get here only
2146        if the support is in the binary; otherwise a compile-time error occurs. */
2147    
2148        case OP_PROP:
2149        case OP_NOTPROP:
2150        if (eptr >= md->end_subject)
2151          {
2152          SCHECK_PARTIAL();
2153          MRRETURN(MATCH_NOMATCH);
2154          }
2155        GETCHARINCTEST(c, eptr);
2156          {
2157          const ucd_record *prop = GET_UCD(c);
2158    
2159          switch(ecode[1])
2160            {
2161          case PT_ANY:          case PT_ANY:
2162          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2163          break;          break;
2164    
2165          case PT_LAMP:          case PT_LAMP:
2166          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2167               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2168               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2169            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2170           break;          break;
2171    
2172          case PT_GC:          case PT_GC:
2173          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2174            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2175          break;          break;
2176    
2177          case PT_PC:          case PT_PC:
2178          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2179            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2180          break;          break;
2181    
2182          case PT_SC:          case PT_SC:
2183          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2184            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2185            break;
2186    
2187            /* These are specials */
2188    
2189            case PT_ALNUM:
2190            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2191                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2192              MRRETURN(MATCH_NOMATCH);
2193            break;
2194    
2195            case PT_SPACE:    /* Perl space */
2196            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2197                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2198                   == (op == OP_NOTPROP))
2199              MRRETURN(MATCH_NOMATCH);
2200          break;          break;
2201    
2202            case PT_PXSPACE:  /* POSIX space */
2203            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2204                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2205                 c == CHAR_FF || c == CHAR_CR)
2206                   == (op == OP_NOTPROP))
2207              MRRETURN(MATCH_NOMATCH);
2208            break;
2209    
2210            case PT_WORD:
2211            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2212                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2213                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2214              MRRETURN(MATCH_NOMATCH);
2215            break;
2216    
2217            /* This should never occur */
2218    
2219          default:          default:
2220          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
2221          }          }
# Line 1520  for (;;) Line 2228  for (;;)
2228      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2229    
2230      case OP_EXTUNI:      case OP_EXTUNI:
2231      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2232          {
2233          SCHECK_PARTIAL();
2234          MRRETURN(MATCH_NOMATCH);
2235          }
2236      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2237        {        {
2238        int chartype, script;        int category = UCD_CATEGORY(c);
2239        int category = _pcre_ucp_findprop(c, &chartype, &script);        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2240        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2241          {          {
2242          int len = 1;          int len = 1;
# Line 1533  for (;;) Line 2244  for (;;)
2244            {            {
2245            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2246            }            }
2247          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2248          if (category != ucp_M) break;          if (category != ucp_M) break;
2249          eptr += len;          eptr += len;
2250          }          }
# Line 1552  for (;;) Line 2263  for (;;)
2263      loops). */      loops). */
2264    
2265      case OP_REF:      case OP_REF:
2266        {      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2267        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      ecode += 3;
       ecode += 3;                                 /* Advance past item */  
2268    
2269        /* If the reference is unset, set the length to be longer than the amount      /* If the reference is unset, there are two possibilities:
       of subject left; this ensures that every attempt at a match fails. We  
       can't just fail here, because of the possibility of quantifiers with zero  
       minima. */  
   
       length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
         md->end_subject - eptr + 1 :  
         md->offset_vector[offset+1] - md->offset_vector[offset];  
2270    
2271        /* Set up for repetition, or handle the non-repeated case */      (a) In the default, Perl-compatible state, set the length negative;
2272        this ensures that every attempt at a match fails. We can't just fail
2273        here, because of the possibility of quantifiers with zero minima.
2274    
2275        switch (*ecode)      (b) If the JavaScript compatibility flag is set, set the length to zero
2276          {      so that the back reference matches an empty string.
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         c = *ecode++ - OP_CRSTAR;  
         minimize = (c & 1) != 0;  
         min = rep_min[c];                 /* Pick up values from tables; */  
         max = rep_max[c];                 /* zero for max => infinity */  
         if (max == 0) max = INT_MAX;  
         break;  
2277    
2278          case OP_CRRANGE:      Otherwise, set the length to the length of what was matched by the
2279          case OP_CRMINRANGE:      referenced subpattern. */
         minimize = (*ecode == OP_CRMINRANGE);  
         min = GET2(ecode, 1);  
         max = GET2(ecode, 3);  
         if (max == 0) max = INT_MAX;  
         ecode += 5;  
         break;  
2280    
2281          default:               /* No repeat follows */      if (offset >= offset_top || md->offset_vector[offset] < 0)
2282          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);        length = (md->jscript_compat)? 0 : -1;
2283          eptr += length;      else
2284          continue;              /* With the main loop */        length = md->offset_vector[offset+1] - md->offset_vector[offset];
2285    
2286        /* Set up for repetition, or handle the non-repeated case */
2287    
2288        switch (*ecode)
2289          {
2290          case OP_CRSTAR:
2291          case OP_CRMINSTAR:
2292          case OP_CRPLUS:
2293          case OP_CRMINPLUS:
2294          case OP_CRQUERY:
2295          case OP_CRMINQUERY:
2296          c = *ecode++ - OP_CRSTAR;
2297          minimize = (c & 1) != 0;
2298          min = rep_min[c];                 /* Pick up values from tables; */
2299          max = rep_max[c];                 /* zero for max => infinity */
2300          if (max == 0) max = INT_MAX;
2301          break;
2302    
2303          case OP_CRRANGE:
2304          case OP_CRMINRANGE:
2305          minimize = (*ecode == OP_CRMINRANGE);
2306          min = GET2(ecode, 1);
2307          max = GET2(ecode, 3);
2308          if (max == 0) max = INT_MAX;
2309          ecode += 5;
2310          break;
2311    
2312          default:               /* No repeat follows */
2313          if ((length = match_ref(offset, eptr, length, md, ims)) < 0)
2314            {
2315            CHECK_PARTIAL();
2316            MRRETURN(MATCH_NOMATCH);
2317          }          }
2318          eptr += length;
2319          continue;              /* With the main loop */
2320          }
2321    
2322        /* If the length of the reference is zero, just continue with the      /* Handle repeated back references. If the length of the reference is
2323        main loop. */      zero, just continue with the main loop. */
2324    
2325        if (length == 0) continue;      if (length == 0) continue;
2326    
2327        /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2328        the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
2329        address of eptr, so that eptr can be a register variable. */      address of eptr, so that eptr can be a register variable. */
2330    
2331        for (i = 1; i <= min; i++)      for (i = 1; i <= min; i++)
2332          {
2333          int slength;
2334          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2335          {          {
2336          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          CHECK_PARTIAL();
2337          eptr += length;          MRRETURN(MATCH_NOMATCH);
2338          }          }
2339          eptr += slength;
2340          }
2341    
2342        /* If min = max, continue at the same level without recursion.      /* If min = max, continue at the same level without recursion.
2343        They are not both allowed to be zero. */      They are not both allowed to be zero. */
2344    
2345        if (min == max) continue;      if (min == max) continue;
2346    
2347        /* If minimizing, keep trying and advancing the pointer */      /* If minimizing, keep trying and advancing the pointer */
2348    
2349        if (minimize)      if (minimize)
2350          {
2351          for (fi = min;; fi++)
2352          {          {
2353          for (fi = min;; fi++)          int slength;
2354            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2355            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2356            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2357            if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2358            {            {
2359            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);            CHECK_PARTIAL();
2360            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            MRRETURN(MATCH_NOMATCH);
           if (fi >= max || !match_ref(offset, eptr, length, md, ims))  
             RRETURN(MATCH_NOMATCH);  
           eptr += length;  
2361            }            }
2362          /* Control never gets here */          eptr += slength;
2363          }          }
2364          /* Control never gets here */
2365          }
2366    
2367        /* If maximizing, find the longest string and work backwards */      /* If maximizing, find the longest string and work backwards */
2368    
2369        else      else
2370          {
2371          pp = eptr;
2372          for (i = min; i < max; i++)
2373          {          {
2374          pp = eptr;          int slength;
2375          for (i = min; i < max; i++)          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
           {  
           if (!match_ref(offset, eptr, length, md, ims)) break;  
           eptr += length;  
           }  
         while (eptr >= pp)  
2376            {            {
2377            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);            CHECK_PARTIAL();
2378            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            break;
           eptr -= length;  
2379            }            }
2380          RRETURN(MATCH_NOMATCH);          eptr += slength;
2381            }
2382          while (eptr >= pp)
2383            {
2384            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2385            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2386            eptr -= length;
2387          }          }
2388          MRRETURN(MATCH_NOMATCH);
2389        }        }
2390      /* Control never gets here */      /* Control never gets here */
2391    
   
   
2392      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2393      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2394      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1709  for (;;) Line 2443  for (;;)
2443          {          {
2444          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2445            {            {
2446            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2447                {
2448                SCHECK_PARTIAL();
2449                MRRETURN(MATCH_NOMATCH);
2450                }
2451            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2452            if (c > 255)            if (c > 255)
2453              {              {
2454              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2455              }              }
2456            else            else
2457              {              {
2458              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2459              }              }
2460            }            }
2461          }          }
# Line 1727  for (;;) Line 2465  for (;;)
2465          {          {
2466          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2467            {            {
2468            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2469                {
2470                SCHECK_PARTIAL();
2471                MRRETURN(MATCH_NOMATCH);
2472                }
2473            c = *eptr++;            c = *eptr++;
2474            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2475            }            }
2476          }          }
2477    
# Line 1751  for (;;) Line 2493  for (;;)
2493              {              {
2494              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2495              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2496              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2497                if (eptr >= md->end_subject)
2498                  {
2499                  SCHECK_PARTIAL();
2500                  MRRETURN(MATCH_NOMATCH);
2501                  }
2502              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2503              if (c > 255)              if (c > 255)
2504                {                {
2505                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2506                }                }
2507              else              else
2508                {                {
2509                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2510                }                }
2511              }              }
2512            }            }
# Line 1771  for (;;) Line 2518  for (;;)
2518              {              {
2519              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2520              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2521              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2522                if (eptr >= md->end_subject)
2523                  {
2524                  SCHECK_PARTIAL();
2525                  MRRETURN(MATCH_NOMATCH);
2526                  }
2527              c = *eptr++;              c = *eptr++;
2528              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2529              }              }
2530            }            }
2531          /* Control never gets here */          /* Control never gets here */
# Line 1792  for (;;) Line 2544  for (;;)
2544            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2545              {              {
2546              int len = 1;              int len = 1;
2547              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2548                  {
2549                  SCHECK_PARTIAL();
2550                  break;
2551                  }
2552              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2553              if (c > 255)              if (c > 255)
2554                {                {
# Line 1818  for (;;) Line 2574  for (;;)
2574            {            {
2575            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2576              {              {
2577              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2578                  {
2579                  SCHECK_PARTIAL();
2580                  break;
2581                  }
2582              c = *eptr;              c = *eptr;
2583              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2584              eptr++;              eptr++;
# Line 1831  for (;;) Line 2591  for (;;)
2591              }              }
2592            }            }
2593    
2594          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2595          }          }
2596        }        }
2597      /* Control never gets here */      /* Control never gets here */
2598    
2599    
2600      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2601      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2602        mode, because Unicode properties are supported in non-UTF-8 mode. */
2603    
2604  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2605      case OP_XCLASS:      case OP_XCLASS:
# Line 1879  for (;;) Line 2640  for (;;)
2640    
2641        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2642          {          {
2643          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2644          GETCHARINC(c, eptr);            {
2645          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2646              MRRETURN(MATCH_NOMATCH);
2647              }
2648            GETCHARINCTEST(c, eptr);
2649            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2650          }          }
2651    
2652        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1898  for (;;) Line 2663  for (;;)
2663            {            {
2664            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2665            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2666            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2667            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2668            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2669                SCHECK_PARTIAL();
2670                MRRETURN(MATCH_NOMATCH);
2671                }
2672              GETCHARINCTEST(c, eptr);
2673              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2674            }            }
2675          /* Control never gets here */          /* Control never gets here */
2676          }          }
# Line 1913  for (;;) Line 2683  for (;;)
2683          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2684            {            {
2685            int len = 1;            int len = 1;
2686            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2687            GETCHARLEN(c, eptr, len);              {
2688                SCHECK_PARTIAL();
2689                break;
2690                }
2691              GETCHARLENTEST(c, eptr, len);
2692            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2693            eptr += len;            eptr += len;
2694            }            }
# Line 1923  for (;;) Line 2697  for (;;)
2697            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2698            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2699            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2700            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2701            }            }
2702          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2703          }          }
2704    
2705        /* Control never gets here */        /* Control never gets here */
# Line 1941  for (;;) Line 2715  for (;;)
2715        length = 1;        length = 1;
2716        ecode++;        ecode++;
2717        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2718        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2719        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2720            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2721            MRRETURN(MATCH_NOMATCH);
2722            }
2723          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2724        }        }
2725      else      else
2726  #endif  #endif
2727    
2728      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2729        {        {
2730        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2731        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2732            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2733            MRRETURN(MATCH_NOMATCH);
2734            }
2735          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2736        ecode += 2;        ecode += 2;
2737        }        }
2738      break;      break;
# Line 1965  for (;;) Line 2747  for (;;)
2747        ecode++;        ecode++;
2748        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2749    
2750        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2751            {
2752            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2753            MRRETURN(MATCH_NOMATCH);
2754            }
2755    
2756        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2757        can use the fast lookup table. */        can use the fast lookup table. */
2758    
2759        if (fc < 128)        if (fc < 128)
2760          {          {
2761          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2762          }          }
2763    
2764        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
# Line 1989  for (;;) Line 2775  for (;;)
2775          if (fc != dc)          if (fc != dc)
2776            {            {
2777  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2778            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2779  #endif  #endif
2780              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2781            }            }
2782          }          }
2783        }        }
# Line 2000  for (;;) Line 2786  for (;;)
2786    
2787      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2788        {        {
2789        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2790        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2791            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2792            MRRETURN(MATCH_NOMATCH);
2793            }
2794          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2795        ecode += 2;        ecode += 2;
2796        }        }
2797      break;      break;
# Line 2054  for (;;) Line 2844  for (;;)
2844      case OP_MINQUERY:      case OP_MINQUERY:
2845      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2846      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2847    
2848      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2849      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2850      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2851    
2852      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2853    
2854      REPEATCHAR:      REPEATCHAR:
2855  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2069  for (;;) Line 2858  for (;;)
2858        length = 1;        length = 1;
2859        charptr = ecode;        charptr = ecode;
2860        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2861        ecode += length;        ecode += length;
2862    
2863        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 2080  for (;;) Line 2868  for (;;)
2868  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2869          unsigned int othercase;          unsigned int othercase;
2870          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2871              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2872            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2873          else oclength = 0;          else oclength = 0;
2874  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2875    
2876          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2877            {            {
2878            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2879                memcmp(eptr, charptr, length) == 0) eptr += length;
2880  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2881            /* Need braces because of following else */            else if (oclength > 0 &&
2882            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                     eptr <= md->end_subject - oclength &&
2883                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2884    #endif  /* SUPPORT_UCP */
2885            else            else
2886              {              {
2887              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2888              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2889              }              }
 #else   /* without SUPPORT_UCP */  
           else { RRETURN(MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2890            }            }
2891    
2892          if (min == max) continue;          if (min == max) continue;
# Line 2109  for (;;) Line 2897  for (;;)
2897              {              {
2898              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2899              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2900              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2901              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2902                  memcmp(eptr, charptr, length) == 0) eptr += length;
2903  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2904              /* Need braces because of following else */              else if (oclength > 0 &&
2905              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                       eptr <= md->end_subject - oclength &&
2906                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2907    #endif  /* SUPPORT_UCP */
2908              else              else
2909                {                {
2910                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2911                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2912                }                }
 #else   /* without SUPPORT_UCP */  
             else { RRETURN (MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2913              }              }
2914            /* Control never gets here */            /* Control never gets here */
2915            }            }
# Line 2131  for (;;) Line 2919  for (;;)
2919            pp = eptr;            pp = eptr;
2920            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2921              {              {
2922              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2923              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2924  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2925              else if (oclength == 0) break;              else if (oclength > 0 &&
2926                         eptr <= md->end_subject - oclength &&
2927                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2928    #endif  /* SUPPORT_UCP */
2929              else              else
2930                {                {
2931                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2932                eptr += oclength;                break;
2933                }                }
 #else   /* without SUPPORT_UCP */  
             else break;  
 #endif  /* SUPPORT_UCP */  
2934              }              }
2935    
2936            if (possessive) continue;            if (possessive) continue;
2937    
2938            for(;;)            for(;;)
2939             {              {
2940             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2941             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2942             if (eptr == pp) RRETURN(MATCH_NOMATCH);              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2943  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2944             eptr--;              eptr--;
2945             BACKCHAR(eptr);              BACKCHAR(eptr);
2946  #else   /* without SUPPORT_UCP */  #else   /* without SUPPORT_UCP */
2947             eptr -= length;              eptr -= length;
2948  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2949             }              }
2950            }            }
2951          /* Control never gets here */          /* Control never gets here */
2952          }          }
# Line 2170  for (;;) Line 2959  for (;;)
2959  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2960    
2961      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2962        {  
2963        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2964    
2965      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2966      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2191  for (;;) Line 2978  for (;;)
2978        {        {
2979        fc = md->lcc[fc];        fc = md->lcc[fc];
2980        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2981          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2982            if (eptr >= md->end_subject)
2983              {
2984              SCHECK_PARTIAL();
2985              MRRETURN(MATCH_NOMATCH);
2986              }
2987            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2988            }
2989        if (min == max) continue;        if (min == max) continue;
2990        if (minimize)        if (minimize)
2991          {          {
# Line 2199  for (;;) Line 2993  for (;;)
2993            {            {
2994            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2995            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2996            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2997                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2998              RRETURN(MATCH_NOMATCH);              {
2999                SCHECK_PARTIAL();
3000                MRRETURN(MATCH_NOMATCH);
3001                }
3002              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3003            }            }
3004          /* Control never gets here */          /* Control never gets here */
3005          }          }
# Line 2210  for (;;) Line 3008  for (;;)
3008          pp = eptr;          pp = eptr;
3009          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3010            {            {
3011            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
3012                {
3013                SCHECK_PARTIAL();
3014                break;
3015                }
3016              if (fc != md->lcc[*eptr]) break;
3017            eptr++;            eptr++;
3018            }            }
3019    
3020          if (possessive) continue;          if (possessive) continue;
3021    
3022          while (eptr >= pp)          while (eptr >= pp)
3023            {            {
3024            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3025            eptr--;            eptr--;
3026            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3027            }            }
3028          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3029          }          }
3030        /* Control never gets here */        /* Control never gets here */
3031        }        }
# Line 2229  for (;;) Line 3034  for (;;)
3034    
3035      else      else
3036        {        {
3037        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
3038            {
3039            if (eptr >= md->end_subject)
3040              {
3041              SCHECK_PARTIAL();
3042              MRRETURN(MATCH_NOMATCH);
3043              }
3044            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3045            }
3046    
3047        if (min == max) continue;        if (min == max) continue;
3048    
3049        if (minimize)        if (minimize)
3050          {          {
3051          for (fi = min;; fi++)          for (fi = min;; fi++)
3052            {            {
3053            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3054            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3055            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3056              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3057                {
3058                SCHECK_PARTIAL();
3059                MRRETURN(MATCH_NOMATCH);
3060                }
3061              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3062            }            }
3063          /* Control never gets here */          /* Control never gets here */
3064          }          }
# Line 2247  for (;;) Line 3067  for (;;)
3067          pp = eptr;          pp = eptr;
3068          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3069            {            {
3070            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
3071                {
3072                SCHECK_PARTIAL();
3073                break;
3074                }
3075              if (fc != *eptr) break;
3076            eptr++;            eptr++;
3077            }            }
3078          if (possessive) continue;          if (possessive) continue;
3079    
3080          while (eptr >= pp)          while (eptr >= pp)
3081            {            {
3082            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3083            eptr--;            eptr--;
3084            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3085            }            }
3086          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3087          }          }
3088        }        }
3089      /* Control never gets here */      /* Control never gets here */
# Line 2266  for (;;) Line 3092  for (;;)
3092      checking can be multibyte. */      checking can be multibyte. */
3093    
3094      case OP_NOT:      case OP_NOT:
3095      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
3096          {
3097          SCHECK_PARTIAL();
3098          MRRETURN(MATCH_NOMATCH);
3099          }
3100      ecode++;      ecode++;
3101      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3102      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2275  for (;;) Line 3105  for (;;)
3105        if (c < 256)        if (c < 256)
3106  #endif  #endif
3107        c = md->lcc[c];        c = md->lcc[c];
3108        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3109        }        }
3110      else      else
3111        {        {
3112        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3113        }        }
3114      break;      break;
3115    
# Line 2343  for (;;) Line 3173  for (;;)
3173      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3174      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3175    
3176      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3177    
3178      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3179      fc = *ecode++;      fc = *ecode++;
3180    
3181      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2373  for (;;) Line 3200  for (;;)
3200          register unsigned int d;          register unsigned int d;
3201          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3202            {            {
3203              if (eptr >= md->end_subject)
3204                {
3205                SCHECK_PARTIAL();
3206                MRRETURN(MATCH_NOMATCH);
3207                }
3208            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3209            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3210            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3211            }            }
3212          }          }
3213        else        else
# Line 2384  for (;;) Line 3216  for (;;)
3216        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3217          {          {
3218          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3219            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3220              if (eptr >= md->end_subject)
3221                {
3222                SCHECK_PARTIAL();
3223                MRRETURN(MATCH_NOMATCH);
3224                }
3225              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3226              }
3227          }          }
3228    
3229        if (min == max) continue;        if (min == max) continue;
# Line 2400  for (;;) Line 3239  for (;;)
3239              {              {
3240              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3241              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3242                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3243                if (eptr >= md->end_subject)
3244                  {
3245                  SCHECK_PARTIAL();
3246                  MRRETURN(MATCH_NOMATCH);
3247                  }
3248              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3249              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3250              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3251              }              }
3252            }            }
3253          else          else
# Line 2414  for (;;) Line 3258  for (;;)
3258              {              {
3259              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3260              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3261              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3262                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3263                  {
3264                  SCHECK_PARTIAL();
3265                  MRRETURN(MATCH_NOMATCH);
3266                  }
3267                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3268              }              }
3269            }            }
3270          /* Control never gets here */          /* Control never gets here */
# Line 2435  for (;;) Line 3284  for (;;)
3284            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3285              {              {
3286              int len = 1;              int len = 1;
3287              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3288                  {
3289                  SCHECK_PARTIAL();
3290                  break;
3291                  }
3292              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3293              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3294              if (fc == d) break;              if (fc == d) break;
# Line 2456  for (;;) Line 3309  for (;;)
3309            {            {
3310            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3311              {              {
3312              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3313                  {
3314                  SCHECK_PARTIAL();
3315                  break;
3316                  }
3317                if (fc == md->lcc[*eptr]) break;
3318              eptr++;              eptr++;
3319              }              }
3320            if (possessive) continue;            if (possessive) continue;
# Line 2468  for (;;) Line 3326  for (;;)
3326              }              }
3327            }            }
3328    
3329          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3330          }          }
3331        /* Control never gets here */        /* Control never gets here */
3332        }        }
# Line 2484  for (;;) Line 3342  for (;;)
3342          register unsigned int d;          register unsigned int d;
3343          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3344            {            {
3345              if (eptr >= md->end_subject)
3346                {
3347                SCHECK_PARTIAL();
3348                MRRETURN(MATCH_NOMATCH);
3349                }
3350            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3351            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3352            }            }
3353          }          }
3354        else        else
# Line 2493  for (;;) Line 3356  for (;;)
3356        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3357          {          {
3358          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3359            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3360              if (eptr >= md->end_subject)
3361                {
3362                SCHECK_PARTIAL();
3363                MRRETURN(MATCH_NOMATCH);
3364                }
3365              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3366              }
3367          }          }
3368    
3369        if (min == max) continue;        if (min == max) continue;
# Line 2509  for (;;) Line 3379  for (;;)
3379              {              {
3380              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3381              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3382                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3383                if (eptr >= md->end_subject)
3384                  {
3385                  SCHECK_PARTIAL();
3386                  MRRETURN(MATCH_NOMATCH);
3387                  }
3388              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3389              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3390              }              }
3391            }            }
3392          else          else
# Line 2522  for (;;) Line 3397  for (;;)
3397              {              {
3398              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3399              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3400              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3401                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3402                  {
3403                  SCHECK_PARTIAL();
3404                  MRRETURN(MATCH_NOMATCH);
3405                  }
3406                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3407              }              }
3408            }            }
3409          /* Control never gets here */          /* Control never gets here */
# Line 2543  for (;;) Line 3423  for (;;)
3423            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3424              {              {
3425              int len = 1;              int len = 1;
3426              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3427                  {
3428                  SCHECK_PARTIAL();
3429                  break;
3430                  }
3431              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3432              if (fc == d) break;              if (fc == d) break;
3433              eptr += len;              eptr += len;
# Line 2563  for (;;) Line 3447  for (;;)
3447            {            {
3448            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3449              {              {
3450              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3451                  {
3452                  SCHECK_PARTIAL();
3453                  break;
3454                  }
3455                if (fc == *eptr) break;
3456              eptr++;              eptr++;
3457              }              }
3458            if (possessive) continue;            if (possessive) continue;
# Line 2575  for (;;) Line 3464  for (;;)
3464              }              }
3465            }            }
3466    
3467          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3468          }          }
3469        }        }
3470      /* Control never gets here */      /* Control never gets here */
# Line 2657  for (;;) Line 3546  for (;;)
3546    
3547      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3548      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3549      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3550      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3551      and single-bytes. */      and single-bytes. */
3552    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3553      if (min > 0)      if (min > 0)
3554        {        {
3555  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2672  for (;;) Line 3558  for (;;)
3558          switch(prop_type)          switch(prop_type)
3559            {            {
3560            case PT_ANY:            case PT_ANY:
3561            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3562            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3563              {              {
3564              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3565              GETCHARINC(c, eptr);                {
3566                  SCHECK_PARTIAL();
3567                  MRRETURN(MATCH_NOMATCH);
3568                  }
3569                GETCHARINCTEST(c, eptr);
3570              }              }
3571            break;            break;
3572    
3573            case PT_LAMP:            case PT_LAMP:
3574            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3575              {              {
3576              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3577              GETCHARINC(c, eptr);                {
3578              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3579                  MRRETURN(MATCH_NOMATCH);
3580                  }
3581                GETCHARINCTEST(c, eptr);
3582                prop_chartype = UCD_CHARTYPE(c);
3583              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3584                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3585                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3586                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3587              }              }
3588            break;            break;
3589    
3590            case PT_GC:            case PT_GC:
3591            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3592              {              {
3593              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3594              GETCHARINC(c, eptr);                {
3595              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3596                  MRRETURN(MATCH_NOMATCH);
3597                  }
3598                GETCHARINCTEST(c, eptr);
3599                prop_category = UCD_CATEGORY(c);
3600              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3601                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3602              }              }
3603            break;            break;
3604    
3605            case PT_PC:            case PT_PC:
3606            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3607              {              {
3608              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3609              GETCHARINC(c, eptr);                {
3610              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3611                  MRRETURN(MATCH_NOMATCH);
3612                  }
3613                GETCHARINCTEST(c, eptr);
3614                prop_chartype = UCD_CHARTYPE(c);
3615              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3616                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3617              }              }
3618            break;            break;
3619    
3620            case PT_SC:            case PT_SC:
3621            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3622              {              {
3623              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3624              GETCHARINC(c, eptr);                {
3625              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3626                  MRRETURN(MATCH_NOMATCH);
3627                  }
3628                GETCHARINCTEST(c, eptr);
3629                prop_script = UCD_SCRIPT(c);
3630              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3631                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3632              }              }
3633            break;            break;
3634    
3635            default:            case PT_ALNUM:
3636            RRETURN(PCRE_ERROR_INTERNAL);            for (i = 1; i <= min; i++)
3637            }              {
3638          }              if (eptr >= md->end_subject)
3639                  {
3640                  SCHECK_PARTIAL();
3641                  MRRETURN(MATCH_NOMATCH);
3642                  }
3643                GETCHARINCTEST(c, eptr);
3644                prop_category = UCD_CATEGORY(c);
3645                if ((prop_category == ucp_L || prop_category == ucp_N)
3646                       == prop_fail_result)
3647                  MRRETURN(MATCH_NOMATCH);
3648                }
3649              break;
3650    
3651              case PT_SPACE:    /* Perl space */
3652              for (i = 1; i <= min; i++)
3653                {
3654                if (eptr >= md->end_subject)
3655                  {
3656                  SCHECK_PARTIAL();
3657                  MRRETURN(MATCH_NOMATCH);
3658                  }
3659                GETCHARINCTEST(c, eptr);
3660                prop_category = UCD_CATEGORY(c);
3661                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3662                     c == CHAR_FF || c == CHAR_CR)
3663                       == prop_fail_result)
3664                  MRRETURN(MATCH_NOMATCH);
3665                }
3666              break;
3667    
3668              case PT_PXSPACE:  /* POSIX space */
3669              for (i = 1; i <= min; i++)
3670                {
3671                if (eptr >= md->end_subject)
3672                  {
3673                  SCHECK_PARTIAL();
3674                  MRRETURN(MATCH_NOMATCH);
3675                  }
3676                GETCHARINCTEST(c, eptr);
3677                prop_category = UCD_CATEGORY(c);
3678                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3679                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3680                       == prop_fail_result)
3681                  MRRETURN(MATCH_NOMATCH);
3682                }
3683              break;
3684    
3685              case PT_WORD:
3686              for (i = 1; i <= min; i++)
3687                {
3688                if (eptr >= md->end_subject)
3689                  {
3690                  SCHECK_PARTIAL();
3691                  MRRETURN(MATCH_NOMATCH);
3692                  }
3693                GETCHARINCTEST(c, eptr);
3694                prop_category = UCD_CATEGORY(c);
3695                if ((prop_category == ucp_L || prop_category == ucp_N ||
3696                     c == CHAR_UNDERSCORE)
3697                       == prop_fail_result)
3698                  MRRETURN(MATCH_NOMATCH);
3699                }
3700              break;
3701    
3702              /* This should not occur */
3703    
3704              default:
3705              RRETURN(PCRE_ERROR_INTERNAL);
3706              }
3707            }
3708    
3709        /* Match extended Unicode sequences. We will get here only if the        /* Match extended Unicode sequences. We will get here only if the
3710        support is in the binary; otherwise a compile-time error occurs. */        support is in the binary; otherwise a compile-time error occurs. */
# Line 2738  for (;;) Line 3713  for (;;)
3713          {          {
3714          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3715            {            {
3716              if (eptr >= md->end_subject)
3717                {
3718                SCHECK_PARTIAL();
3719                MRRETURN(MATCH_NOMATCH);
3720                }
3721            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3722            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3723            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3724            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3725              {              {
3726              int len = 1;              int len = 1;
3727              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3728                {                else { GETCHARLEN(c, eptr, len); }
3729                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3730              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3731              eptr += len;              eptr += len;
3732              }              }
# Line 2766  for (;;) Line 3744  for (;;)
3744          case OP_ANY:          case OP_ANY:
3745          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3746            {            {
3747            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3748                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))              {
3749              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3750                MRRETURN(MATCH_NOMATCH);
3751                }
3752              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3753              eptr++;
3754              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3755              }
3756            break;
3757    
3758            case OP_ALLANY:
3759            for (i = 1; i <= min; i++)
3760              {
3761              if (eptr >= md->end_subject)
3762                {
3763                SCHECK_PARTIAL();
3764                MRRETURN(MATCH_NOMATCH);
3765                }
3766            eptr++;            eptr++;
3767            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3768            }            }
3769          break;          break;
3770    
3771          case OP_ANYBYTE:          case OP_ANYBYTE:
3772            if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3773          eptr += min;          eptr += min;
3774          break;          break;
3775    
3776          case OP_ANYNL:          case OP_ANYNL:
3777          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3778            {            {
3779            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3780                {
3781                SCHECK_PARTIAL();
3782                MRRETURN(MATCH_NOMATCH);
3783                }
3784            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3785            switch(c)            switch(c)
3786              {              {
3787              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3788              case 0x000d:              case 0x000d:
3789              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3790              break;              break;
3791    
3792              case 0x000a:              case 0x000a:
3793                break;
3794    
3795              case 0x000b:              case 0x000b:
3796              case 0x000c:              case 0x000c:
3797              case 0x0085:              case 0x0085:
3798              case 0x2028:              case 0x2028:
3799              case 0x2029:              case 0x2029:
3800                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3801                break;
3802                }
3803              }
3804            break;
3805    
3806            case OP_NOT_HSPACE:
3807            for (i = 1; i <= min; i++)
3808              {
3809              if (eptr >= md->end_subject)
3810                {
3811                SCHECK_PARTIAL();
3812                MRRETURN(MATCH_NOMATCH);
3813                }
3814              GETCHARINC(c, eptr);
3815              switch(c)
3816                {
3817                default: break;
3818                case 0x09:      /* HT */
3819                case 0x20:      /* SPACE */
3820                case 0xa0:      /* NBSP */
3821                case 0x1680:    /* OGHAM SPACE MARK */
3822                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3823                case 0x2000:    /* EN QUAD */
3824                case 0x2001:    /* EM QUAD */
3825                case 0x2002:    /* EN SPACE */
3826                case 0x2003:    /* EM SPACE */
3827                case 0x2004:    /* THREE-PER-EM SPACE */
3828                case 0x2005:    /* FOUR-PER-EM SPACE */
3829                case 0x2006:    /* SIX-PER-EM SPACE */
3830                case 0x2007:    /* FIGURE SPACE */
3831                case 0x2008:    /* PUNCTUATION SPACE */
3832                case 0x2009:    /* THIN SPACE */
3833                case 0x200A:    /* HAIR SPACE */
3834                case 0x202f:    /* NARROW NO-BREAK SPACE */
3835                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3836                case 0x3000:    /* IDEOGRAPHIC SPACE */
3837                MRRETURN(MATCH_NOMATCH);
3838                }
3839              }
3840            break;
3841    
3842            case OP_HSPACE:
3843            for (i = 1; i <= min; i++)
3844              {
3845              if (eptr >= md->end_subject)
3846                {
3847                SCHECK_PARTIAL();
3848                MRRETURN(MATCH_NOMATCH);
3849                }
3850              GETCHARINC(c, eptr);
3851              switch(c)
3852                {
3853                default: MRRETURN(MATCH_NOMATCH);
3854                case 0x09:      /* HT */
3855                case 0x20:      /* SPACE */
3856                case 0xa0:      /* NBSP */
3857                case 0x1680:    /* OGHAM SPACE MARK */
3858                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3859                case 0x2000:    /* EN QUAD */
3860                case 0x2001:    /* EM QUAD */
3861                case 0x2002:    /* EN SPACE */
3862                case 0x2003:    /* EM SPACE */
3863                case 0x2004:    /* THREE-PER-EM SPACE */
3864                case 0x2005:    /* FOUR-PER-EM SPACE */
3865                case 0x2006:    /* SIX-PER-EM SPACE */
3866                case 0x2007:    /* FIGURE SPACE */
3867                case 0x2008:    /* PUNCTUATION SPACE */
3868                case 0x2009:    /* THIN SPACE */
3869                case 0x200A:    /* HAIR SPACE */
3870                case 0x202f:    /* NARROW NO-BREAK SPACE */
3871                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3872                case 0x3000:    /* IDEOGRAPHIC SPACE */
3873                break;
3874                }
3875              }
3876            break;
3877    
3878            case OP_NOT_VSPACE:
3879            for (i = 1; i <= min; i++)
3880              {
3881              if (eptr >= md->end_subject)
3882                {
3883                SCHECK_PARTIAL();
3884                MRRETURN(MATCH_NOMATCH);
3885                }
3886              GETCHARINC(c, eptr);
3887              switch(c)
3888                {
3889                default: break;
3890                case 0x0a:      /* LF */
3891                case 0x0b:      /* VT */
3892                case 0x0c:      /* FF */
3893                case 0x0d:      /* CR */
3894                case 0x85:      /* NEL */
3895                case 0x2028:    /* LINE SEPARATOR */
3896                case 0x2029:    /* PARAGRAPH SEPARATOR */
3897                MRRETURN(MATCH_NOMATCH);
3898                }
3899              }
3900            break;
3901    
3902            case OP_VSPACE:
3903            for (i = 1; i <= min; i++)
3904              {
3905              if (eptr >= md->end_subject)
3906                {
3907                SCHECK_PARTIAL();
3908                MRRETURN(MATCH_NOMATCH);
3909                }
3910              GETCHARINC(c, eptr);
3911              switch(c)
3912                {
3913                default: MRRETURN(MATCH_NOMATCH);
3914                case 0x0a:      /* LF */
3915                case 0x0b:      /* VT */
3916                case 0x0c:      /* FF */
3917                case 0x0d:      /* CR */
3918                case 0x85:      /* NEL */
3919                case 0x2028:    /* LINE SEPARATOR */
3920                case 0x2029:    /* PARAGRAPH SEPARATOR */
3921              break;              break;
3922              }              }
3923            }            }
# Line 2803  for (;;) Line 3926  for (;;)
3926          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3927          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3928            {            {
3929            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3930                {
3931                SCHECK_PARTIAL();
3932                MRRETURN(MATCH_NOMATCH);
3933                }
3934            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3935            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3936              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
3937            }            }
3938          break;          break;
3939    
3940          case OP_DIGIT:          case OP_DIGIT:
3941          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3942            {            {
3943            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3944               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3945              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3946                MRRETURN(MATCH_NOMATCH);
3947                }
3948              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3949                MRRETURN(MATCH_NOMATCH);
3950            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3951            }            }
3952          break;          break;
# Line 2823  for (;;) Line 3954  for (;;)
3954          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3955          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3956            {            {
3957            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3958               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3959              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3960            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3961                }
3962              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3963                MRRETURN(MATCH_NOMATCH);
3964              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3965            }            }
3966          break;          break;
3967    
3968          case OP_WHITESPACE:          case OP_WHITESPACE:
3969          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3970            {            {
3971            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3972               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3973              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3974                MRRETURN(MATCH_NOMATCH);
3975                }
3976              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3977                MRRETURN(MATCH_NOMATCH);
3978            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3979            }            }
3980          break;          break;
# Line 2843  for (;;) Line 3982  for (;;)
3982          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3983          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3984            {            {
3985            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3986               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))              {
3987              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3988            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3989                }
3990              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3991                MRRETURN(MATCH_NOMATCH);
3992              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3993            }            }
3994          break;          break;
3995    
3996          case OP_WORDCHAR:          case OP_WORDCHAR:
3997          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3998            {            {
3999            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
4000               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
4001              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
4002                MRRETURN(MATCH_NOMATCH);
4003                }
4004              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
4005                MRRETURN(MATCH_NOMATCH);
4006            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4007            }            }
4008          break;          break;
# Line 2868  for (;;) Line 4015  for (;;)
4015  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
4016    
4017        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
4018        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum        than OP_PROP and OP_NOTPROP. */
       number of bytes present, as this was tested above. */  
4019    
4020        switch(ctype)        switch(ctype)
4021          {          {
4022          case OP_ANY:          case OP_ANY:
4023          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
4024            {            {
4025            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
4026              {              {
4027              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
4028              eptr++;              MRRETURN(MATCH_NOMATCH);
4029              }              }
4030              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
4031              eptr++;
4032            }            }
         else eptr += min;  
4033          break;          break;
4034    
4035          case OP_ANYBYTE:          case OP_ALLANY:
4036            if (eptr > md->end_subject - min)
4037              {
4038              SCHECK_PARTIAL();
4039              MRRETURN(MATCH_NOMATCH);
4040              }
4041          eptr += min;          eptr += min;
4042          break;          break;
4043    
4044          /* Because of the CRLF case, we can't assume the minimum number of          case OP_ANYBYTE:
4045          bytes are present in this case. */          if (eptr > md->end_subject - min)
4046              {
4047              SCHECK_PARTIAL();
4048              MRRETURN(MATCH_NOMATCH);
4049              }
4050            eptr += min;
4051            break;
4052    
4053          case OP_ANYNL:          case OP_ANYNL:
4054          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4055            {            {
4056            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
4057                {
4058                SCHECK_PARTIAL();
4059                MRRETURN(MATCH_NOMATCH);
4060                }
4061            switch(*eptr++)            switch(*eptr++)
4062              {              {
4063              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4064              case 0x000d:              case 0x000d:
4065              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4066              break;              break;
4067              case 0x000a:              case 0x000a:
4068                break;
4069    
4070              case 0x000b:              case 0x000b:
4071              case 0x000c:              case 0x000c:
4072              case 0x0085:              case 0x0085:
4073                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4074                break;
4075                }
4076              }
4077            break;
4078    
4079            case OP_NOT_HSPACE:
4080            for (i = 1; i <= min; i++)
4081              {
4082              if (eptr >= md->end_subject)
4083                {
4084                SCHECK_PARTIAL();
4085                MRRETURN(MATCH_NOMATCH);
4086                }
4087              switch(*eptr++)
4088                {
4089                default: break;
4090                case 0x09:      /* HT */
4091                case 0x20:      /* SPACE */
4092                case 0xa0:      /* NBSP */
4093                MRRETURN(MATCH_NOMATCH);
4094                }
4095              }
4096            break;
4097    
4098            case OP_HSPACE:
4099            for (i = 1; i <= min; i++)
4100              {
4101              if (eptr >= md->end_subject)
4102                {
4103                SCHECK_PARTIAL();
4104                MRRETURN(MATCH_NOMATCH);
4105                }
4106              switch(*eptr++)
4107                {
4108                default: MRRETURN(MATCH_NOMATCH);
4109                case 0x09:      /* HT */
4110                case 0x20:      /* SPACE */
4111                case 0xa0:      /* NBSP */
4112                break;
4113                }
4114              }
4115            break;
4116    
4117            case OP_NOT_VSPACE:
4118            for (i = 1; i <= min; i++)
4119              {
4120              if (eptr >= md->end_subject)
4121                {
4122                SCHECK_PARTIAL();
4123                MRRETURN(MATCH_NOMATCH);
4124                }
4125              switch(*eptr++)
4126                {
4127                default: break;
4128                case 0x0a:      /* LF */
4129                case 0x0b:      /* VT */
4130                case 0x0c:      /* FF */
4131                case 0x0d:      /* CR */
4132                case 0x85:      /* NEL */
4133                MRRETURN(MATCH_NOMATCH);
4134                }
4135              }
4136            break;
4137    
4138            case OP_VSPACE:
4139            for (i = 1; i <= min; i++)
4140              {
4141              if (eptr >= md->end_subject)
4142                {
4143                SCHECK_PARTIAL();
4144                MRRETURN(MATCH_NOMATCH);
4145                }
4146              switch(*eptr++)
4147                {
4148                default: MRRETURN(MATCH_NOMATCH);
4149                case 0x0a:      /* LF */
4150                case 0x0b:      /* VT */
4151                case 0x0c:      /* FF */
4152                case 0x0d:      /* CR */
4153                case 0x85:      /* NEL */
4154              break;              break;
4155              }              }
4156            }            }
# Line 2913  for (;;) Line 4158  for (;;)
4158    
4159          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
4160          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4161            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            {
4162              if (eptr >= md->end_subject)
4163                {
4164                SCHECK_PARTIAL();
4165                MRRETURN(MATCH_NOMATCH);
4166                }
4167              if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4168              }
4169          break;          break;
4170    
4171          case OP_DIGIT:          case OP_DIGIT:
4172          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4173            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            {
4174              if (eptr >= md->end_subject)
4175                {
4176                SCHECK_PARTIAL();
4177                MRRETURN(MATCH_NOMATCH);
4178                }
4179              if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4180              }
4181          break;          break;
4182    
4183          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4184          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4185            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            {
4186              if (eptr >= md->end_subject)
4187                {
4188                SCHECK_PARTIAL();
4189                MRRETURN(MATCH_NOMATCH);
4190                }
4191              if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4192              }
4193          break;          break;
4194    
4195          case OP_WHITESPACE:          case OP_WHITESPACE:
4196          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4197            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            {
4198              if (eptr >= md->end_subject)
4199                {
4200                SCHECK_PARTIAL();
4201                MRRETURN(MATCH_NOMATCH);
4202                }
4203