/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 512 by ph10, Tue Mar 30 11:11:52 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45  #define NLBLOCK md           /* The block containing newline information */  #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49    #define NLBLOCK md             /* Block containing newline information */
50    #define PSSTART start_subject  /* Field containing processed string start */
51    #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    /* Undefine some potentially clashing cpp symbols */
56    
57  /* Structure for building a chain of data that actually lives on the  #undef min
58  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   USPTR epb_saved_eptr;  
 } eptrblock;  
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_ACCEPT       (-999)
75    #define MATCH_COMMIT       (-998)
76    #define MATCH_PRUNE        (-997)
77    #define MATCH_SKIP         (-996)
78    #define MATCH_SKIP_ARG     (-995)
79    #define MATCH_THEN         (-994)
80    
81    /* This is a convenience macro for code that occurs many times. */
82    
83    #define MRRETURN(ra) \
84      { \
85      md->mark = markptr; \
86      RRETURN(ra); \
87      }
88    
89  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
90  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 81  static const char rep_max[] = { 0, 0, 0, Line 99  static const char rep_max[] = { 0, 0, 0,
99    
100    
101    
102  #ifdef DEBUG  #ifdef PCRE_DEBUG
103  /*************************************************  /*************************************************
104  *        Debugging function to print chars       *  *        Debugging function to print chars       *
105  *************************************************/  *************************************************/
# Line 101  Returns:     nothing Line 119  Returns:     nothing
119  static void  static void
120  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121  {  {
122  int c;  unsigned int c;
123  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124  while (length-- > 0)  while (length-- > 0)
125    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 133  match_ref(int offset, register USPTR ept Line 151  match_ref(int offset, register USPTR ept
151  {  {
152  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
153    
154  #ifdef DEBUG  #ifdef PCRE_DEBUG
155  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
156    printf("matching subject <null>");    printf("matching subject <null>");
157  else  else
# Line 150  printf("\n"); Line 168  printf("\n");
168    
169  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
170    
171  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172    properly if Unicode properties are supported. Otherwise, we can check only
173    ASCII characters. */
174    
175  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
176    {    {
177    #ifdef SUPPORT_UTF8
178    #ifdef SUPPORT_UCP
179      if (md->utf8)
180        {
181        USPTR endptr = eptr + length;
182        while (eptr < endptr)
183          {
184          int c, d;
185          GETCHARINC(c, eptr);
186          GETCHARINC(d, p);
187          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188          }
189        }
190      else
191    #endif
192    #endif
193    
194      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195      is no UCP support. */
196    
197    while (length-- > 0)    while (length-- > 0)
198      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199    }    }
200    
201    /* In the caseful case, we can just compare the bytes, whether or not we
202    are in UTF-8 mode. */
203    
204  else  else
205    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
# Line 186  calls by keeping local variables that ne Line 230  calls by keeping local variables that ne
230  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
231  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
232  always used to.  always used to.
233    
234    The original heap-recursive code used longjmp(). However, it seems that this
235    can be very slow on some operating systems. Following a suggestion from Stan
236    Switzer, the use of longjmp() has been abolished, at the cost of having to
237    provide a unique number for each call to RMATCH. There is no way of generating
238    a sequence of numbers at compile time in C. I have given them names, to make
239    them stand out more clearly.
240    
241    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243    tests. Furthermore, not using longjmp() means that local dynamic variables
244    don't have indeterminate values; this has meant that the frame size can be
245    reduced because the result can be "passed back" by straight setting of the
246    variable instead of being passed in the frame.
247  ****************************************************************************  ****************************************************************************
248  ***************************************************************************/  ***************************************************************************/
249    
250    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251    below must be updated in sync.  */
252    
253    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
254           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
255           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258           RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58 };
259    
260  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
261  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
262    actually used in this definition. */
263    
264  #ifndef NO_RECURSE  #ifndef NO_RECURSE
265  #define REGISTER register  #define REGISTER register
266  #ifdef DEBUG  
267  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #ifdef PCRE_DEBUG
268    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
269    { \    { \
270    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
271    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
272    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
273    }    }
274  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 208  versions and production versions. */ Line 277  versions and production versions. */
277    return ra; \    return ra; \
278    }    }
279  #else  #else
280  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
281    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
282  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
283  #endif  #endif
284    
285  #else  #else
286    
287    
288  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
289  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
290  match(), which never changes. */  argument of match(), which never changes. */
291    
292  #define REGISTER  #define REGISTER
293    
294  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
295    {\    {\
296    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
297    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
298      {\    newframe->Xeptr = ra;\
299      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
300      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
301      newframe->Xoffset_top = rc;\    newframe->Xmarkptr = markptr;\
302      newframe->Xims = re;\    newframe->Xoffset_top = rc;\
303      newframe->Xeptrb = rf;\    newframe->Xims = re;\
304      newframe->Xflags = rg;\    newframe->Xeptrb = rf;\
305      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xflags = rg;\
306      newframe->Xprevframe = frame;\    newframe->Xrdepth = frame->Xrdepth + 1;\
307      frame = newframe;\    newframe->Xprevframe = frame;\
308      DPRINTF(("restarting from line %d\n", __LINE__));\    frame = newframe;\
309      goto HEAP_RECURSE;\    DPRINTF(("restarting from line %d\n", __LINE__));\
310      }\    goto HEAP_RECURSE;\
311    else\    L_##rw:\
312      {\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
313    }    }
314    
315  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 254  match(), which never changes. */ Line 319  match(), which never changes. */
319    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
320    if (frame != NULL)\    if (frame != NULL)\
321      {\      {\
322      frame->Xresult = ra;\      rrc = ra;\
323      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
324      }\      }\
325    return ra;\    return ra;\
326    }    }
# Line 269  typedef struct heapframe { Line 333  typedef struct heapframe {
333    
334    /* Function arguments that may change */    /* Function arguments that may change */
335    
336    const uschar *Xeptr;    USPTR Xeptr;
337    const uschar *Xecode;    const uschar *Xecode;
338      USPTR Xmstart;
339      USPTR Xmarkptr;
340    int Xoffset_top;    int Xoffset_top;
341    long int Xims;    long int Xims;
342    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 279  typedef struct heapframe { Line 345  typedef struct heapframe {
345    
346    /* Function local variables */    /* Function local variables */
347    
348    const uschar *Xcallpat;    USPTR Xcallpat;
349    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
350    const uschar *Xdata;    USPTR Xcharptr;
351    const uschar *Xnext;  #endif
352    const uschar *Xpp;    USPTR Xdata;
353    const uschar *Xprev;    USPTR Xnext;
354    const uschar *Xsaved_eptr;    USPTR Xpp;
355      USPTR Xprev;
356      USPTR Xsaved_eptr;
357    
358    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
359    
360    BOOL Xcur_is_word;    BOOL Xcur_is_word;
361    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
362    BOOL Xprev_is_word;    BOOL Xprev_is_word;
363    
364    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
# Line 303  typedef struct heapframe { Line 370  typedef struct heapframe {
370    int Xprop_category;    int Xprop_category;
371    int Xprop_chartype;    int Xprop_chartype;
372    int Xprop_script;    int Xprop_script;
373    int *Xprop_test_variable;    int Xoclength;
374      uschar Xocchars[8];
375  #endif  #endif
376    
377      int Xcodelink;
378    int Xctype;    int Xctype;
379    int Xfc;    unsigned int Xfc;
380    int Xfi;    int Xfi;
381    int Xlength;    int Xlength;
382    int Xmax;    int Xmax;
# Line 321  typedef struct heapframe { Line 390  typedef struct heapframe {
390    
391    eptrblock Xnewptrb;    eptrblock Xnewptrb;
392    
393    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
394    
395    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
396    
397  } heapframe;  } heapframe;
398    
# Line 340  typedef struct heapframe { Line 408  typedef struct heapframe {
408  *         Match from current position            *  *         Match from current position            *
409  *************************************************/  *************************************************/
410    
411  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
412  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
413  same response.  same response. */
414    
415    /* These macros pack up tests that are used for partial matching, and which
416    appear several times in the code. We set the "hit end" flag if the pointer is
417    at the end of the subject and also past the start of the subject (i.e.
418    something has been matched). For hard partial matching, we then return
419    immediately. The second one is used when we already know we are past the end of
420    the subject. */
421    
422    #define CHECK_PARTIAL()\
423      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
424        {\
425        md->hitend = TRUE;\
426        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
427        }
428    
429    #define SCHECK_PARTIAL()\
430      if (md->partial != 0 && eptr > mstart)\
431        {\
432        md->hitend = TRUE;\
433        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
434        }
435    
436  Performance note: It might be tempting to extract commonly used fields from the  
437  md structure (e.g. utf8, end_subject) into individual variables to improve  /* Performance note: It might be tempting to extract commonly used fields from
438    the md structure (e.g. utf8, end_subject) into individual variables to improve
439  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
440  made performance worse.  made performance worse.
441    
442  Arguments:  Arguments:
443     eptr        pointer in subject     eptr        pointer to current character in subject
444     ecode       position in code     ecode       pointer to current position in compiled code
445       mstart      pointer to the current match start position (can be modified
446                     by encountering \K)
447       markptr     pointer to the most recent MARK name, or NULL
448     offset_top  current top pointer     offset_top  current top pointer
449     md          pointer to "static" info for the match     md          pointer to "static" info for the match
450     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 362  Arguments: Line 452  Arguments:
452                   brackets - for testing for empty matches                   brackets - for testing for empty matches
453     flags       can contain     flags       can contain
454                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
455                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
456                       group that can match an empty string
457     rdepth      the recursion depth     rdepth      the recursion depth
458    
459  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
460                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
461                   a negative MATCH_xxx value for PRUNE, SKIP, etc
462                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
463                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
464  */  */
465    
466  static int  static int
467  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
468    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
469    int flags, unsigned int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
470  {  {
471  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
472  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
473  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
474    
475  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
476  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
477  register unsigned int  c;  /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
478  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
479    
480    BOOL minimize, possessive; /* Quantifier options */
481    int condcode;
482    
483  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
484  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
485  heap storage. Set up the top-level frame here; others are obtained from the  heap storage. Set up the top-level frame here; others are obtained from the
# Line 398  frame->Xprevframe = NULL;            /* Line 493  frame->Xprevframe = NULL;            /*
493    
494  frame->Xeptr = eptr;  frame->Xeptr = eptr;
495  frame->Xecode = ecode;  frame->Xecode = ecode;
496    frame->Xmstart = mstart;
497    frame->Xmarkptr = markptr;
498  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
499  frame->Xims = ims;  frame->Xims = ims;
500  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 412  HEAP_RECURSE: Line 509  HEAP_RECURSE:
509    
510  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
511  #define ecode              frame->Xecode  #define ecode              frame->Xecode
512    #define mstart             frame->Xmstart
513    #define markptr            frame->Xmarkptr
514  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
515  #define ims                frame->Xims  #define ims                frame->Xims
516  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 424  HEAP_RECURSE: Line 523  HEAP_RECURSE:
523  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
524  #endif  #endif
525  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
526    #define codelink           frame->Xcodelink
527  #define data               frame->Xdata  #define data               frame->Xdata
528  #define next               frame->Xnext  #define next               frame->Xnext
529  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 434  HEAP_RECURSE: Line 534  HEAP_RECURSE:
534    
535  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
536  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
537  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
538    
539  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
# Line 446  HEAP_RECURSE: Line 545  HEAP_RECURSE:
545  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
546  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
547  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
548  #define prop_test_variable frame->Xprop_test_variable  #define oclength           frame->Xoclength
549    #define occhars            frame->Xocchars
550  #endif  #endif
551    
552  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 470  HEAP_RECURSE: Line 570  HEAP_RECURSE:
570  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
571  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
572    
573  #else  #else         /* NO_RECURSE not defined */
574  #define fi i  #define fi i
575  #define fc c  #define fc c
576    
# Line 489  recursion_info new_recursive;      /* wi Line 589  recursion_info new_recursive;      /* wi
589                                     /* that do not have to be preserved over  */                                     /* that do not have to be preserved over  */
590  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
591  BOOL condition;  BOOL condition;
 BOOL minimize;  
592  BOOL prev_is_word;  BOOL prev_is_word;
593    
594  unsigned long int original_ims;  unsigned long int original_ims;
# Line 501  int prop_fail_result; Line 600  int prop_fail_result;
600  int prop_category;  int prop_category;
601  int prop_chartype;  int prop_chartype;
602  int prop_script;  int prop_script;
603  int *prop_test_variable;  int oclength;
604    uschar occhars[8];
605  #endif  #endif
606    
607    int codelink;
608  int ctype;  int ctype;
609  int length;  int length;
610  int max;  int max;
# Line 516  int save_offset1, save_offset2, save_off Line 617  int save_offset1, save_offset2, save_off
617  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
618    
619  eptrblock newptrb;  eptrblock newptrb;
620  #endif  #endif     /* NO_RECURSE */
621    
622  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
623  variables. */  variables. */
# Line 524  variables. */ Line 625  variables. */
625  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
626  prop_value = 0;  prop_value = 0;
627  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_variable = NULL;  
628  #endif  #endif
629    
630    
631  /* This label is used for tail recursion, which is used in a few cases even  /* This label is used for tail recursion, which is used in a few cases even
632  when NO_RECURSE is not defined, in order to reduce the amount of stack that is  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
633  used. Thanks to Ian Taylor for noticing this possibility and sending the  used. Thanks to Ian Taylor for noticing this possibility and sending the
# Line 537  TAIL_RECURSE: Line 638  TAIL_RECURSE:
638  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
639  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
640  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
641  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
642  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
643  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
644  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
645    
646    #ifdef SUPPORT_UTF8
647    utf8 = md->utf8;       /* Local copy of the flag */
648    #else
649    utf8 = FALSE;
650    #endif
651    
652  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
653  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
654    
# Line 550  if (rdepth >= md->match_limit_recursion) Line 657  if (rdepth >= md->match_limit_recursion)
657    
658  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
659    
660  #ifdef SUPPORT_UTF8  /* At the start of a group with an unlimited repeat that may match an empty
661  utf8 = md->utf8;       /* Local copy of the flag */  string, the match_cbegroup flag is set. When this is the case, add the current
662  #else  subject pointer to the chain of such remembered pointers, to be checked when we
663  utf8 = FALSE;  hit the closing ket, in order to break infinite loops that match no characters.
664  #endif  When match() is called in other circumstances, don't add to the chain. The
665    match_cbegroup flag must NOT be used with tail recursion, because the memory
666  /* At the start of a bracketed group, add the current subject pointer to the  block that is used is on the stack, so a new one may be required for each
667  stack of such pointers, to be re-instated at the end of the group when we hit  match(). */
 the closing ket. When match() is called in other circumstances, we don't add to  
 this stack. */  
668    
669  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
670    {    {
   newptrb.epb_prev = eptrb;  
671    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
672      newptrb.epb_prev = eptrb;
673    eptrb = &newptrb;    eptrb = &newptrb;
674    }    }
675    
676  /* Now start processing the operations. */  /* Now start processing the opcodes. */
677    
678  for (;;)  for (;;)
679    {    {
680      minimize = possessive = FALSE;
681    op = *ecode;    op = *ecode;
   minimize = FALSE;  
   
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
682    
683    if (md->partial &&    switch(op)
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
   /* Opening capturing bracket. If there is space in the offset vector, save  
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
684      {      {
685      number = op - OP_BRA;      case OP_MARK:
686        markptr = ecode + 2;
687      /* For extended extraction brackets (large number), we have to fish out the      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
688      number from a dummy opcode at the start. */        ims, eptrb, flags, RM55);
689    
690      if (number > EXTRACT_BASIC_MAX)      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
691        number = GET2(ecode, 2+LINK_SIZE);      argument, and we must check whether that argument matches this MARK's
692        argument. It is passed back in md->start_match_ptr (an overloading of that
693        variable). If it does match, we reset that variable to the current subject
694        position and return MATCH_SKIP. Otherwise, pass back the return code
695        unaltered. */
696    
697        if (rrc == MATCH_SKIP_ARG &&
698            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
699          {
700          md->start_match_ptr = eptr;
701          RRETURN(MATCH_SKIP);
702          }
703    
704        if (md->mark == NULL) md->mark = markptr;
705        RRETURN(rrc);
706    
707        case OP_FAIL:
708        MRRETURN(MATCH_NOMATCH);
709    
710        case OP_COMMIT:
711        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
712          ims, eptrb, flags, RM52);
713        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
714        MRRETURN(MATCH_COMMIT);
715    
716        case OP_PRUNE:
717        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
718          ims, eptrb, flags, RM51);
719        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
720        MRRETURN(MATCH_PRUNE);
721    
722        case OP_PRUNE_ARG:
723        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
724          ims, eptrb, flags, RM56);
725        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
726        md->mark = ecode + 2;
727        RRETURN(MATCH_PRUNE);
728    
729        case OP_SKIP:
730        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
731          ims, eptrb, flags, RM53);
732        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
733        md->start_match_ptr = eptr;   /* Pass back current position */
734        MRRETURN(MATCH_SKIP);
735    
736        case OP_SKIP_ARG:
737        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
738          ims, eptrb, flags, RM57);
739        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
740    
741        /* Pass back the current skip name by overloading md->start_match_ptr and
742        returning the special MATCH_SKIP_ARG return code. This will either be
743        caught by a matching MARK, or get to the top, where it is treated the same
744        as PRUNE. */
745    
746        md->start_match_ptr = ecode + 2;
747        RRETURN(MATCH_SKIP_ARG);
748    
749        case OP_THEN:
750        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
751          ims, eptrb, flags, RM54);
752        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
753        MRRETURN(MATCH_THEN);
754    
755        case OP_THEN_ARG:
756        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
757          ims, eptrb, flags, RM58);
758        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
759        md->mark = ecode + 2;
760        RRETURN(MATCH_THEN);
761    
762        /* Handle a capturing bracket. If there is space in the offset vector, save
763        the current subject position in the working slot at the top of the vector.
764        We mustn't change the current values of the data slot, because they may be
765        set from a previous iteration of this group, and be referred to by a
766        reference inside the group.
767    
768        If the bracket fails to match, we need to restore this value and also the
769        values of the final offsets, in case they were set by a previous iteration
770        of the same bracket.
771    
772        If there isn't enough space in the offset vector, treat this as if it were
773        a non-capturing bracket. Don't worry about setting the flag for the error
774        case here; that is handled in the code for KET. */
775    
776        case OP_CBRA:
777        case OP_SCBRA:
778        number = GET2(ecode, 1+LINK_SIZE);
779      offset = number << 1;      offset = number << 1;
780    
781  #ifdef DEBUG  #ifdef PCRE_DEBUG
782      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
783        printf("subject=");
784      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
785      printf("\n");      printf("\n");
786  #endif  #endif
# Line 624  for (;;) Line 795  for (;;)
795        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
796        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
797    
798          flags = (op == OP_SCBRA)? match_cbegroup : 0;
799        do        do
800          {          {
801          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
802            match_isgroup);            ims, eptrb, flags, RM1);
803          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
804          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
805          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
806          }          }
# Line 640  for (;;) Line 812  for (;;)
812        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
813        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
814    
815          if (rrc != MATCH_THEN) md->mark = markptr;
816        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
817        }        }
818    
819      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
820        as a non-capturing bracket. */
821    
822      else op = OP_BRA;      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
823      }      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   
   /* Other types of node can be handled by a switch */  
   
   switch(op)  
     {  
     case OP_BRA:     /* Non-capturing bracket: optimized */  
     DPRINTF(("start bracket 0\n"));  
824    
825      /* Loop for all the alternatives */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
826    
827        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
828        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
829    
830        /* Non-capturing bracket. Loop for all the alternatives. When we get to the
831        final alternative within the brackets, we would return the result of a
832        recursive call to match() whatever happened. We can reduce stack usage by
833        turning this into a tail recursion, except in the case when match_cbegroup
834        is set. */
835    
836        case OP_BRA:
837        case OP_SBRA:
838        DPRINTF(("start non-capturing bracket\n"));
839        flags = (op >= OP_SBRA)? match_cbegroup : 0;
840      for (;;)      for (;;)
841        {        {
842        /* When we get to the final alternative within the brackets, we would        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
843        return the result of a recursive call to match() whatever happened. We          {
844        can reduce stack usage by turning this into a tail recursion. */          if (flags == 0)    /* Not a possibly empty group */
845              {
846        if (ecode[GET(ecode, 1)] != OP_ALT)            ecode += _pcre_OP_lengths[*ecode];
847         {            DPRINTF(("bracket 0 tail recursion\n"));
848         ecode += 1 + LINK_SIZE;            goto TAIL_RECURSE;
849         flags = match_isgroup;            }
850         DPRINTF(("bracket 0 tail recursion\n"));  
851         goto TAIL_RECURSE;          /* Possibly empty group; can't use tail recursion. */
852         }  
853            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
854              eptrb, flags, RM48);
855            if (rrc == MATCH_NOMATCH) md->mark = markptr;
856            RRETURN(rrc);
857            }
858    
859        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
860        otherwise return. */        otherwise return. */
861    
862        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
863          match_isgroup);          eptrb, flags, RM2);
864        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
865        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
866        }        }
867      /* Control never reaches here. */      /* Control never reaches here. */
# Line 688  for (;;) Line 873  for (;;)
873      obeyed, we can use tail recursion to avoid using another stack frame. */      obeyed, we can use tail recursion to avoid using another stack frame. */
874    
875      case OP_COND:      case OP_COND:
876      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
877        codelink= GET(ecode, 1);
878    
879        /* Because of the way auto-callout works during compile, a callout item is
880        inserted between OP_COND and an assertion condition. */
881    
882        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
883          {
884          if (pcre_callout != NULL)
885            {
886            pcre_callout_block cb;
887            cb.version          = 1;   /* Version 1 of the callout block */
888            cb.callout_number   = ecode[LINK_SIZE+2];
889            cb.offset_vector    = md->offset_vector;
890            cb.subject          = (PCRE_SPTR)md->start_subject;
891            cb.subject_length   = md->end_subject - md->start_subject;
892            cb.start_match      = mstart - md->start_subject;
893            cb.current_position = eptr - md->start_subject;
894            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
895            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
896            cb.capture_top      = offset_top/2;
897            cb.capture_last     = md->capture_last;
898            cb.callout_data     = md->callout_data;
899            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
900            if (rrc < 0) RRETURN(rrc);
901            }
902          ecode += _pcre_OP_lengths[OP_CALLOUT];
903          }
904    
905        condcode = ecode[LINK_SIZE+1];
906    
907        /* Now see what the actual condition is */
908    
909        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
910          {
911          if (md->recursive == NULL)                /* Not recursing => FALSE */
912            {
913            condition = FALSE;
914            ecode += GET(ecode, 1);
915            }
916          else
917            {
918            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
919            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
920    
921            /* If the test is for recursion into a specific subpattern, and it is
922            false, but the test was set up by name, scan the table to see if the
923            name refers to any other numbers, and test them. The condition is true
924            if any one is set. */
925    
926            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
927              {
928              uschar *slotA = md->name_table;
929              for (i = 0; i < md->name_count; i++)
930                {
931                if (GET2(slotA, 0) == recno) break;
932                slotA += md->name_entry_size;
933                }
934    
935              /* Found a name for the number - there can be only one; duplicate
936              names for different numbers are allowed, but not vice versa. First
937              scan down for duplicates. */
938    
939              if (i < md->name_count)
940                {
941                uschar *slotB = slotA;
942                while (slotB > md->name_table)
943                  {
944                  slotB -= md->name_entry_size;
945                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
946                    {
947                    condition = GET2(slotB, 0) == md->recursive->group_num;
948                    if (condition) break;
949                    }
950                  else break;
951                  }
952    
953                /* Scan up for duplicates */
954    
955                if (!condition)
956                  {
957                  slotB = slotA;
958                  for (i++; i < md->name_count; i++)
959                    {
960                    slotB += md->name_entry_size;
961                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
962                      {
963                      condition = GET2(slotB, 0) == md->recursive->group_num;
964                      if (condition) break;
965                      }
966                    else break;
967                    }
968                  }
969                }
970              }
971    
972            /* Choose branch according to the condition */
973    
974            ecode += condition? 3 : GET(ecode, 1);
975            }
976          }
977    
978        else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
979        {        {
980        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
981        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
982          (md->recursive != NULL) :  
983          (offset < offset_top && md->offset_vector[offset] >= 0);        /* If the numbered capture is unset, but the reference was by name,
984        ecode += condition? (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1));        scan the table to see if the name refers to any other numbers, and test
985        flags = match_isgroup;        them. The condition is true if any one is set. This is tediously similar
986        goto TAIL_RECURSE;        to the code above, but not close enough to try to amalgamate. */
987    
988          if (!condition && condcode == OP_NCREF)
989            {
990            int refno = offset >> 1;
991            uschar *slotA = md->name_table;
992    
993            for (i = 0; i < md->name_count; i++)
994              {
995              if (GET2(slotA, 0) == refno) break;
996              slotA += md->name_entry_size;
997              }
998    
999            /* Found a name for the number - there can be only one; duplicate names
1000            for different numbers are allowed, but not vice versa. First scan down
1001            for duplicates. */
1002    
1003            if (i < md->name_count)
1004              {
1005              uschar *slotB = slotA;
1006              while (slotB > md->name_table)
1007                {
1008                slotB -= md->name_entry_size;
1009                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1010                  {
1011                  offset = GET2(slotB, 0) << 1;
1012                  condition = offset < offset_top &&
1013                    md->offset_vector[offset] >= 0;
1014                  if (condition) break;
1015                  }
1016                else break;
1017                }
1018    
1019              /* Scan up for duplicates */
1020    
1021              if (!condition)
1022                {
1023                slotB = slotA;
1024                for (i++; i < md->name_count; i++)
1025                  {
1026                  slotB += md->name_entry_size;
1027                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1028                    {
1029                    offset = GET2(slotB, 0) << 1;
1030                    condition = offset < offset_top &&
1031                      md->offset_vector[offset] >= 0;
1032                    if (condition) break;
1033                    }
1034                  else break;
1035                  }
1036                }
1037              }
1038            }
1039    
1040          /* Choose branch according to the condition */
1041    
1042          ecode += condition? 3 : GET(ecode, 1);
1043          }
1044    
1045        else if (condcode == OP_DEF)     /* DEFINE - always false */
1046          {
1047          condition = FALSE;
1048          ecode += GET(ecode, 1);
1049        }        }
1050    
1051      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
1052      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
1053        assertion. */
1054    
1055      else      else
1056        {        {
1057        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1058            match_condassert | match_isgroup);            match_condassert, RM3);
1059        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1060          {          {
1061          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
1062            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1063          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1064          }          }
1065        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1066          {          {
1067          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1068          }          }
1069        else ecode += GET(ecode, 1);        else
1070            {
1071            condition = FALSE;
1072            ecode += codelink;
1073            }
1074          }
1075    
1076        /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1077        we can use tail recursion to avoid using another stack frame. */      we can use tail recursion to avoid using another stack frame, except when
1078        match_cbegroup is required for an unlimited repeat of a possibly empty
1079        group. If the second alternative doesn't exist, we can just plough on. */
1080    
1081        if (condition || *ecode == OP_ALT)
1082          {
1083        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1084        flags = match_isgroup;        if (op == OP_SCOND)        /* Possibly empty group */
1085        goto TAIL_RECURSE;          {
1086            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1087            RRETURN(rrc);
1088            }
1089          else                       /* Group must match something */
1090            {
1091            flags = 0;
1092            goto TAIL_RECURSE;
1093            }
1094        }        }
1095      /* Control never reaches here */      else                         /* Condition false & no alternative */
1096          {
1097          ecode += 1 + LINK_SIZE;
1098          }
1099        break;
1100    
1101    
1102        /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1103        to close any currently open capturing brackets. */
1104    
1105        case OP_CLOSE:
1106        number = GET2(ecode, 1);
1107        offset = number << 1;
1108    
1109      /* Skip over conditional reference or large extraction number data if  #ifdef PCRE_DEBUG
1110      encountered. */        printf("end bracket %d at *ACCEPT", number);
1111          printf("\n");
1112    #endif
1113    
1114      case OP_CREF:      md->capture_last = number;
1115      case OP_BRANUMBER:      if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1116          {
1117          md->offset_vector[offset] =
1118            md->offset_vector[md->offset_end - number];
1119          md->offset_vector[offset+1] = eptr - md->start_subject;
1120          if (offset_top <= offset) offset_top = offset + 2;
1121          }
1122      ecode += 3;      ecode += 3;
1123      break;      break;
1124    
     /* End of the pattern. If we are in a recursion, we should restore the  
     offsets appropriately and continue from after the call. */  
1125    
1126        /* End of the pattern, either real or forced. If we are in a top-level
1127        recursion, we should restore the offsets appropriately and continue from
1128        after the call. */
1129    
1130        case OP_ACCEPT:
1131      case OP_END:      case OP_END:
1132      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1133        {        {
# Line 745  for (;;) Line 1136  for (;;)
1136        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1137        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1138          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1139        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1140        ims = original_ims;        ims = original_ims;
1141        ecode = rec->after_call;        ecode = rec->after_call;
1142        break;        break;
1143        }        }
1144    
1145      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1146      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1147        the subject. In both cases, backtracking will then try other alternatives,
1148        if any. */
1149    
1150        if (eptr == mstart &&
1151            (md->notempty ||
1152              (md->notempty_atstart &&
1153                mstart == md->start_subject + md->start_offset)))
1154          MRRETURN(MATCH_NOMATCH);
1155    
1156        /* Otherwise, we have a match. */
1157    
1158        md->end_match_ptr = eptr;           /* Record where we ended */
1159        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1160        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1161    
1162      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      /* For some reason, the macros don't work properly if an expression is
1163      md->end_match_ptr = eptr;          /* Record where we ended */      given as the argument to MRRETURN when the heap is in use. */
1164      md->end_offset_top = offset_top;   /* and how many extracts were taken */  
1165      RRETURN(MATCH_MATCH);      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1166        MRRETURN(rrc);
1167    
1168      /* Change option settings */      /* Change option settings */
1169    
# Line 777  for (;;) Line 1183  for (;;)
1183      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1184      do      do
1185        {        {
1186        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1187          match_isgroup);          RM4);
1188        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1189        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          {
1190            mstart = md->start_match_ptr;   /* In case \K reset it */
1191            break;
1192            }
1193          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1194        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1195        }        }
1196      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1197      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1198    
1199      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1200    
# Line 798  for (;;) Line 1208  for (;;)
1208      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1209      continue;      continue;
1210    
1211      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1212        PRUNE, or COMMIT means we must assume failure without checking subsequent
1213        branches. */
1214    
1215      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1216      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1217      do      do
1218        {        {
1219        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1220          match_isgroup);          RM5);
1221        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1222        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1223            {
1224            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1225            break;
1226            }
1227          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1228        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1229        }        }
1230      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 826  for (;;) Line 1243  for (;;)
1243  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1244      if (utf8)      if (utf8)
1245        {        {
1246        c = GET(ecode,1);        i = GET(ecode, 1);
1247        for (i = 0; i < c; i++)        while (i-- > 0)
1248          {          {
1249          eptr--;          eptr--;
1250          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1251          BACKCHAR(eptr)          BACKCHAR(eptr);
1252          }          }
1253        }        }
1254      else      else
# Line 840  for (;;) Line 1257  for (;;)
1257      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1258    
1259        {        {
1260        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
1261        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1262        }        }
1263    
1264      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1265    
1266        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1267      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1268      break;      break;
1269    
# Line 862  for (;;) Line 1280  for (;;)
1280        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1281        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1282        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
1283        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
1284        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
1285        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1286        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1287        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1288        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1289        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1290        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1291        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1292        }        }
1293      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 897  for (;;) Line 1315  for (;;)
1315      case OP_RECURSE:      case OP_RECURSE:
1316        {        {
1317        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1318        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
1319            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
1320    
1321        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1322    
# Line 929  for (;;) Line 1342  for (;;)
1342    
1343        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1344              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1345        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1346    
1347        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1348        restore the offset and recursion data. */        restore the offset and recursion data. */
1349    
1350        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1351          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1352        do        do
1353          {          {
1354          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1355              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
1356          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1357            {            {
1358            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1359            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1360            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1361              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1362            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1363            }            }
1364          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1365            {            {
1366            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1367              if (new_recursive.offset_save != stacksave)
1368                (pcre_free)(new_recursive.offset_save);
1369            RRETURN(rrc);            RRETURN(rrc);
1370            }            }
1371    
# Line 965  for (;;) Line 1380  for (;;)
1380        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1381        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1382          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1383        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1384        }        }
1385      /* Control never reaches here */      /* Control never reaches here */
1386    
# Line 974  for (;;) Line 1389  for (;;)
1389      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1390      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1391      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1392      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1393        the start-of-match value in case it was changed by \K. */
1394    
1395      case OP_ONCE:      case OP_ONCE:
1396      prev = ecode;      prev = ecode;
# Line 982  for (;;) Line 1398  for (;;)
1398    
1399      do      do
1400        {        {
1401        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1402          eptrb, match_isgroup);        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1403        if (rrc == MATCH_MATCH) break;          {
1404        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          mstart = md->start_match_ptr;
1405            break;
1406            }
1407          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1408        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1409        }        }
1410      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 997  for (;;) Line 1416  for (;;)
1416      /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1417      mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1418    
1419      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1420    
1421      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1422      eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
# Line 1028  for (;;) Line 1447  for (;;)
1447    
1448      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1449        {        {
1450        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1451        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1452        ecode = prev;        ecode = prev;
1453        flags = match_isgroup;        flags = 0;
1454        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1455        }        }
1456      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1457        {        {
1458        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1459        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1460        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1461        flags = 0;        flags = 0;
# Line 1051  for (;;) Line 1470  for (;;)
1470      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1471      break;      break;
1472    
1473      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1474      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1475      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1476      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1477      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1478    
1479      case OP_BRAZERO:      case OP_BRAZERO:
1480        {        {
1481        next = ecode+1;        next = ecode+1;
1482        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1483        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1484        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1485        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1486        }        }
1487      break;      break;
1488    
1489      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1490        {        {
1491        next = ecode+1;        next = ecode+1;
1492        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1493        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1494        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1495        ecode++;        ecode++;
1496        }        }
1497      break;      break;
1498    
1499      /* End of a group, repeated or non-repeating. If we are at the end of      case OP_SKIPZERO:
1500      an assertion "group", stop matching and return MATCH_MATCH, but record the        {
1501      current high water mark for use by positive assertions. Do this also        next = ecode+1;
1502      for the "once" (not-backup up) groups. */        do next += GET(next,1); while (*next == OP_ALT);
1503          ecode = next + 1 + LINK_SIZE;
1504          }
1505        break;
1506    
1507        /* End of a group, repeated or non-repeating. */
1508    
1509      case OP_KET:      case OP_KET:
1510      case OP_KETRMIN:      case OP_KETRMIN:
1511      case OP_KETRMAX:      case OP_KETRMAX:
1512      prev = ecode - GET(ecode, 1);      prev = ecode - GET(ecode, 1);
     saved_eptr = eptrb->epb_saved_eptr;  
1513    
1514      /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1515        infinite repeats of empty string matches, retrieve the subject start from
1516        the chain. Otherwise, set it NULL. */
1517    
1518        if (*prev >= OP_SBRA)
1519          {
1520          saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1521          eptrb = eptrb->epb_prev;              /* Backup to previous group */
1522          }
1523        else saved_eptr = NULL;
1524    
1525      eptrb = eptrb->epb_prev;      /* If we are at the end of an assertion group or an atomic group, stop
1526        matching and return MATCH_MATCH, but record the current high water mark for
1527        use by positive assertions. We also need to record the match start in case
1528        it was changed by \K. */
1529    
1530      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1531          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1099  for (;;) Line 1533  for (;;)
1533        {        {
1534        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1535        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1536        RRETURN(MATCH_MATCH);        md->start_match_ptr = mstart;
1537          MRRETURN(MATCH_MATCH);
1538        }        }
1539    
1540      /* In all other cases except a conditional group we have to check the      /* For capturing groups we have to check the group number back at the start
1541      group number back at the start and if necessary complete handling an      and if necessary complete handling an extraction by setting the offsets and
1542      extraction by setting the offsets and bumping the high water mark. */      bumping the high water mark. Note that whole-pattern recursion is coded as
1543        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1544        when the OP_END is reached. Other recursion is handled here. */
1545    
1546      if (*prev != OP_COND)      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1547        {        {
1548        number = *prev - OP_BRA;        number = GET2(prev, 1+LINK_SIZE);
   
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);  
1549        offset = number << 1;        offset = number << 1;
1550    
1551  #ifdef DEBUG  #ifdef PCRE_DEBUG
1552        printf("end bracket %d", number);        printf("end bracket %d", number);
1553        printf("\n");        printf("\n");
1554  #endif  #endif
1555    
1556        /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1557        of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
       into group 0, so it won't be picked up here. Instead, we catch it when  
       the OP_END is reached. */  
   
       if (number > 0)  
1558          {          {
1559          md->capture_last = number;          md->offset_vector[offset] =
1560          if (offset >= md->offset_max) md->offset_overflow = TRUE; else            md->offset_vector[md->offset_end - number];
1561            {          md->offset_vector[offset+1] = eptr - md->start_subject;
1562            md->offset_vector[offset] =          if (offset_top <= offset) offset_top = offset + 2;
1563              md->offset_vector[md->offset_end - number];          }
1564            md->offset_vector[offset+1] = eptr - md->start_subject;  
1565            if (offset_top <= offset) offset_top = offset + 2;        /* Handle a recursively called group. Restore the offsets
1566            }        appropriately and continue from after the call. */
1567    
1568          /* Handle a recursively called group. Restore the offsets        if (md->recursive != NULL && md->recursive->group_num == number)
1569          appropriately and continue from after the call. */          {
1570            recursion_info *rec = md->recursive;
1571          if (md->recursive != NULL && md->recursive->group_num == number)          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1572            {          md->recursive = rec->prevrec;
1573            recursion_info *rec = md->recursive;          memcpy(md->offset_vector, rec->offset_save,
1574            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));            rec->saved_max * sizeof(int));
1575            md->recursive = rec->prevrec;          offset_top = rec->save_offset_top;
1576            md->start_match = rec->save_start;          ecode = rec->after_call;
1577            memcpy(md->offset_vector, rec->offset_save,          ims = original_ims;
1578              rec->saved_max * sizeof(int));          break;
           ecode = rec->after_call;  
           ims = original_ims;  
           break;  
           }  
1579          }          }
1580        }        }
1581    
1582      /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1583      the group. */      flags, in case they got changed during the group. */
1584    
1585      ims = original_ims;      ims = original_ims;
1586      DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
# Line 1175  for (;;) Line 1599  for (;;)
1599    
1600      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1601      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1602      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1603        unlimited repeat of a group that can match an empty string. */
1604    
1605        flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1606    
1607      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1608        {        {
1609        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1610        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1611          if (flags != 0)    /* Could match an empty string */
1612            {
1613            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1614            RRETURN(rrc);
1615            }
1616        ecode = prev;        ecode = prev;
       flags = match_isgroup;  
1617        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1618        }        }
1619      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1620        {        {
1621        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1622        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1623        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1624        flags = 0;        flags = 0;
# Line 1198  for (;;) Line 1629  for (;;)
1629      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1630    
1631      case OP_CIRC:      case OP_CIRC:
1632      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1633      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1634        {        {
1635        if (eptr != md->start_subject &&        if (eptr != md->start_subject &&
1636            (eptr == md->end_subject ||            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1637             eptr < md->start_subject + md->nllen ||          MRRETURN(MATCH_NOMATCH);
            !IS_NEWLINE(eptr - md->nllen)))  
         RRETURN(MATCH_NOMATCH);  
1638        ecode++;        ecode++;
1639        break;        break;
1640        }        }
# Line 1214  for (;;) Line 1643  for (;;)
1643      /* Start of subject assertion */      /* Start of subject assertion */
1644    
1645      case OP_SOD:      case OP_SOD:
1646      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1647      ecode++;      ecode++;
1648      break;      break;
1649    
1650      /* Start of match assertion */      /* Start of match assertion */
1651    
1652      case OP_SOM:      case OP_SOM:
1653      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1654        ecode++;
1655        break;
1656    
1657        /* Reset the start of match point */
1658    
1659        case OP_SET_SOM:
1660        mstart = eptr;
1661      ecode++;      ecode++;
1662      break;      break;
1663    
# Line 1232  for (;;) Line 1668  for (;;)
1668      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1669        {        {
1670        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1671          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1672        else        else
1673          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1674        ecode++;        ecode++;
1675        break;        break;
1676        }        }
1677      else      else
1678        {        {
1679        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1680        if (!md->endonly)        if (!md->endonly)
1681          {          {
1682          if (eptr != md->end_subject &&          if (eptr != md->end_subject &&
1683              (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1684            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
1685          ecode++;          ecode++;
1686          break;          break;
1687          }          }
# Line 1255  for (;;) Line 1691  for (;;)
1691      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1692    
1693      case OP_EOD:      case OP_EOD:
1694      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1695      ecode++;      ecode++;
1696      break;      break;
1697    
# Line 1263  for (;;) Line 1699  for (;;)
1699    
1700      case OP_EODN:      case OP_EODN:
1701      if (eptr != md->end_subject &&      if (eptr != md->end_subject &&
1702          (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1703        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1704      ecode++;      ecode++;
1705      break;      break;
1706    
# Line 1276  for (;;) Line 1712  for (;;)
1712    
1713        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1714        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1715        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1716          partial matching. */
1717    
1718  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1719        if (utf8)        if (utf8)
1720          {          {
1721          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1722            {            {
1723            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1724            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1725              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1726            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1727            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1728            }            }
1729          if (eptr >= md->end_subject) cur_is_word = FALSE; else          if (eptr >= md->end_subject)
1730              {
1731              SCHECK_PARTIAL();
1732              cur_is_word = FALSE;
1733              }
1734            else
1735            {            {
1736            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1737            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1297  for (;;) Line 1740  for (;;)
1740        else        else
1741  #endif  #endif
1742    
1743        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode */
1744    
1745          {          {
1746          prev_is_word = (eptr != md->start_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1747            ((md->ctypes[eptr[-1]] & ctype_word) != 0);            {
1748          cur_is_word = (eptr < md->end_subject) &&            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1749            ((md->ctypes[*eptr] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1750              }
1751            if (eptr >= md->end_subject)
1752              {
1753              SCHECK_PARTIAL();
1754              cur_is_word = FALSE;
1755              }
1756            else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1757          }          }
1758    
1759        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
1760    
1761        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
1762             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1763          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1764        }        }
1765      break;      break;
1766    
1767      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1768    
1769      case OP_ANY:      case OP_ANY:
1770      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1771        /* Fall through */
1772    
1773        case OP_ALLANY:
1774        if (eptr++ >= md->end_subject)
1775        {        {
1776        if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))        SCHECK_PARTIAL();
1777          RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1778        }        }
1779      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1780      ecode++;      ecode++;
1781      break;      break;
1782    
# Line 1332  for (;;) Line 1784  for (;;)
1784      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1785    
1786      case OP_ANYBYTE:      case OP_ANYBYTE:
1787      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1788          {
1789          SCHECK_PARTIAL();
1790          MRRETURN(MATCH_NOMATCH);
1791          }
1792      ecode++;      ecode++;
1793      break;      break;
1794    
1795      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1796      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1797          {
1798          SCHECK_PARTIAL();
1799          MRRETURN(MATCH_NOMATCH);
1800          }
1801      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1802      if (      if (
1803  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1345  for (;;) Line 1805  for (;;)
1805  #endif  #endif
1806         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1807         )         )
1808        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1809      ecode++;      ecode++;
1810      break;      break;
1811    
1812      case OP_DIGIT:      case OP_DIGIT:
1813      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1814          {
1815          SCHECK_PARTIAL();
1816          MRRETURN(MATCH_NOMATCH);
1817          }
1818      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1819      if (      if (
1820  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1358  for (;;) Line 1822  for (;;)
1822  #endif  #endif
1823         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1824         )         )
1825        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1826      ecode++;      ecode++;
1827      break;      break;
1828    
1829      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1830      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1831          {
1832          SCHECK_PARTIAL();
1833          MRRETURN(MATCH_NOMATCH);
1834          }
1835      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1836      if (      if (
1837  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1371  for (;;) Line 1839  for (;;)
1839  #endif  #endif
1840         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1841         )         )
1842        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1843      ecode++;      ecode++;
1844      break;      break;
1845    
1846      case OP_WHITESPACE:      case OP_WHITESPACE:
1847      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1848          {
1849          SCHECK_PARTIAL();
1850          MRRETURN(MATCH_NOMATCH);
1851          }
1852      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1853      if (      if (
1854  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1384  for (;;) Line 1856  for (;;)
1856  #endif  #endif
1857         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1858         )         )
1859        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1860      ecode++;      ecode++;
1861      break;      break;
1862    
1863      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1864      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1865          {
1866          SCHECK_PARTIAL();
1867          MRRETURN(MATCH_NOMATCH);
1868          }
1869      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1870      if (      if (
1871  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1397  for (;;) Line 1873  for (;;)
1873  #endif  #endif
1874         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1875         )         )
1876        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1877      ecode++;      ecode++;
1878      break;      break;
1879    
1880      case OP_WORDCHAR:      case OP_WORDCHAR:
1881      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1882          {
1883          SCHECK_PARTIAL();
1884          MRRETURN(MATCH_NOMATCH);
1885          }
1886      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1887      if (      if (
1888  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1410  for (;;) Line 1890  for (;;)
1890  #endif  #endif
1891         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1892         )         )
1893        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1894      ecode++;      ecode++;
1895      break;      break;
1896    
1897  #ifdef SUPPORT_UCP      case OP_ANYNL:
1898      /* Check the next character by Unicode property. We will get here only      if (eptr >= md->end_subject)
     if the support is in the binary; otherwise a compile-time error occurs. */  
   
     case OP_PROP:  
     case OP_NOTPROP:  
     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);  
     GETCHARINCTEST(c, eptr);  
1899        {        {
1900        int chartype, script;        SCHECK_PARTIAL();
1901        int category = _pcre_ucp_findprop(c, &chartype, &script);        MRRETURN(MATCH_NOMATCH);
1902          }
1903        GETCHARINCTEST(c, eptr);
1904        switch(c)
1905          {
1906          default: MRRETURN(MATCH_NOMATCH);
1907          case 0x000d:
1908          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1909          break;
1910    
1911          case 0x000a:
1912          break;
1913    
1914          case 0x000b:
1915          case 0x000c:
1916          case 0x0085:
1917          case 0x2028:
1918          case 0x2029:
1919          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1920          break;
1921          }
1922        ecode++;
1923        break;
1924    
1925        case OP_NOT_HSPACE:
1926        if (eptr >= md->end_subject)
1927          {
1928          SCHECK_PARTIAL();
1929          MRRETURN(MATCH_NOMATCH);
1930          }
1931        GETCHARINCTEST(c, eptr);
1932        switch(c)
1933          {
1934          default: break;
1935          case 0x09:      /* HT */
1936          case 0x20:      /* SPACE */
1937          case 0xa0:      /* NBSP */
1938          case 0x1680:    /* OGHAM SPACE MARK */
1939          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1940          case 0x2000:    /* EN QUAD */
1941          case 0x2001:    /* EM QUAD */
1942          case 0x2002:    /* EN SPACE */
1943          case 0x2003:    /* EM SPACE */
1944          case 0x2004:    /* THREE-PER-EM SPACE */
1945          case 0x2005:    /* FOUR-PER-EM SPACE */
1946          case 0x2006:    /* SIX-PER-EM SPACE */
1947          case 0x2007:    /* FIGURE SPACE */
1948          case 0x2008:    /* PUNCTUATION SPACE */
1949          case 0x2009:    /* THIN SPACE */
1950          case 0x200A:    /* HAIR SPACE */
1951          case 0x202f:    /* NARROW NO-BREAK SPACE */
1952          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1953          case 0x3000:    /* IDEOGRAPHIC SPACE */
1954          MRRETURN(MATCH_NOMATCH);
1955          }
1956        ecode++;
1957        break;
1958    
1959        case OP_HSPACE:
1960        if (eptr >= md->end_subject)
1961          {
1962          SCHECK_PARTIAL();
1963          MRRETURN(MATCH_NOMATCH);
1964          }
1965        GETCHARINCTEST(c, eptr);
1966        switch(c)
1967          {
1968          default: MRRETURN(MATCH_NOMATCH);
1969          case 0x09:      /* HT */
1970          case 0x20:      /* SPACE */
1971          case 0xa0:      /* NBSP */
1972          case 0x1680:    /* OGHAM SPACE MARK */
1973          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1974          case 0x2000:    /* EN QUAD */
1975          case 0x2001:    /* EM QUAD */
1976          case 0x2002:    /* EN SPACE */
1977          case 0x2003:    /* EM SPACE */
1978          case 0x2004:    /* THREE-PER-EM SPACE */
1979          case 0x2005:    /* FOUR-PER-EM SPACE */
1980          case 0x2006:    /* SIX-PER-EM SPACE */
1981          case 0x2007:    /* FIGURE SPACE */
1982          case 0x2008:    /* PUNCTUATION SPACE */
1983          case 0x2009:    /* THIN SPACE */
1984          case 0x200A:    /* HAIR SPACE */
1985          case 0x202f:    /* NARROW NO-BREAK SPACE */
1986          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1987          case 0x3000:    /* IDEOGRAPHIC SPACE */
1988          break;
1989          }
1990        ecode++;
1991        break;
1992    
1993        case OP_NOT_VSPACE:
1994        if (eptr >= md->end_subject)
1995          {
1996          SCHECK_PARTIAL();
1997          MRRETURN(MATCH_NOMATCH);
1998          }
1999        GETCHARINCTEST(c, eptr);
2000        switch(c)
2001          {
2002          default: break;
2003          case 0x0a:      /* LF */
2004          case 0x0b:      /* VT */
2005          case 0x0c:      /* FF */
2006          case 0x0d:      /* CR */
2007          case 0x85:      /* NEL */
2008          case 0x2028:    /* LINE SEPARATOR */
2009          case 0x2029:    /* PARAGRAPH SEPARATOR */
2010          MRRETURN(MATCH_NOMATCH);
2011          }
2012        ecode++;
2013        break;
2014    
2015        case OP_VSPACE:
2016        if (eptr >= md->end_subject)
2017          {
2018          SCHECK_PARTIAL();
2019          MRRETURN(MATCH_NOMATCH);
2020          }
2021        GETCHARINCTEST(c, eptr);
2022        switch(c)
2023          {
2024          default: MRRETURN(MATCH_NOMATCH);
2025          case 0x0a:      /* LF */
2026          case 0x0b:      /* VT */
2027          case 0x0c:      /* FF */
2028          case 0x0d:      /* CR */
2029          case 0x85:      /* NEL */
2030          case 0x2028:    /* LINE SEPARATOR */
2031          case 0x2029:    /* PARAGRAPH SEPARATOR */
2032          break;
2033          }
2034        ecode++;
2035        break;
2036    
2037    #ifdef SUPPORT_UCP
2038        /* Check the next character by Unicode property. We will get here only
2039        if the support is in the binary; otherwise a compile-time error occurs. */
2040    
2041        case OP_PROP:
2042        case OP_NOTPROP:
2043        if (eptr >= md->end_subject)
2044          {
2045          SCHECK_PARTIAL();
2046          MRRETURN(MATCH_NOMATCH);
2047          }
2048        GETCHARINCTEST(c, eptr);
2049          {
2050          const ucd_record *prop = GET_UCD(c);
2051    
2052        switch(ecode[1])        switch(ecode[1])
2053          {          {
2054          case PT_ANY:          case PT_ANY:
2055          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2056          break;          break;
2057    
2058          case PT_LAMP:          case PT_LAMP:
2059          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2060               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2061               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2062            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2063           break;           break;
2064    
2065          case PT_GC:          case PT_GC:
2066          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2067            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2068          break;          break;
2069    
2070          case PT_PC:          case PT_PC:
2071          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2072            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2073          break;          break;
2074    
2075          case PT_SC:          case PT_SC:
2076          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2077            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2078          break;          break;
2079    
2080          default:          default:
2081          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
         break;  
2082          }          }
2083    
2084        ecode += 3;        ecode += 3;
# Line 1467  for (;;) Line 2089  for (;;)
2089      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2090    
2091      case OP_EXTUNI:      case OP_EXTUNI:
2092      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2093          {
2094          SCHECK_PARTIAL();
2095          MRRETURN(MATCH_NOMATCH);
2096          }
2097      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2098        {        {
2099        int chartype, script;        int category = UCD_CATEGORY(c);
2100        int category = _pcre_ucp_findprop(c, &chartype, &script);        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2101        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2102          {          {
2103          int len = 1;          int len = 1;
# Line 1480  for (;;) Line 2105  for (;;)
2105            {            {
2106            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2107            }            }
2108          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2109          if (category != ucp_M) break;          if (category != ucp_M) break;
2110          eptr += len;          eptr += len;
2111          }          }
# Line 1501  for (;;) Line 2126  for (;;)
2126      case OP_REF:      case OP_REF:
2127        {        {
2128        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2129        ecode += 3;                                 /* Advance past item */        ecode += 3;
2130    
2131          /* If the reference is unset, there are two possibilities:
2132    
2133          (a) In the default, Perl-compatible state, set the length to be longer
2134          than the amount of subject left; this ensures that every attempt at a
2135          match fails. We can't just fail here, because of the possibility of
2136          quantifiers with zero minima.
2137    
2138          (b) If the JavaScript compatibility flag is set, set the length to zero
2139          so that the back reference matches an empty string.
2140    
2141          Otherwise, set the length to the length of what was matched by the
2142          referenced subpattern. */
2143    
2144        /* If the reference is unset, set the length to be longer than the amount        if (offset >= offset_top || md->offset_vector[offset] < 0)
2145        of subject left; this ensures that every attempt at a match fails. We          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2146        can't just fail here, because of the possibility of quantifiers with zero        else
2147        minima. */          length = md->offset_vector[offset+1] - md->offset_vector[offset];
   
       length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
         md->end_subject - eptr + 1 :  
         md->offset_vector[offset+1] - md->offset_vector[offset];  
2148    
2149        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2150    
# Line 1539  for (;;) Line 2173  for (;;)
2173          break;          break;
2174    
2175          default:               /* No repeat follows */          default:               /* No repeat follows */
2176          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2177              {
2178              CHECK_PARTIAL();
2179              MRRETURN(MATCH_NOMATCH);
2180              }
2181          eptr += length;          eptr += length;
2182          continue;              /* With the main loop */          continue;              /* With the main loop */
2183          }          }
# Line 1555  for (;;) Line 2193  for (;;)
2193    
2194        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2195          {          {
2196          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2197              {
2198              CHECK_PARTIAL();
2199              MRRETURN(MATCH_NOMATCH);
2200              }
2201          eptr += length;          eptr += length;
2202          }          }
2203    
# Line 1570  for (;;) Line 2212  for (;;)
2212          {          {
2213          for (fi = min;; fi++)          for (fi = min;; fi++)
2214            {            {
2215            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2216            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2217            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2218              RRETURN(MATCH_NOMATCH);            if (!match_ref(offset, eptr, length, md, ims))
2219                {
2220                CHECK_PARTIAL();
2221                MRRETURN(MATCH_NOMATCH);
2222                }
2223            eptr += length;            eptr += length;
2224            }            }
2225          /* Control never gets here */          /* Control never gets here */
# Line 1586  for (;;) Line 2232  for (;;)
2232          pp = eptr;          pp = eptr;
2233          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2234            {            {
2235            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2236                {
2237                CHECK_PARTIAL();
2238                break;
2239                }
2240            eptr += length;            eptr += length;
2241            }            }
2242          while (eptr >= pp)          while (eptr >= pp)
2243            {            {
2244            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2245            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2246            eptr -= length;            eptr -= length;
2247            }            }
2248          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2249          }          }
2250        }        }
2251      /* Control never gets here */      /* Control never gets here */
2252    
   
   
2253      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2254      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2255      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1656  for (;;) Line 2304  for (;;)
2304          {          {
2305          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2306            {            {
2307            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2308                {
2309                SCHECK_PARTIAL();
2310                MRRETURN(MATCH_NOMATCH);
2311                }
2312            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2313            if (c > 255)            if (c > 255)
2314              {              {
2315              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2316              }              }
2317            else            else
2318              {              {
2319              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2320              }              }
2321            }            }
2322          }          }
# Line 1674  for (;;) Line 2326  for (;;)
2326          {          {
2327          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2328            {            {
2329            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2330                {
2331                SCHECK_PARTIAL();
2332                MRRETURN(MATCH_NOMATCH);
2333                }
2334            c = *eptr++;            c = *eptr++;
2335            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2336            }            }
2337          }          }
2338    
# Line 1696  for (;;) Line 2352  for (;;)
2352            {            {
2353            for (fi = min;; fi++)            for (fi = min;; fi++)
2354              {              {
2355              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2356              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2357              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2358                if (eptr >= md->end_subject)
2359                  {
2360                  SCHECK_PARTIAL();
2361                  MRRETURN(MATCH_NOMATCH);
2362                  }
2363              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2364              if (c > 255)              if (c > 255)
2365                {                {
2366                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2367                }                }
2368              else              else
2369                {                {
2370                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2371                }                }
2372              }              }
2373            }            }
# Line 1716  for (;;) Line 2377  for (;;)
2377            {            {
2378            for (fi = min;; fi++)            for (fi = min;; fi++)
2379              {              {
2380              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2381              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2382              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2383                if (eptr >= md->end_subject)
2384                  {
2385                  SCHECK_PARTIAL();
2386                  MRRETURN(MATCH_NOMATCH);
2387                  }
2388              c = *eptr++;              c = *eptr++;
2389              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2390              }              }
2391            }            }
2392          /* Control never gets here */          /* Control never gets here */
# Line 1739  for (;;) Line 2405  for (;;)
2405            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2406              {              {
2407              int len = 1;              int len = 1;
2408              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2409                  {
2410                  SCHECK_PARTIAL();
2411                  break;
2412                  }
2413              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2414              if (c > 255)              if (c > 255)
2415                {                {
# Line 1753  for (;;) Line 2423  for (;;)
2423              }              }
2424            for (;;)            for (;;)
2425              {              {
2426              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2427              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2428              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2429              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1765  for (;;) Line 2435  for (;;)
2435            {            {
2436            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2437              {              {
2438              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2439                  {
2440                  SCHECK_PARTIAL();
2441                  break;
2442                  }
2443              c = *eptr;              c = *eptr;
2444              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2445              eptr++;              eptr++;
2446              }              }
2447            while (eptr >= pp)            while (eptr >= pp)
2448              {              {
2449              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2450              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2451              eptr--;              eptr--;
2452              }              }
2453            }            }
2454    
2455          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2456          }          }
2457        }        }
2458      /* Control never gets here */      /* Control never gets here */
2459    
2460    
2461      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2462      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2463        mode, because Unicode properties are supported in non-UTF-8 mode. */
2464    
2465  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2466      case OP_XCLASS:      case OP_XCLASS:
# Line 1826  for (;;) Line 2501  for (;;)
2501    
2502        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2503          {          {
2504          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2505          GETCHARINC(c, eptr);            {
2506          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2507              MRRETURN(MATCH_NOMATCH);
2508              }
2509            GETCHARINCTEST(c, eptr);
2510            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2511          }          }
2512    
2513        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1843  for (;;) Line 2522  for (;;)
2522          {          {
2523          for (fi = min;; fi++)          for (fi = min;; fi++)
2524            {            {
2525            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2526            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2527            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2528            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2529            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2530                SCHECK_PARTIAL();
2531                MRRETURN(MATCH_NOMATCH);
2532                }
2533              GETCHARINCTEST(c, eptr);
2534              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2535            }            }
2536          /* Control never gets here */          /* Control never gets here */
2537          }          }
# Line 1860  for (;;) Line 2544  for (;;)
2544          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2545            {            {
2546            int len = 1;            int len = 1;
2547            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2548            GETCHARLEN(c, eptr, len);              {
2549                SCHECK_PARTIAL();
2550                break;
2551                }
2552              GETCHARLENTEST(c, eptr, len);
2553            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2554            eptr += len;            eptr += len;
2555            }            }
2556          for(;;)          for(;;)
2557            {            {
2558            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2559            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2560            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2561            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2562            }            }
2563          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2564          }          }
2565    
2566        /* Control never gets here */        /* Control never gets here */
# Line 1888  for (;;) Line 2576  for (;;)
2576        length = 1;        length = 1;
2577        ecode++;        ecode++;
2578        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2579        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2580        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2581            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2582            MRRETURN(MATCH_NOMATCH);
2583            }
2584          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2585        }        }
2586      else      else
2587  #endif  #endif
2588    
2589      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2590        {        {
2591        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2592        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2593            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2594            MRRETURN(MATCH_NOMATCH);
2595            }
2596          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2597        ecode += 2;        ecode += 2;
2598        }        }
2599      break;      break;
# Line 1912  for (;;) Line 2608  for (;;)
2608        ecode++;        ecode++;
2609        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2610    
2611        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2612            {
2613            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2614            MRRETURN(MATCH_NOMATCH);
2615            }
2616    
2617        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2618        can use the fast lookup table. */        can use the fast lookup table. */
2619    
2620        if (fc < 128)        if (fc < 128)
2621          {          {
2622          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2623          }          }
2624    
2625        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
2626    
2627        else        else
2628          {          {
2629          int dc;          unsigned int dc;
2630          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2631          ecode += length;          ecode += length;
2632    
# Line 1936  for (;;) Line 2636  for (;;)
2636          if (fc != dc)          if (fc != dc)
2637            {            {
2638  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2639            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2640  #endif  #endif
2641              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2642            }            }
2643          }          }
2644        }        }
# Line 1947  for (;;) Line 2647  for (;;)
2647    
2648      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2649        {        {
2650        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2651        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2652            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2653            MRRETURN(MATCH_NOMATCH);
2654            }
2655          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2656        ecode += 2;        ecode += 2;
2657        }        }
2658      break;      break;
2659    
2660      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2661    
2662      case OP_EXACT:      case OP_EXACT:
2663      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2664      ecode += 3;      ecode += 3;
2665      goto REPEATCHAR;      goto REPEATCHAR;
2666    
2667        case OP_POSUPTO:
2668        possessive = TRUE;
2669        /* Fall through */
2670    
2671      case OP_UPTO:      case OP_UPTO:
2672      case OP_MINUPTO:      case OP_MINUPTO:
2673      min = 0;      min = 0;
# Line 1968  for (;;) Line 2676  for (;;)
2676      ecode += 3;      ecode += 3;
2677      goto REPEATCHAR;      goto REPEATCHAR;
2678    
2679        case OP_POSSTAR:
2680        possessive = TRUE;
2681        min = 0;
2682        max = INT_MAX;
2683        ecode++;
2684        goto REPEATCHAR;
2685    
2686        case OP_POSPLUS:
2687        possessive = TRUE;
2688        min = 1;
2689        max = INT_MAX;
2690        ecode++;
2691        goto REPEATCHAR;
2692    
2693        case OP_POSQUERY:
2694        possessive = TRUE;
2695        min = 0;
2696        max = 1;
2697        ecode++;
2698        goto REPEATCHAR;
2699    
2700      case OP_STAR:      case OP_STAR:
2701      case OP_MINSTAR:      case OP_MINSTAR:
2702      case OP_PLUS:      case OP_PLUS:
# Line 1976  for (;;) Line 2705  for (;;)
2705      case OP_MINQUERY:      case OP_MINQUERY:
2706      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2707      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2708    
2709      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2710      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2711      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2712    
2713      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2714    
2715      REPEATCHAR:      REPEATCHAR:
2716  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1991  for (;;) Line 2719  for (;;)
2719        length = 1;        length = 1;
2720        charptr = ecode;        charptr = ecode;
2721        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2722        ecode += length;        ecode += length;
2723    
2724        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 1999  for (;;) Line 2726  for (;;)
2726    
2727        if (length > 1)        if (length > 1)
2728          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2729  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2730          int othercase;          unsigned int othercase;
2731          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2732              (othercase = _pcre_ucp_othercase(fc)) >= 0 &&              (othercase = UCD_OTHERCASE(fc)) != fc)
              othercase >= 0)  
2733            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2734            else oclength = 0;
2735  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2736    
2737          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2738            {            {
2739            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2740            /* Need braces because of following else */              memcmp(eptr, charptr, length) == 0) eptr += length;
2741            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2742              else if (oclength > 0 &&
2743                       eptr <= md->end_subject - oclength &&
2744                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2745    #endif  /* SUPPORT_UCP */
2746            else            else
2747              {              {
2748              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2749              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2750              }              }
2751            }            }
2752    
# Line 2028  for (;;) Line 2756  for (;;)
2756            {            {
2757            for (fi = min;; fi++)            for (fi = min;; fi++)
2758              {              {
2759              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2760              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2761              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2762              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2763              /* Need braces because of following else */                memcmp(eptr, charptr, length) == 0) eptr += length;
2764              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2765                else if (oclength > 0 &&
2766                         eptr <= md->end_subject - oclength &&
2767                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2768    #endif  /* SUPPORT_UCP */
2769              else              else
2770                {                {
2771                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2772                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2773                }                }
2774              }              }
2775            /* Control never gets here */            /* Control never gets here */
2776            }            }
2777          else  
2778            else  /* Maximize */
2779            {            {
2780            pp = eptr;            pp = eptr;
2781            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2782              {              {
2783              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2784              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2785              else if (oclength == 0) break;  #ifdef SUPPORT_UCP
2786                else if (oclength > 0 &&
2787                         eptr <= md->end_subject - oclength &&
2788                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2789    #endif  /* SUPPORT_UCP */
2790              else              else
2791                {                {
2792                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2793                eptr += oclength;                break;
2794                }                }
2795              }              }
2796            while (eptr >= pp)  
2797             {            if (possessive) continue;
2798             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);  
2799             if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for(;;)
2800             eptr -= length;              {
2801             }              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2802            RRETURN(MATCH_NOMATCH);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2803                if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2804    #ifdef SUPPORT_UCP
2805                eptr--;
2806                BACKCHAR(eptr);
2807    #else   /* without SUPPORT_UCP */
2808                eptr -= length;
2809    #endif  /* SUPPORT_UCP */
2810                }
2811            }            }
2812          /* Control never gets here */          /* Control never gets here */
2813          }          }
# Line 2075  for (;;) Line 2820  for (;;)
2820  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2821    
2822      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2823        {  
2824        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2825    
2826      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2827      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2096  for (;;) Line 2839  for (;;)
2839        {        {
2840        fc = md->lcc[fc];        fc = md->lcc[fc];
2841        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2842          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2843            if (eptr >= md->end_subject)
2844              {
2845              SCHECK_PARTIAL();
2846              MRRETURN(MATCH_NOMATCH);
2847              }
2848            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2849            }
2850        if (min == max) continue;        if (min == max) continue;
2851        if (minimize)        if (minimize)
2852          {          {
2853          for (fi = min;; fi++)          for (fi = min;; fi++)
2854            {            {
2855            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2856            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2857            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2858                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2859              RRETURN(MATCH_NOMATCH);              {
2860                SCHECK_PARTIAL();
2861                MRRETURN(MATCH_NOMATCH);
2862                }
2863              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2864            }            }
2865          /* Control never gets here */          /* Control never gets here */
2866          }          }
2867        else        else  /* Maximize */
2868          {          {
2869          pp = eptr;          pp = eptr;
2870          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2871            {            {
2872            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2873                {
2874                SCHECK_PARTIAL();
2875                break;
2876                }
2877              if (fc != md->lcc[*eptr]) break;
2878            eptr++;            eptr++;
2879            }            }
2880    
2881            if (possessive) continue;
2882    
2883          while (eptr >= pp)          while (eptr >= pp)
2884            {            {
2885            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2886            eptr--;            eptr--;
2887            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2888            }            }
2889          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2890          }          }
2891        /* Control never gets here */        /* Control never gets here */
2892        }        }
# Line 2133  for (;;) Line 2895  for (;;)
2895    
2896      else      else
2897        {        {
2898        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2899            {
2900            if (eptr >= md->end_subject)
2901              {
2902              SCHECK_PARTIAL();
2903              MRRETURN(MATCH_NOMATCH);
2904              }
2905            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2906            }
2907    
2908        if (min == max) continue;        if (min == max) continue;
2909    
2910        if (minimize)        if (minimize)
2911          {          {
2912          for (fi = min;; fi++)          for (fi = min;; fi++)
2913            {            {
2914            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2915            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2916            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2917              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2918                {
2919                SCHECK_PARTIAL();
2920                MRRETURN(MATCH_NOMATCH);
2921                }
2922              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2923            }            }
2924          /* Control never gets here */          /* Control never gets here */
2925          }          }
2926        else        else  /* Maximize */
2927          {          {
2928          pp = eptr;          pp = eptr;
2929          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2930            {            {
2931            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
2932                {
2933                SCHECK_PARTIAL();
2934                break;
2935                }
2936              if (fc != *eptr) break;
2937            eptr++;            eptr++;
2938            }            }
2939            if (possessive) continue;
2940    
2941          while (eptr >= pp)          while (eptr >= pp)
2942            {            {
2943            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2944            eptr--;            eptr--;
2945            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2946            }            }
2947          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2948          }          }
2949        }        }
2950      /* Control never gets here */      /* Control never gets here */
# Line 2169  for (;;) Line 2953  for (;;)
2953      checking can be multibyte. */      checking can be multibyte. */
2954    
2955      case OP_NOT:      case OP_NOT:
2956      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2957          {
2958          SCHECK_PARTIAL();
2959          MRRETURN(MATCH_NOMATCH);
2960          }
2961      ecode++;      ecode++;
2962      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2963      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2178  for (;;) Line 2966  for (;;)
2966        if (c < 256)        if (c < 256)
2967  #endif  #endif
2968        c = md->lcc[c];        c = md->lcc[c];
2969        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
2970        }        }
2971      else      else
2972        {        {
2973        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
2974        }        }
2975      break;      break;
2976    
# Line 2206  for (;;) Line 2994  for (;;)
2994      ecode += 3;      ecode += 3;
2995      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2996    
2997        case OP_NOTPOSSTAR:
2998        possessive = TRUE;
2999        min = 0;
3000        max = INT_MAX;
3001        ecode++;
3002        goto REPEATNOTCHAR;
3003    
3004        case OP_NOTPOSPLUS:
3005        possessive = TRUE;
3006        min = 1;
3007        max = INT_MAX;
3008        ecode++;
3009        goto REPEATNOTCHAR;
3010    
3011        case OP_NOTPOSQUERY:
3012        possessive = TRUE;
3013        min = 0;
3014        max = 1;
3015        ecode++;
3016        goto REPEATNOTCHAR;
3017    
3018        case OP_NOTPOSUPTO:
3019        possessive = TRUE;
3020        min = 0;
3021        max = GET2(ecode, 1);
3022        ecode += 3;
3023        goto REPEATNOTCHAR;
3024    
3025      case OP_NOTSTAR:      case OP_NOTSTAR:
3026      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
3027      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2218  for (;;) Line 3034  for (;;)
3034      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3035      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3036    
3037      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3038    
3039      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3040      fc = *ecode++;      fc = *ecode++;
3041    
3042      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2245  for (;;) Line 3058  for (;;)
3058        /* UTF-8 mode */        /* UTF-8 mode */
3059        if (utf8)        if (utf8)
3060          {          {
3061          register int d;          register unsigned int d;
3062          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3063            {            {
3064              if (eptr >= md->end_subject)
3065                {
3066                SCHECK_PARTIAL();
3067                MRRETURN(MATCH_NOMATCH);
3068                }
3069            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3070            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3071            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3072            }            }
3073          }          }
3074        else        else
# Line 2259  for (;;) Line 3077  for (;;)
3077        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3078          {          {
3079          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3080            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3081              if (eptr >= md->end_subject)
3082                {
3083                SCHECK_PARTIAL();
3084                MRRETURN(MATCH_NOMATCH);
3085                }
3086              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3087              }
3088          }          }
3089    
3090        if (min == max) continue;        if (min == max) continue;
# Line 2270  for (;;) Line 3095  for (;;)
3095          /* UTF-8 mode */          /* UTF-8 mode */
3096          if (utf8)          if (utf8)
3097            {            {
3098            register int d;            register unsigned int d;
3099            for (fi = min;; fi++)            for (fi = min;; fi++)
3100              {              {
3101              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3102              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3103                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3104                if (eptr >= md->end_subject)
3105                  {
3106                  SCHECK_PARTIAL();
3107                  MRRETURN(MATCH_NOMATCH);
3108                  }
3109              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3110              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3111              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3112              }              }
3113            }            }
3114          else          else
# Line 2287  for (;;) Line 3117  for (;;)
3117            {            {
3118            for (fi = min;; fi++)            for (fi = min;; fi++)
3119              {              {
3120              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3121              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3122              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3123                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3124                  {
3125                  SCHECK_PARTIAL();
3126                  MRRETURN(MATCH_NOMATCH);
3127                  }
3128                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3129              }              }
3130            }            }
3131          /* Control never gets here */          /* Control never gets here */
# Line 2306  for (;;) Line 3141  for (;;)
3141          /* UTF-8 mode */          /* UTF-8 mode */
3142          if (utf8)          if (utf8)
3143            {            {
3144            register int d;            register unsigned int d;
3145            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3146              {              {
3147              int len = 1;              int len = 1;
3148              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3149                  {
3150                  SCHECK_PARTIAL();
3151                  break;
3152                  }
3153              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3154              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3155              if (fc == d) break;              if (fc == d) break;
3156              eptr += len;              eptr += len;
3157              }              }
3158            for(;;)          if (possessive) continue;
3159            for(;;)
3160              {              {
3161              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3162              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3163              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3164              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2330  for (;;) Line 3170  for (;;)
3170            {            {
3171            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3172              {              {
3173              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3174                  {
3175                  SCHECK_PARTIAL();
3176                  break;
3177                  }
3178                if (fc == md->lcc[*eptr]) break;
3179              eptr++;              eptr++;
3180              }              }
3181              if (possessive) continue;
3182            while (eptr >= pp)            while (eptr >= pp)
3183              {              {
3184              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3185              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3186              eptr--;              eptr--;
3187              }              }
3188            }            }
3189    
3190          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3191          }          }
3192        /* Control never gets here */        /* Control never gets here */
3193        }        }
# Line 2354  for (;;) Line 3200  for (;;)
3200        /* UTF-8 mode */        /* UTF-8 mode */
3201        if (utf8)        if (utf8)
3202          {          {
3203          register int d;          register unsigned int d;
3204          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3205            {            {
3206              if (eptr >= md->end_subject)
3207                {
3208                SCHECK_PARTIAL();
3209                MRRETURN(MATCH_NOMATCH);
3210                }
3211            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3212            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3213            }            }
3214          }          }
3215        else        else
# Line 2366  for (;;) Line 3217  for (;;)
3217        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3218          {          {
3219          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3220            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3221              if (eptr >= md->end_subject)
3222                {
3223                SCHECK_PARTIAL();
3224                MRRETURN(MATCH_NOMATCH);
3225                }
3226              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3227              }
3228          }          }
3229    
3230        if (min == max) continue;        if (min == max) continue;
# Line 2377  for (;;) Line 3235  for (;;)
3235          /* UTF-8 mode */          /* UTF-8 mode */
3236          if (utf8)          if (utf8)
3237            {            {
3238            register int d;            register unsigned int d;
3239            for (fi = min;; fi++)            for (fi = min;; fi++)
3240              {              {
3241              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3242              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3243                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3244                if (eptr >= md->end_subject)
3245                  {
3246                  SCHECK_PARTIAL();
3247                  MRRETURN(MATCH_NOMATCH);
3248                  }
3249              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3250              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3251              }              }
3252            }            }
3253          else          else
# Line 2393  for (;;) Line 3256  for (;;)
3256            {            {
3257            for (fi = min;; fi++)            for (fi = min;; fi++)
3258              {              {
3259              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3260              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3261              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3262                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3263                  {
3264                  SCHECK_PARTIAL();
3265                  MRRETURN(MATCH_NOMATCH);
3266                  }
3267                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3268              }              }
3269            }            }
3270          /* Control never gets here */          /* Control never gets here */
# Line 2412  for (;;) Line 3280  for (;;)
3280          /* UTF-8 mode */          /* UTF-8 mode */
3281          if (utf8)          if (utf8)
3282            {            {
3283            register int d;            register unsigned int d;
3284            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3285              {              {
3286              int len = 1;              int len = 1;
3287              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3288                  {
3289                  SCHECK_PARTIAL();
3290                  break;
3291                  }
3292              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3293              if (fc == d) break;              if (fc == d) break;
3294              eptr += len;              eptr += len;
3295              }              }
3296              if (possessive) continue;
3297            for(;;)            for(;;)
3298              {              {
3299              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3300              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3301              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3302              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2435  for (;;) Line 3308  for (;;)
3308            {            {
3309            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3310              {              {
3311              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3312                  {
3313                  SCHECK_PARTIAL();
3314                  break;
3315                  }
3316                if (fc == *eptr) break;
3317              eptr++;              eptr++;
3318              }              }
3319              if (possessive) continue;
3320            while (eptr >= pp)            while (eptr >= pp)
3321              {              {
3322              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3323              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3324              eptr--;              eptr--;
3325              }              }
3326            }            }
3327    
3328          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3329          }          }
3330        }        }
3331      /* Control never gets here */      /* Control never gets here */
# Line 2469  for (;;) Line 3348  for (;;)
3348      ecode += 3;      ecode += 3;
3349      goto REPEATTYPE;      goto REPEATTYPE;
3350    
3351        case OP_TYPEPOSSTAR:
3352        possessive = TRUE;
3353        min = 0;
3354        max = INT_MAX;
3355        ecode++;
3356        goto REPEATTYPE;
3357    
3358        case OP_TYPEPOSPLUS:
3359        possessive = TRUE;
3360        min = 1;
3361        max = INT_MAX;
3362        ecode++;
3363        goto REPEATTYPE;
3364    
3365        case OP_TYPEPOSQUERY:
3366        possessive = TRUE;
3367        min = 0;
3368        max = 1;
3369        ecode++;
3370        goto REPEATTYPE;
3371    
3372        case OP_TYPEPOSUPTO:
3373        possessive = TRUE;
3374        min = 0;
3375        max = GET2(ecode, 1);
3376        ecode += 3;
3377        goto REPEATTYPE;
3378    
3379      case OP_TYPESTAR:      case OP_TYPESTAR:
3380      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
3381      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2500  for (;;) Line 3407  for (;;)
3407    
3408      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3409      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3410      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3411      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3412      and single-bytes. */      and single-bytes. */
3413    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3414      if (min > 0)      if (min > 0)
3415        {        {
3416  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2515  for (;;) Line 3419  for (;;)
3419          switch(prop_type)          switch(prop_type)
3420            {            {
3421            case PT_ANY:            case PT_ANY:
3422            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3423            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3424              {              {
3425              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3426              GETCHARINC(c, eptr);                {
3427                  SCHECK_PARTIAL();
3428                  MRRETURN(MATCH_NOMATCH);
3429                  }
3430                GETCHARINCTEST(c, eptr);
3431              }              }
3432            break;            break;
3433    
3434            case PT_LAMP:            case PT_LAMP:
3435            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3436              {              {
3437              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3438              GETCHARINC(c, eptr);                {
3439              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3440                  MRRETURN(MATCH_NOMATCH);
3441                  }
3442                GETCHARINCTEST(c, eptr);
3443                prop_chartype = UCD_CHARTYPE(c);
3444              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3445                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3446                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3447                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3448              }              }
3449            break;            break;
3450    
3451            case PT_GC:            case PT_GC:
3452            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3453              {              {
3454              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3455              GETCHARINC(c, eptr);                {
3456              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3457                  MRRETURN(MATCH_NOMATCH);
3458                  }
3459                GETCHARINCTEST(c, eptr);
3460                prop_category = UCD_CATEGORY(c);
3461              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3462                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3463              }              }
3464            break;            break;
3465    
3466            case PT_PC:            case PT_PC:
3467            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3468              {              {
3469              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3470              GETCHARINC(c, eptr);                {
3471              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3472                  MRRETURN(MATCH_NOMATCH);
3473                  }
3474                GETCHARINCTEST(c, eptr);
3475                prop_chartype = UCD_CHARTYPE(c);
3476              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3477                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3478              }              }
3479            break;            break;
3480    
3481            case PT_SC:            case PT_SC:
3482            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3483              {              {
3484              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3485              GETCHARINC(c, eptr);                {
3486              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3487                  MRRETURN(MATCH_NOMATCH);
3488                  }
3489                GETCHARINCTEST(c, eptr);
3490                prop_script = UCD_SCRIPT(c);
3491              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3492                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3493              }              }
3494            break;            break;
3495    
3496            default:            default:
3497            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
3498            }            }
3499          }          }
3500    
# Line 2582  for (;;) Line 3505  for (;;)
3505          {          {
3506          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3507            {            {
3508              if (eptr >= md->end_subject)
3509                {
3510                SCHECK_PARTIAL();
3511                MRRETURN(MATCH_NOMATCH);
3512                }
3513            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3514            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3515            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3516            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3517              {              {
3518              int len = 1;              int len = 1;
3519              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3520                {                else { GETCHARLEN(c, eptr, len); }
3521                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3522              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3523              eptr += len;              eptr += len;
3524              }              }
# Line 2610  for (;;) Line 3536  for (;;)
3536          case OP_ANY:          case OP_ANY:
3537          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3538            {            {
3539            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3540                 ((ims & PCRE_DOTALL) == 0 &&              {
3541                   eptr <= md->end_subject - md->nllen &&              SCHECK_PARTIAL();
3542                   IS_NEWLINE(eptr)))              MRRETURN(MATCH_NOMATCH);
3543              RRETURN(MATCH_NOMATCH);              }
3544              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3545              eptr++;
3546              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3547              }
3548            break;
3549    
3550            case OP_ALLANY:
3551            for (i = 1; i <= min; i++)
3552              {
3553              if (eptr >= md->end_subject)
3554                {
3555                SCHECK_PARTIAL();
3556                MRRETURN(MATCH_NOMATCH);
3557                }
3558            eptr++;            eptr++;
3559            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3560            }            }
3561          break;          break;
3562    
3563          case OP_ANYBYTE:          case OP_ANYBYTE:
3564            if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3565          eptr += min;          eptr += min;
3566          break;          break;
3567    
3568            case OP_ANYNL:
3569            for (i = 1; i <= min; i++)
3570              {
3571              if (eptr >= md->end_subject)
3572                {
3573                SCHECK_PARTIAL();
3574                MRRETURN(MATCH_NOMATCH);
3575                }
3576              GETCHARINC(c, eptr);
3577              switch(c)
3578                {
3579                default: MRRETURN(MATCH_NOMATCH);
3580                case 0x000d:
3581                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3582                break;
3583    
3584                case 0x000a:
3585                break;
3586    
3587                case 0x000b:
3588                case 0x000c:
3589                case 0x0085:
3590                case 0x2028:
3591                case 0x2029:
3592                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3593                break;
3594                }
3595              }
3596            break;
3597    
3598            case OP_NOT_HSPACE:
3599            for (i = 1; i <= min; i++)
3600              {
3601              if (eptr >= md->end_subject)
3602                {
3603                SCHECK_PARTIAL();
3604                MRRETURN(MATCH_NOMATCH);
3605                }
3606              GETCHARINC(c, eptr);
3607              switch(c)
3608                {
3609                default: break;
3610                case 0x09:      /* HT */
3611                case 0x20:      /* SPACE */
3612                case 0xa0:      /* NBSP */
3613                case 0x1680:    /* OGHAM SPACE MARK */
3614                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3615                case 0x2000:    /* EN QUAD */
3616                case 0x2001:    /* EM QUAD */
3617                case 0x2002:    /* EN SPACE */
3618                case 0x2003:    /* EM SPACE */
3619                case 0x2004:    /* THREE-PER-EM SPACE */
3620                case 0x2005:    /* FOUR-PER-EM SPACE */
3621                case 0x2006:    /* SIX-PER-EM SPACE */
3622                case 0x2007:    /* FIGURE SPACE */
3623                case 0x2008:    /* PUNCTUATION SPACE */
3624                case 0x2009:    /* THIN SPACE */
3625                case 0x200A:    /* HAIR SPACE */
3626                case 0x202f:    /* NARROW NO-BREAK SPACE */
3627                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3628                case 0x3000:    /* IDEOGRAPHIC SPACE */
3629                MRRETURN(MATCH_NOMATCH);
3630                }
3631              }
3632            break;
3633    
3634            case OP_HSPACE:
3635            for (i = 1; i <= min; i++)
3636              {
3637              if (eptr >= md->end_subject)
3638                {
3639                SCHECK_PARTIAL();
3640                MRRETURN(MATCH_NOMATCH);
3641                }
3642              GETCHARINC(c, eptr);
3643              switch(c)
3644                {
3645                default: MRRETURN(MATCH_NOMATCH);
3646                case 0x09:      /* HT */
3647                case 0x20:      /* SPACE */
3648                case 0xa0:      /* NBSP */
3649                case 0x1680:    /* OGHAM SPACE MARK */
3650                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3651                case 0x2000:    /* EN QUAD */
3652                case 0x2001:    /* EM QUAD */
3653                case 0x2002:    /* EN SPACE */
3654                case 0x2003:    /* EM SPACE */
3655                case 0x2004:    /* THREE-PER-EM SPACE */
3656                case 0x2005:    /* FOUR-PER-EM SPACE */
3657                case 0x2006:    /* SIX-PER-EM SPACE */
3658                case 0x2007:    /* FIGURE SPACE */
3659                case 0x2008:    /* PUNCTUATION SPACE */
3660                case 0x2009:    /* THIN SPACE */
3661                case 0x200A:    /* HAIR SPACE */
3662                case 0x202f:    /* NARROW NO-BREAK SPACE */
3663                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3664                case 0x3000:    /* IDEOGRAPHIC SPACE */
3665                break;
3666                }
3667              }
3668            break;
3669    
3670            case OP_NOT_VSPACE:
3671            for (i = 1; i <= min; i++)
3672              {
3673              if (eptr >= md->end_subject)
3674                {
3675                SCHECK_PARTIAL();
3676                MRRETURN(MATCH_NOMATCH);
3677                }
3678              GETCHARINC(c, eptr);
3679              switch(c)
3680                {
3681                default: break;
3682                case 0x0a:      /* LF */
3683                case 0x0b:      /* VT */
3684                case 0x0c:      /* FF */
3685                case 0x0d:      /* CR */
3686                case 0x85:      /* NEL */
3687                case 0x2028:    /* LINE SEPARATOR */
3688                case 0x2029:    /* PARAGRAPH SEPARATOR */
3689                MRRETURN(MATCH_NOMATCH);
3690                }
3691              }
3692            break;
3693    
3694            case OP_VSPACE:
3695            for (i = 1; i <= min; i++)
3696              {
3697              if (eptr >= md->end_subject)
3698                {
3699                SCHECK_PARTIAL();
3700                MRRETURN(MATCH_NOMATCH);
3701                }
3702              GETCHARINC(c, eptr);
3703              switch(c)
3704                {
3705                default: MRRETURN(MATCH_NOMATCH);
3706                case 0x0a:      /* LF */
3707                case 0x0b:      /* VT */
3708                case 0x0c:      /* FF */
3709                case 0x0d:      /* CR */
3710                case 0x85:      /* NEL */
3711                case 0x2028:    /* LINE SEPARATOR */
3712                case 0x2029:    /* PARAGRAPH SEPARATOR */
3713                break;
3714                }
3715              }
3716            break;
3717    
3718          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3719          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3720            {            {
3721            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3722                {
3723                SCHECK_PARTIAL();
3724                MRRETURN(MATCH_NOMATCH);
3725                }
3726            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3727            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3728              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
3729            }            }
3730          break;          break;
3731    
3732          case OP_DIGIT:          case OP_DIGIT:
3733          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3734            {            {
3735            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3736               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3737              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3738                MRRETURN(MATCH_NOMATCH);
3739                }
3740              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3741                MRRETURN(MATCH_NOMATCH);
3742            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3743            }            }
3744          break;          break;
# Line 2647  for (;;) Line 3746  for (;;)
3746          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3747          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3748            {            {
3749            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3750               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3751              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3752            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3753                }
3754              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3755                MRRETURN(MATCH_NOMATCH);
3756              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3757            }            }
3758          break;          break;
3759    
3760          case OP_WHITESPACE:          case OP_WHITESPACE:
3761          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3762            {            {
3763            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3764               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3765              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3766                MRRETURN(MATCH_NOMATCH);
3767                }
3768              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3769                MRRETURN(MATCH_NOMATCH);
3770            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3771            }            }
3772          break;          break;
# Line 2667  for (;;) Line 3774  for (;;)
3774          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3775          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3776            {            {
3777            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3778               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))              {
3779              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3780            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3781                }
3782              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3783                MRRETURN(MATCH_NOMATCH);
3784              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3785            }            }
3786          break;          break;
3787    
3788          case OP_WORDCHAR:          case OP_WORDCHAR:
3789          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3790            {            {
3791            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3792               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3793              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3794                MRRETURN(MATCH_NOMATCH);
3795                }
3796              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3797                MRRETURN(MATCH_NOMATCH);
3798            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3799            }            }
3800          break;          break;
# Line 2697  for (;;) Line 3812  for (;;)
3812        switch(ctype)        switch(ctype)
3813          {          {
3814          case OP_ANY:          case OP_ANY:
3815          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3816            {            {
3817            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
3818              {              {
3819              if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))              SCHECK_PARTIAL();
3820                RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
             eptr++;  
3821              }              }
3822              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3823              eptr++;
3824            }            }
3825          else eptr += min;          break;
3826    
3827            case OP_ALLANY:
3828            if (eptr > md->end_subject - min)
3829              {
3830              SCHECK_PARTIAL();
3831              MRRETURN(MATCH_NOMATCH);
3832              }
3833            eptr += min;
3834          break;          break;
3835    
3836          case OP_ANYBYTE:          case OP_ANYBYTE:
3837            if (eptr > md->end_subject - min)
3838              {
3839              SCHECK_PARTIAL();
3840              MRRETURN(MATCH_NOMATCH);
3841              }
3842          eptr += min;          eptr += min;
3843          break;          break;
3844    
3845            case OP_ANYNL:
3846            for (i = 1; i <= min; i++)
3847              {
3848              if (eptr >= md->end_subject)
3849                {
3850                SCHECK_PARTIAL();
3851                MRRETURN(MATCH_NOMATCH);
3852                }
3853              switch(*eptr++)
3854                {