/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 144 by ph10, Mon Apr 2 13:32:07 2007 UTC revision 427 by ph10, Fri Aug 28 09:55:54 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 53  possible. There are also some static sup Line 57  possible. There are also some static sup
57  #undef min  #undef min
58  #undef max  #undef max
59    
 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
   
 #define EPTR_WORK_SIZE (1000)  
   
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 70  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_THEN         (-996)
78    
79  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
80  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 152  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 188  calls by keeping local variables that ne Line 220  calls by keeping local variables that ne
220  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
221  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
222  always used to.  always used to.
223    
224    The original heap-recursive code used longjmp(). However, it seems that this
225    can be very slow on some operating systems. Following a suggestion from Stan
226    Switzer, the use of longjmp() has been abolished, at the cost of having to
227    provide a unique number for each call to RMATCH. There is no way of generating
228    a sequence of numbers at compile time in C. I have given them names, to make
229    them stand out more clearly.
230    
231    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
232    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
233    tests. Furthermore, not using longjmp() means that local dynamic variables
234    don't have indeterminate values; this has meant that the frame size can be
235    reduced because the result can be "passed back" by straight setting of the
236    variable instead of being passed in the frame.
237  ****************************************************************************  ****************************************************************************
238  ***************************************************************************/  ***************************************************************************/
239    
240    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
241    below must be updated in sync.  */
242    
243    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
244           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
245           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
246           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
247           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
248           RM51,  RM52, RM53, RM54 };
249    
250  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
251  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
252    actually used in this definition. */
253    
254  #ifndef NO_RECURSE  #ifndef NO_RECURSE
255  #define REGISTER register  #define REGISTER register
256    
257  #ifdef DEBUG  #ifdef DEBUG
258  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
259    { \    { \
260    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
261    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
262    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
263    }    }
264  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 210  versions and production versions. */ Line 267  versions and production versions. */
267    return ra; \    return ra; \
268    }    }
269  #else  #else
270  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
271    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
272  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
273  #endif  #endif
274    
275  #else  #else
276    
277    
278  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
279  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
280  match(), which never changes. */  argument of match(), which never changes. */
281    
282  #define REGISTER  #define REGISTER
283    
284  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
285    {\    {\
286    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
287    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
288      {\    newframe->Xeptr = ra;\
289      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
290      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
291      newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
292      newframe->Xims = re;\    newframe->Xims = re;\
293      newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
294      newframe->Xflags = rg;\    newframe->Xflags = rg;\
295      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
296      newframe->Xprevframe = frame;\    newframe->Xprevframe = frame;\
297      frame = newframe;\    frame = newframe;\
298      DPRINTF(("restarting from line %d\n", __LINE__));\    DPRINTF(("restarting from line %d\n", __LINE__));\
299      goto HEAP_RECURSE;\    goto HEAP_RECURSE;\
300      }\    L_##rw:\
301    else\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     {\  
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
302    }    }
303    
304  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 256  match(), which never changes. */ Line 308  match(), which never changes. */
308    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
309    if (frame != NULL)\    if (frame != NULL)\
310      {\      {\
311      frame->Xresult = ra;\      rrc = ra;\
312      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
313      }\      }\
314    return ra;\    return ra;\
315    }    }
# Line 271  typedef struct heapframe { Line 322  typedef struct heapframe {
322    
323    /* Function arguments that may change */    /* Function arguments that may change */
324    
325    const uschar *Xeptr;    USPTR Xeptr;
326    const uschar *Xecode;    const uschar *Xecode;
327      USPTR Xmstart;
328    int Xoffset_top;    int Xoffset_top;
329    long int Xims;    long int Xims;
330    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 281  typedef struct heapframe { Line 333  typedef struct heapframe {
333    
334    /* Function local variables */    /* Function local variables */
335    
336    const uschar *Xcallpat;    USPTR Xcallpat;
337    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
338    const uschar *Xdata;    USPTR Xcharptr;
339    const uschar *Xnext;  #endif
340    const uschar *Xpp;    USPTR Xdata;
341    const uschar *Xprev;    USPTR Xnext;
342    const uschar *Xsaved_eptr;    USPTR Xpp;
343      USPTR Xprev;
344      USPTR Xsaved_eptr;
345    
346    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
347    
# Line 308  typedef struct heapframe { Line 362  typedef struct heapframe {
362    uschar Xocchars[8];    uschar Xocchars[8];
363  #endif  #endif
364    
365      int Xcodelink;
366    int Xctype;    int Xctype;
367    unsigned int Xfc;    unsigned int Xfc;
368    int Xfi;    int Xfi;
# Line 323  typedef struct heapframe { Line 378  typedef struct heapframe {
378    
379    eptrblock Xnewptrb;    eptrblock Xnewptrb;
380    
381    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
382    
383    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
384    
385  } heapframe;  } heapframe;
386    
# Line 344  typedef struct heapframe { Line 398  typedef struct heapframe {
398    
399  /* This function is called recursively in many circumstances. Whenever it  /* This function is called recursively in many circumstances. Whenever it
400  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
401  same response.  same response. */
402    
403    /* These macros pack up tests that are used for partial matching, and which
404    appear several times in the code. We set the "hit end" flag if the pointer is
405    at the end of the subject and also past the start of the subject (i.e.
406    something has been matched). For hard partial matching, we then return
407    immediately. The second one is used when we already know we are past the end of
408    the subject. */
409    
410    #define CHECK_PARTIAL()\
411      if (md->partial && eptr >= md->end_subject && eptr > mstart)\
412        {\
413        md->hitend = TRUE;\
414        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
415        }
416    
417  Performance note: It might be tempting to extract commonly used fields from the  #define SCHECK_PARTIAL()\
418  md structure (e.g. utf8, end_subject) into individual variables to improve    if (md->partial && eptr > mstart)\
419        {\
420        md->hitend = TRUE;\
421        md->hitend = TRUE;\
422        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
423        }
424    
425    
426    /* Performance note: It might be tempting to extract commonly used fields from
427    the md structure (e.g. utf8, end_subject) into individual variables to improve
428  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
429  made performance worse.  made performance worse.
430    
431  Arguments:  Arguments:
432     eptr        pointer to current character in subject     eptr        pointer to current character in subject
433     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
434       mstart      pointer to the current match start position (can be modified
435                     by encountering \K)
436     offset_top  current top pointer     offset_top  current top pointer
437     md          pointer to "static" info for the match     md          pointer to "static" info for the match
438     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 363  Arguments: Line 442  Arguments:
442                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
443                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
444                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
445     rdepth      the recursion depth     rdepth      the recursion depth
446    
447  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 373  Returns:       MATCH_MATCH if matched Line 451  Returns:       MATCH_MATCH if matched
451  */  */
452    
453  static int  static int
454  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
455    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
456    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
457  {  {
# Line 387  register unsigned int c;   /* Character Line 465  register unsigned int c;   /* Character
465  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
466    
467  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
468    int condcode;
469    
470  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
471  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 401  frame->Xprevframe = NULL;            /* Line 480  frame->Xprevframe = NULL;            /*
480    
481  frame->Xeptr = eptr;  frame->Xeptr = eptr;
482  frame->Xecode = ecode;  frame->Xecode = ecode;
483    frame->Xmstart = mstart;
484  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
485  frame->Xims = ims;  frame->Xims = ims;
486  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 415  HEAP_RECURSE: Line 495  HEAP_RECURSE:
495    
496  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
497  #define ecode              frame->Xecode  #define ecode              frame->Xecode
498    #define mstart             frame->Xmstart
499  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
500  #define ims                frame->Xims  #define ims                frame->Xims
501  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 427  HEAP_RECURSE: Line 508  HEAP_RECURSE:
508  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
509  #endif  #endif
510  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
511    #define codelink           frame->Xcodelink
512  #define data               frame->Xdata  #define data               frame->Xdata
513  #define next               frame->Xnext  #define next               frame->Xnext
514  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 507  int oclength; Line 589  int oclength;
589  uschar occhars[8];  uschar occhars[8];
590  #endif  #endif
591    
592    int codelink;
593  int ctype;  int ctype;
594  int length;  int length;
595  int max;  int max;
# Line 545  defined). However, RMATCH isn't like a f Line 628  defined). However, RMATCH isn't like a f
628  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
629  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
630    
631    #ifdef SUPPORT_UTF8
632    utf8 = md->utf8;       /* Local copy of the flag */
633    #else
634    utf8 = FALSE;
635    #endif
636    
637  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
638  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
639    
# Line 553  if (rdepth >= md->match_limit_recursion) Line 642  if (rdepth >= md->match_limit_recursion)
642    
643  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
644    
 #ifdef SUPPORT_UTF8  
 utf8 = md->utf8;       /* Local copy of the flag */  
 #else  
 utf8 = FALSE;  
 #endif  
   
645  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
646  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
647  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
648  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
649  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
650  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
651  already used. */  block that is used is on the stack, so a new one may be required for each
652    match(). */
653    
654  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
655    {    {
656    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
657    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
658      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
659    }    }
660    
661  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 589  for (;;) Line 666  for (;;)
666    op = *ecode;    op = *ecode;
667    
668    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
669    matching at least one subject character. */    matching at least one subject character. This code is now wrapped in a macro
670      because it appears several times below. */
671    
672    if (md->partial &&    CHECK_PARTIAL();
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
673    
674    switch(op)    switch(op)
675      {      {
676        case OP_FAIL:
677        RRETURN(MATCH_NOMATCH);
678    
679        case OP_PRUNE:
680        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
681          ims, eptrb, flags, RM51);
682        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
683        RRETURN(MATCH_PRUNE);
684    
685        case OP_COMMIT:
686        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
687          ims, eptrb, flags, RM52);
688        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
689        RRETURN(MATCH_COMMIT);
690    
691        case OP_SKIP:
692        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
693          ims, eptrb, flags, RM53);
694        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
695        md->start_match_ptr = eptr;   /* Pass back current position */
696        RRETURN(MATCH_SKIP);
697    
698        case OP_THEN:
699        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
700          ims, eptrb, flags, RM54);
701        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
702        RRETURN(MATCH_THEN);
703    
704      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
705      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
706      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 637  for (;;) Line 740  for (;;)
740        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
741        do        do
742          {          {
743          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
744            ims, eptrb, flags);            ims, eptrb, flags, RM1);
745          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
746          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
747          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
748          }          }
# Line 654  for (;;) Line 757  for (;;)
757        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
758        }        }
759    
760      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
761      bracket. */      as a non-capturing bracket. */
762    
763        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
764        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
765    
766      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
767    
768        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
769        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
770    
771      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
772      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
773      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
774      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
775        is set.*/
776    
777      case OP_BRA:      case OP_BRA:
778      case OP_SBRA:      case OP_SBRA:
# Line 670  for (;;) Line 780  for (;;)
780      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
781      for (;;)      for (;;)
782        {        {
783        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
784          {          {
785          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
786          flags |= match_tail_recursed;            {
787          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
788          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
789              goto TAIL_RECURSE;
790              }
791    
792            /* Possibly empty group; can't use tail recursion. */
793    
794            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
795              eptrb, flags, RM48);
796            RRETURN(rrc);
797          }          }
798    
799        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
800        otherwise return. */        otherwise return. */
801    
802        RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
803          eptrb, flags);          eptrb, flags, RM2);
804        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
805        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
806        }        }
807      /* Control never reaches here. */      /* Control never reaches here. */
# Line 696  for (;;) Line 814  for (;;)
814    
815      case OP_COND:      case OP_COND:
816      case OP_SCOND:      case OP_SCOND:
817      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
818    
819        /* Because of the way auto-callout works during compile, a callout item is
820        inserted between OP_COND and an assertion condition. */
821    
822        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
823          {
824          if (pcre_callout != NULL)
825            {
826            pcre_callout_block cb;
827            cb.version          = 1;   /* Version 1 of the callout block */
828            cb.callout_number   = ecode[LINK_SIZE+2];
829            cb.offset_vector    = md->offset_vector;
830            cb.subject          = (PCRE_SPTR)md->start_subject;
831            cb.subject_length   = md->end_subject - md->start_subject;
832            cb.start_match      = mstart - md->start_subject;
833            cb.current_position = eptr - md->start_subject;
834            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
835            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
836            cb.capture_top      = offset_top/2;
837            cb.capture_last     = md->capture_last;
838            cb.callout_data     = md->callout_data;
839            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
840            if (rrc < 0) RRETURN(rrc);
841            }
842          ecode += _pcre_OP_lengths[OP_CALLOUT];
843          }
844    
845        condcode = ecode[LINK_SIZE+1];
846    
847        /* Now see what the actual condition is */
848    
849        if (condcode == OP_RREF)         /* Recursion test */
850        {        {
851        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
852        condition = md->recursive != NULL &&        condition = md->recursive != NULL &&
# Line 704  for (;;) Line 854  for (;;)
854        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
855        }        }
856    
857      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF)    /* Group used test */
858        {        {
859        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
860        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
861        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
862        }        }
863    
864      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
865        {        {
866        condition = FALSE;        condition = FALSE;
867        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 723  for (;;) Line 873  for (;;)
873    
874      else      else
875        {        {
876        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
877            match_condassert);            match_condassert, RM3);
878        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
879          {          {
880          condition = TRUE;          condition = TRUE;
881          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
882          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
883          }          }
884        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
885          {          {
886          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
887          }          }
888        else        else
889          {          {
890          condition = FALSE;          condition = FALSE;
891          ecode += GET(ecode, 1);          ecode += codelink;
892          }          }
893        }        }
894    
895      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
896      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
897      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
898        group. If the second alternative doesn't exist, we can just plough on. */
899    
900      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
901        {        {
902        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
903        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
904        goto TAIL_RECURSE;          {
905            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
906            RRETURN(rrc);
907            }
908          else                       /* Group must match something */
909            {
910            flags = 0;
911            goto TAIL_RECURSE;
912            }
913        }        }
914      else      else                         /* Condition false & no alternative */
915        {        {
916        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
917        }        }
918      break;      break;
919    
920    
921      /* End of the pattern. If we are in a top-level recursion, we should      /* End of the pattern, either real or forced. If we are in a top-level
922      restore the offsets appropriately and continue from after the call. */      recursion, we should restore the offsets appropriately and continue from
923        after the call. */
924    
925        case OP_ACCEPT:
926      case OP_END:      case OP_END:
927      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
928        {        {
# Line 770  for (;;) Line 931  for (;;)
931        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
932        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
933          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
934        md->start_match = rec->save_start;        mstart = rec->save_start;
935        ims = original_ims;        ims = original_ims;
936        ecode = rec->after_call;        ecode = rec->after_call;
937        break;        break;
# Line 779  for (;;) Line 940  for (;;)
940      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
941      string - backtracking will then try other alternatives, if any. */      string - backtracking will then try other alternatives, if any. */
942    
943      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
944      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
945      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
946        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
947      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
948    
949      /* Change option settings */      /* Change option settings */
# Line 802  for (;;) Line 964  for (;;)
964      case OP_ASSERTBACK:      case OP_ASSERTBACK:
965      do      do
966        {        {
967        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
968            RM4);
969        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
970        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
971        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
972        }        }
973      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 828  for (;;) Line 991  for (;;)
991      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
992      do      do
993        {        {
994        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
995            RM5);
996        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
997        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
998        ecode += GET(ecode,1);        ecode += GET(ecode,1);
999        }        }
1000      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 854  for (;;) Line 1018  for (;;)
1018          {          {
1019          eptr--;          eptr--;
1020          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1021          BACKCHAR(eptr)          BACKCHAR(eptr);
1022          }          }
1023        }        }
1024      else      else
# Line 885  for (;;) Line 1049  for (;;)
1049        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1050        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1051        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
1052        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
1053        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
1054        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1055        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
# Line 947  for (;;) Line 1111  for (;;)
1111    
1112        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1113              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1114        new_recursive.save_start = md->start_match;        new_recursive.save_start = mstart;
1115        md->start_match = eptr;        mstart = eptr;
1116    
1117        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1118        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 957  for (;;) Line 1121  for (;;)
1121        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1122        do        do
1123          {          {
1124          RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1125            md, ims, eptrb, flags);            md, ims, eptrb, flags, RM6);
1126          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
1127            {            {
1128            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
# Line 967  for (;;) Line 1131  for (;;)
1131              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1132            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
1133            }            }
1134          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1135            {            {
1136            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1137              if (new_recursive.offset_save != stacksave)
1138                (pcre_free)(new_recursive.offset_save);
1139            RRETURN(rrc);            RRETURN(rrc);
1140            }            }
1141    
# Line 1001  for (;;) Line 1167  for (;;)
1167    
1168      do      do
1169        {        {
1170        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
         eptrb, 0);  
1171        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1172        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1173        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1174        }        }
1175      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1047  for (;;) Line 1212  for (;;)
1212    
1213      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1214        {        {
1215        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1216        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1217        ecode = prev;        ecode = prev;
1218        flags = match_tail_recursed;        flags = 0;
1219        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1220        }        }
1221      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1222        {        {
1223        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1224        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1225        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1226        flags = match_tail_recursed;        flags = 0;
1227        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1228        }        }
1229      /* Control never gets here */      /* Control never gets here */
# Line 1070  for (;;) Line 1235  for (;;)
1235      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1236      break;      break;
1237    
1238      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1239      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1240      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1241      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1242      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1243    
1244      case OP_BRAZERO:      case OP_BRAZERO:
1245        {        {
1246        next = ecode+1;        next = ecode+1;
1247        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1248        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1249        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1250        ecode = next + 1 + LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
# Line 1090  for (;;) Line 1255  for (;;)
1255        {        {
1256        next = ecode+1;        next = ecode+1;
1257        do next += GET(next, 1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1258        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1259        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1260        ecode++;        ecode++;
1261        }        }
1262      break;      break;
1263    
1264        case OP_SKIPZERO:
1265          {
1266          next = ecode+1;
1267          do next += GET(next,1); while (*next == OP_ALT);
1268          ecode = next + 1 + LINK_SIZE;
1269          }
1270        break;
1271    
1272      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1273    
1274      case OP_KET:      case OP_KET:
# Line 1160  for (;;) Line 1333  for (;;)
1333          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1334          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1335          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
1336          md->start_match = rec->save_start;          mstart = rec->save_start;
1337          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1338            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1339          ecode = rec->after_call;          ecode = rec->after_call;
# Line 1189  for (;;) Line 1362  for (;;)
1362    
1363      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1364      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1365      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1366        unlimited repeat of a group that can match an empty string. */
1367    
1368      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1369    
1370      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1371        {        {
1372        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1373        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1374          if (flags != 0)    /* Could match an empty string */
1375            {
1376            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1377            RRETURN(rrc);
1378            }
1379        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1380        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1381        }        }
1382      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1383        {        {
1384        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1385        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1386        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1387        flags = match_tail_recursed;        flags = 0;
1388        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1389        }        }
1390      /* Control never gets here */      /* Control never gets here */
# Line 1239  for (;;) Line 1417  for (;;)
1417      ecode++;      ecode++;
1418      break;      break;
1419    
1420        /* Reset the start of match point */
1421    
1422        case OP_SET_SOM:
1423        mstart = eptr;
1424        ecode++;
1425        break;
1426    
1427      /* Assert before internal newline if multiline, or before a terminating      /* Assert before internal newline if multiline, or before a terminating
1428      newline unless endonly is set, else end of subject unless noteol is set. */      newline unless endonly is set, else end of subject unless noteol is set. */
1429    
# Line 1297  for (;;) Line 1482  for (;;)
1482          {          {
1483          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1484            {            {
1485            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1486            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1487            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1488            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1331  for (;;) Line 1516  for (;;)
1516      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1517    
1518      case OP_ANY:      case OP_ANY:
1519      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1520        {      /* Fall through */
1521        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
1522        }      case OP_ALLANY:
1523      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1524      if (utf8)      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1525      ecode++;      ecode++;
1526      break;      break;
1527    
# Line 1436  for (;;) Line 1620  for (;;)
1620        case 0x000d:        case 0x000d:
1621        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1622        break;        break;
1623    
1624        case 0x000a:        case 0x000a:
1625          break;
1626    
1627        case 0x000b:        case 0x000b:
1628        case 0x000c:        case 0x000c:
1629        case 0x0085:        case 0x0085:
1630        case 0x2028:        case 0x2028:
1631        case 0x2029:        case 0x2029:
1632          if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1633          break;
1634          }
1635        ecode++;
1636        break;
1637    
1638        case OP_NOT_HSPACE:
1639        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1640        GETCHARINCTEST(c, eptr);
1641        switch(c)
1642          {
1643          default: break;
1644          case 0x09:      /* HT */
1645          case 0x20:      /* SPACE */
1646          case 0xa0:      /* NBSP */
1647          case 0x1680:    /* OGHAM SPACE MARK */
1648          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1649          case 0x2000:    /* EN QUAD */
1650          case 0x2001:    /* EM QUAD */
1651          case 0x2002:    /* EN SPACE */
1652          case 0x2003:    /* EM SPACE */
1653          case 0x2004:    /* THREE-PER-EM SPACE */
1654          case 0x2005:    /* FOUR-PER-EM SPACE */
1655          case 0x2006:    /* SIX-PER-EM SPACE */
1656          case 0x2007:    /* FIGURE SPACE */
1657          case 0x2008:    /* PUNCTUATION SPACE */
1658          case 0x2009:    /* THIN SPACE */
1659          case 0x200A:    /* HAIR SPACE */
1660          case 0x202f:    /* NARROW NO-BREAK SPACE */
1661          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1662          case 0x3000:    /* IDEOGRAPHIC SPACE */
1663          RRETURN(MATCH_NOMATCH);
1664          }
1665        ecode++;
1666        break;
1667    
1668        case OP_HSPACE:
1669        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1670        GETCHARINCTEST(c, eptr);
1671        switch(c)
1672          {
1673          default: RRETURN(MATCH_NOMATCH);
1674          case 0x09:      /* HT */
1675          case 0x20:      /* SPACE */
1676          case 0xa0:      /* NBSP */
1677          case 0x1680:    /* OGHAM SPACE MARK */
1678          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1679          case 0x2000:    /* EN QUAD */
1680          case 0x2001:    /* EM QUAD */
1681          case 0x2002:    /* EN SPACE */
1682          case 0x2003:    /* EM SPACE */
1683          case 0x2004:    /* THREE-PER-EM SPACE */
1684          case 0x2005:    /* FOUR-PER-EM SPACE */
1685          case 0x2006:    /* SIX-PER-EM SPACE */
1686          case 0x2007:    /* FIGURE SPACE */
1687          case 0x2008:    /* PUNCTUATION SPACE */
1688          case 0x2009:    /* THIN SPACE */
1689          case 0x200A:    /* HAIR SPACE */
1690          case 0x202f:    /* NARROW NO-BREAK SPACE */
1691          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1692          case 0x3000:    /* IDEOGRAPHIC SPACE */
1693          break;
1694          }
1695        ecode++;
1696        break;
1697    
1698        case OP_NOT_VSPACE:
1699        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1700        GETCHARINCTEST(c, eptr);
1701        switch(c)
1702          {
1703          default: break;
1704          case 0x0a:      /* LF */
1705          case 0x0b:      /* VT */
1706          case 0x0c:      /* FF */
1707          case 0x0d:      /* CR */
1708          case 0x85:      /* NEL */
1709          case 0x2028:    /* LINE SEPARATOR */
1710          case 0x2029:    /* PARAGRAPH SEPARATOR */
1711          RRETURN(MATCH_NOMATCH);
1712          }
1713        ecode++;
1714        break;
1715    
1716        case OP_VSPACE:
1717        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1718        GETCHARINCTEST(c, eptr);
1719        switch(c)
1720          {
1721          default: RRETURN(MATCH_NOMATCH);
1722          case 0x0a:      /* LF */
1723          case 0x0b:      /* VT */
1724          case 0x0c:      /* FF */
1725          case 0x0d:      /* CR */
1726          case 0x85:      /* NEL */
1727          case 0x2028:    /* LINE SEPARATOR */
1728          case 0x2029:    /* PARAGRAPH SEPARATOR */
1729        break;        break;
1730        }        }
1731      ecode++;      ecode++;
# Line 1456  for (;;) Line 1740  for (;;)
1740      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1741      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1742        {        {
1743        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1744    
1745        switch(ecode[1])        switch(ecode[1])
1746          {          {
# Line 1466  for (;;) Line 1749  for (;;)
1749          break;          break;
1750    
1751          case PT_LAMP:          case PT_LAMP:
1752          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
1753               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
1754               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1755            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1756           break;           break;
1757    
1758          case PT_GC:          case PT_GC:
1759          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1760            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1761          break;          break;
1762    
1763          case PT_PC:          case PT_PC:
1764          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1765            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1766          break;          break;
1767    
1768          case PT_SC:          case PT_SC:
1769          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
1770            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1771          break;          break;
1772    
# Line 1502  for (;;) Line 1785  for (;;)
1785      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1786      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1787        {        {
1788        int chartype, script;        int category = UCD_CATEGORY(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1789        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1790        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1791          {          {
# Line 1512  for (;;) Line 1794  for (;;)
1794            {            {
1795            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1796            }            }
1797          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
1798          if (category != ucp_M) break;          if (category != ucp_M) break;
1799          eptr += len;          eptr += len;
1800          }          }
# Line 1533  for (;;) Line 1815  for (;;)
1815      case OP_REF:      case OP_REF:
1816        {        {
1817        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1818        ecode += 3;                                 /* Advance past item */        ecode += 3;
1819    
1820          /* If the reference is unset, there are two possibilities:
1821    
1822          (a) In the default, Perl-compatible state, set the length to be longer
1823          than the amount of subject left; this ensures that every attempt at a
1824          match fails. We can't just fail here, because of the possibility of
1825          quantifiers with zero minima.
1826    
1827        /* If the reference is unset, set the length to be longer than the amount        (b) If the JavaScript compatibility flag is set, set the length to zero
1828        of subject left; this ensures that every attempt at a match fails. We        so that the back reference matches an empty string.
1829        can't just fail here, because of the possibility of quantifiers with zero  
1830        minima. */        Otherwise, set the length to the length of what was matched by the
1831          referenced subpattern. */
1832        length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
1833          md->end_subject - eptr + 1 :        if (offset >= offset_top || md->offset_vector[offset] < 0)
1834          md->offset_vector[offset+1] - md->offset_vector[offset];          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1835          else
1836            length = md->offset_vector[offset+1] - md->offset_vector[offset];
1837    
1838        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1839    
# Line 1587  for (;;) Line 1878  for (;;)
1878    
1879        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
1880          {          {
1881          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
1882              {
1883              CHECK_PARTIAL();
1884              RRETURN(MATCH_NOMATCH);
1885              }
1886          eptr += length;          eptr += length;
1887          }          }
1888    
# Line 1602  for (;;) Line 1897  for (;;)
1897          {          {
1898          for (fi = min;; fi++)          for (fi = min;; fi++)
1899            {            {
1900            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1901            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1902            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1903                {
1904                CHECK_PARTIAL();
1905              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
1906                }
1907            eptr += length;            eptr += length;
1908            }            }
1909          /* Control never gets here */          /* Control never gets here */
# Line 1621  for (;;) Line 1919  for (;;)
1919            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims)) break;
1920            eptr += length;            eptr += length;
1921            }            }
1922            CHECK_PARTIAL();
1923          while (eptr >= pp)          while (eptr >= pp)
1924            {            {
1925            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1926            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1927            eptr -= length;            eptr -= length;
1928            }            }
# Line 1688  for (;;) Line 1987  for (;;)
1987          {          {
1988          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
1989            {            {
1990            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
1991                {
1992                CHECK_PARTIAL();
1993                RRETURN(MATCH_NOMATCH);
1994                }
1995            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
1996            if (c > 255)            if (c > 255)
1997              {              {
# Line 1706  for (;;) Line 2009  for (;;)
2009          {          {
2010          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2011            {            {
2012            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2013                {
2014                CHECK_PARTIAL();
2015                RRETURN(MATCH_NOMATCH);
2016                }
2017            c = *eptr++;            c = *eptr++;
2018            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2019            }            }
# Line 1728  for (;;) Line 2035  for (;;)
2035            {            {
2036            for (fi = min;; fi++)            for (fi = min;; fi++)
2037              {              {
2038              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2039              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2040              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
2041                  {
2042                  CHECK_PARTIAL();
2043                  RRETURN(MATCH_NOMATCH);
2044                  }
2045                if (eptr >= md->end_subject)
2046                  {
2047                  SCHECK_PARTIAL();
2048                  RRETURN(MATCH_NOMATCH);
2049                  }
2050              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2051              if (c > 255)              if (c > 255)
2052                {                {
# Line 1748  for (;;) Line 2064  for (;;)
2064            {            {
2065            for (fi = min;; fi++)            for (fi = min;; fi++)
2066              {              {
2067              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2068              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2069              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
2070                  {
2071                  CHECK_PARTIAL();
2072                  RRETURN(MATCH_NOMATCH);
2073                  }
2074                if (eptr >= md->end_subject)
2075                  {
2076                  SCHECK_PARTIAL();
2077                  RRETURN(MATCH_NOMATCH);
2078                  }
2079              c = *eptr++;              c = *eptr++;
2080              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2081              }              }
# Line 1783  for (;;) Line 2108  for (;;)
2108                }                }
2109              eptr += len;              eptr += len;
2110              }              }
2111              CHECK_PARTIAL();
2112            for (;;)            for (;;)
2113              {              {
2114              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2115              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2116              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2117              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1802  for (;;) Line 2128  for (;;)
2128              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2129              eptr++;              eptr++;
2130              }              }
2131              CHECK_PARTIAL();
2132            while (eptr >= pp)            while (eptr >= pp)
2133              {              {
2134              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2135              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2136              eptr--;              eptr--;
2137              }              }
# Line 1817  for (;;) Line 2144  for (;;)
2144    
2145    
2146      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2147      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2148        mode, because Unicode properties are supported in non-UTF-8 mode. */
2149    
2150  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2151      case OP_XCLASS:      case OP_XCLASS:
# Line 1858  for (;;) Line 2186  for (;;)
2186    
2187        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2188          {          {
2189          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2190          GETCHARINC(c, eptr);            {
2191              SCHECK_PARTIAL();
2192              RRETURN(MATCH_NOMATCH);
2193              }
2194            GETCHARINCTEST(c, eptr);
2195          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2196          }          }
2197    
# Line 1875  for (;;) Line 2207  for (;;)
2207          {          {
2208          for (fi = min;; fi++)          for (fi = min;; fi++)
2209            {            {
2210            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2211            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2212            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max)
2213            GETCHARINC(c, eptr);              {
2214                CHECK_PARTIAL();
2215                RRETURN(MATCH_NOMATCH);
2216                }
2217              if (eptr >= md->end_subject)
2218                {
2219                SCHECK_PARTIAL();
2220                RRETURN(MATCH_NOMATCH);
2221                }
2222              GETCHARINCTEST(c, eptr);
2223            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2224            }            }
2225          /* Control never gets here */          /* Control never gets here */
# Line 1893  for (;;) Line 2234  for (;;)
2234            {            {
2235            int len = 1;            int len = 1;
2236            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2237            GETCHARLEN(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
2238            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2239            eptr += len;            eptr += len;
2240            }            }
2241            CHECK_PARTIAL();
2242          for(;;)          for(;;)
2243            {            {
2244            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2245            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2246            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2247            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2248            }            }
2249          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2250          }          }
# Line 1968  for (;;) Line 2310  for (;;)
2310          if (fc != dc)          if (fc != dc)
2311            {            {
2312  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2313            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2314  #endif  #endif
2315              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2316            }            }
# Line 2037  for (;;) Line 2379  for (;;)
2379      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2380      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2381    
2382      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2383    
2384      REPEATCHAR:      REPEATCHAR:
2385  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2048  for (;;) Line 2388  for (;;)
2388        length = 1;        length = 1;
2389        charptr = ecode;        charptr = ecode;
2390        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2391        ecode += length;        ecode += length;
2392    
2393        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 2059  for (;;) Line 2398  for (;;)
2398  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2399          unsigned int othercase;          unsigned int othercase;
2400          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2401              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2402            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2403          else oclength = 0;          else oclength = 0;
2404  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2405    
2406          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2407            {            {
2408            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2409                memcmp(eptr, charptr, length) == 0) eptr += length;
2410  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2411            /* Need braces because of following else */            else if (oclength > 0 &&
2412            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                     eptr <= md->end_subject - oclength &&
2413                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2414    #endif  /* SUPPORT_UCP */
2415            else            else
2416              {              {
2417              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2418              eptr += oclength;              RRETURN(MATCH_NOMATCH);
2419              }              }
 #else   /* without SUPPORT_UCP */  
           else { RRETURN(MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2420            }            }
2421    
2422          if (min == max) continue;          if (min == max) continue;
# Line 2086  for (;;) Line 2425  for (;;)
2425            {            {
2426            for (fi = min;; fi++)            for (fi = min;; fi++)
2427              {              {
2428              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2429              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2430              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
2431              if (memcmp(eptr, charptr, length) == 0) eptr += length;                {
2432                  CHECK_PARTIAL();
2433                  RRETURN(MATCH_NOMATCH);
2434                  }
2435                if (eptr <= md->end_subject - length &&
2436                  memcmp(eptr, charptr, length) == 0) eptr += length;
2437  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2438              /* Need braces because of following else */              else if (oclength > 0 &&
2439              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                       eptr <= md->end_subject - oclength &&
2440                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2441    #endif  /* SUPPORT_UCP */
2442              else              else
2443                {                {
2444                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2445                eptr += oclength;                RRETURN(MATCH_NOMATCH);
2446                }                }
 #else   /* without SUPPORT_UCP */  
             else { RRETURN (MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2447              }              }
2448            /* Control never gets here */            /* Control never gets here */
2449            }            }
# Line 2110  for (;;) Line 2453  for (;;)
2453            pp = eptr;            pp = eptr;
2454            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2455              {              {
2456              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2457              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2458  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2459              else if (oclength == 0) break;              else if (oclength > 0 &&
2460              else                       eptr <= md->end_subject - oclength &&
2461                {                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
               if (memcmp(eptr, occhars, oclength) != 0) break;  
               eptr += oclength;  
               }  
 #else   /* without SUPPORT_UCP */  
             else break;  
2462  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2463                else break;
2464              }              }
2465    
2466              CHECK_PARTIAL();
2467            if (possessive) continue;            if (possessive) continue;
2468    
2469            for(;;)            for(;;)
2470             {              {
2471             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2472             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2473             if (eptr == pp) RRETURN(MATCH_NOMATCH);              if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2474  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2475             eptr--;              eptr--;
2476             BACKCHAR(eptr);              BACKCHAR(eptr);
2477  #else   /* without SUPPORT_UCP */  #else   /* without SUPPORT_UCP */
2478             eptr -= length;              eptr -= length;
2479  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2480             }              }
2481            }            }
2482          /* Control never gets here */          /* Control never gets here */
2483          }          }
# Line 2149  for (;;) Line 2490  for (;;)
2490  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2491    
2492      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2493        {  
2494        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2495    
2496      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2497      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2170  for (;;) Line 2509  for (;;)
2509        {        {
2510        fc = md->lcc[fc];        fc = md->lcc[fc];
2511        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2512            {
2513            if (eptr >= md->end_subject)
2514              {
2515              SCHECK_PARTIAL();
2516              RRETURN(MATCH_NOMATCH);
2517              }
2518          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2519            }
2520        if (min == max) continue;        if (min == max) continue;
2521        if (minimize)        if (minimize)
2522          {          {
2523          for (fi = min;; fi++)          for (fi = min;; fi++)
2524            {            {
2525            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2526            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2527            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max)
2528                fc != md->lcc[*eptr++])              {
2529                CHECK_PARTIAL();
2530              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2531                }
2532              if (eptr >= md->end_subject)
2533                {
2534                SCHECK_PARTIAL();
2535                RRETURN(MATCH_NOMATCH);
2536                }
2537              if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2538            }            }
2539          /* Control never gets here */          /* Control never gets here */
2540          }          }
# Line 2192  for (;;) Line 2546  for (;;)
2546            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2547            eptr++;            eptr++;
2548            }            }
2549    
2550            CHECK_PARTIAL();
2551          if (possessive) continue;          if (possessive) continue;
2552    
2553          while (eptr >= pp)          while (eptr >= pp)
2554            {            {
2555            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2556            eptr--;            eptr--;
2557            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2558            }            }
# Line 2208  for (;;) Line 2565  for (;;)
2565    
2566      else      else
2567        {        {
2568        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2569            {
2570            if (eptr >= md->end_subject)
2571              {
2572              SCHECK_PARTIAL();
2573              RRETURN(MATCH_NOMATCH);
2574              }
2575            if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2576            }
2577        if (min == max) continue;        if (min == max) continue;
2578        if (minimize)        if (minimize)
2579          {          {
2580          for (fi = min;; fi++)          for (fi = min;; fi++)
2581            {            {
2582            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2583            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2584            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max)
2585                {
2586                CHECK_PARTIAL();
2587                RRETURN(MATCH_NOMATCH);
2588                }
2589              if (eptr >= md->end_subject)
2590                {
2591                SCHECK_PARTIAL();
2592              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2593                }
2594              if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2595            }            }
2596          /* Control never gets here */          /* Control never gets here */
2597          }          }
# Line 2229  for (;;) Line 2603  for (;;)
2603            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2604            eptr++;            eptr++;
2605            }            }
2606            CHECK_PARTIAL();
2607          if (possessive) continue;          if (possessive) continue;
2608          while (eptr >= pp)          while (eptr >= pp)
2609            {            {
2610            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2611            eptr--;            eptr--;
2612            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2613            }            }
# Line 2322  for (;;) Line 2697  for (;;)
2697      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2698      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2699    
2700      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
2701    
2702      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2703      fc = *ecode++;      fc = *ecode++;
2704    
2705      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2352  for (;;) Line 2724  for (;;)
2724          register unsigned int d;          register unsigned int d;
2725          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2726            {            {
2727              if (eptr >= md->end_subject)
2728                {
2729                SCHECK_PARTIAL();
2730                RRETURN(MATCH_NOMATCH);
2731                }
2732            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
2733            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
2734            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
# Line 2363  for (;;) Line 2740  for (;;)
2740        /* Not UTF-8 mode */        /* Not UTF-8 mode */
2741          {          {
2742          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2743              {
2744              if (eptr >= md->end_subject)
2745                {
2746                SCHECK_PARTIAL();
2747                RRETURN(MATCH_NOMATCH);
2748                }
2749            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2750              }
2751          }          }
2752    
2753        if (min == max) continue;        if (min == max) continue;
# Line 2377  for (;;) Line 2761  for (;;)
2761            register unsigned int d;            register unsigned int d;
2762            for (fi = min;; fi++)            for (fi = min;; fi++)
2763              {              {
2764              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2765              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2766                if (fi >= max)
2767                  {
2768                  CHECK_PARTIAL();
2769                  RRETURN(MATCH_NOMATCH);
2770                  }
2771                if (eptr >= md->end_subject)
2772                  {
2773                  SCHECK_PARTIAL();
2774                  RRETURN(MATCH_NOMATCH);
2775                  }
2776              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2777              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2778              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2779              }              }
2780            }            }
2781          else          else
# Line 2391  for (;;) Line 2784  for (;;)
2784            {            {
2785            for (fi = min;; fi++)            for (fi = min;; fi++)
2786              {              {
2787              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2788              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2789              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max)
2790                  {
2791                  CHECK_PARTIAL();
2792                  RRETURN(MATCH_NOMATCH);
2793                  }
2794                if (eptr >= md->end_subject)
2795                  {
2796                  SCHECK_PARTIAL();
2797                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2798                  }
2799                if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2800              }              }
2801            }            }
2802          /* Control never gets here */          /* Control never gets here */
# Line 2420  for (;;) Line 2822  for (;;)
2822              if (fc == d) break;              if (fc == d) break;
2823              eptr += len;              eptr += len;
2824              }              }
2825            CHECK_PARTIAL();
2826          if (possessive) continue;          if (possessive) continue;
2827          for(;;)          for(;;)
2828              {              {
2829              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2830              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2831              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2832              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2438  for (;;) Line 2841  for (;;)
2841              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2842              eptr++;              eptr++;
2843              }              }
2844              CHECK_PARTIAL();
2845            if (possessive) continue;            if (possessive) continue;
2846            while (eptr >= pp)            while (eptr >= pp)
2847              {              {
2848              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2849              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2850              eptr--;              eptr--;
2851              }              }
# Line 2463  for (;;) Line 2867  for (;;)
2867          register unsigned int d;          register unsigned int d;
2868          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2869            {            {
2870              if (eptr >= md->end_subject)
2871                {
2872                SCHECK_PARTIAL();
2873                RRETURN(MATCH_NOMATCH);
2874                }
2875            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
2876            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
2877            }            }
# Line 2472  for (;;) Line 2881  for (;;)
2881        /* Not UTF-8 mode */        /* Not UTF-8 mode */
2882          {          {
2883          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2884              {
2885              if (eptr >= md->end_subject)
2886                {
2887                SCHECK_PARTIAL();
2888                RRETURN(MATCH_NOMATCH);
2889                }
2890            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2891              }
2892          }          }
2893    
2894        if (min == max) continue;        if (min == max) continue;
# Line 2486  for (;;) Line 2902  for (;;)
2902            register unsigned int d;            register unsigned int d;
2903            for (fi = min;; fi++)            for (fi = min;; fi++)
2904              {              {
2905              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2906              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2907              GETCHARINC(d, eptr);              if (fi >= max)
2908              if (fi >= max || eptr >= md->end_subject || fc == d)                {
2909                  CHECK_PARTIAL();
2910                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2911                  }
2912                if (eptr >= md->end_subject)
2913                  {
2914                  SCHECK_PARTIAL();
2915                  RRETURN(MATCH_NOMATCH);
2916                  }
2917                GETCHARINC(d, eptr);
2918                if (fc == d) RRETURN(MATCH_NOMATCH);
2919              }              }
2920            }            }
2921          else          else
# Line 2499  for (;;) Line 2924  for (;;)
2924            {            {
2925            for (fi = min;; fi++)            for (fi = min;; fi++)
2926              {              {
2927              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2928              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2929              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max)
2930                  {
2931                  CHECK_PARTIAL();
2932                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2933                  }
2934                if (eptr >= md->end_subject)
2935                  {
2936                  SCHECK_PARTIAL();
2937                  RRETURN(MATCH_NOMATCH);
2938                  }
2939                if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2940              }              }
2941            }            }
2942          /* Control never gets here */          /* Control never gets here */
# Line 2527  for (;;) Line 2961  for (;;)
2961              if (fc == d) break;              if (fc == d) break;
2962              eptr += len;              eptr += len;
2963              }              }
2964              CHECK_PARTIAL();
2965            if (possessive) continue;            if (possessive) continue;
2966            for(;;)            for(;;)
2967              {              {
2968              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2969              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2970              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2971              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2545  for (;;) Line 2980  for (;;)
2980              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2981              eptr++;              eptr++;
2982              }              }
2983              CHECK_PARTIAL();
2984            if (possessive) continue;            if (possessive) continue;
2985            while (eptr >= pp)            while (eptr >= pp)
2986              {              {
2987              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2988              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2989              eptr--;              eptr--;
2990              }              }
# Line 2636  for (;;) Line 3072  for (;;)
3072    
3073      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3074      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3075      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3076      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3077      and single-bytes. */      and single-bytes. */
3078    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3079      if (min > 0)      if (min > 0)
3080        {        {
3081  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2654  for (;;) Line 3087  for (;;)
3087            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3088            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3089              {              {
3090              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3091              GETCHARINC(c, eptr);                {
3092                  SCHECK_PARTIAL();
3093                  RRETURN(MATCH_NOMATCH);
3094                  }
3095                GETCHARINCTEST(c, eptr);
3096              }              }
3097            break;            break;
3098    
3099            case PT_LAMP:            case PT_LAMP:
3100            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3101              {              {
3102              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3103              GETCHARINC(c, eptr);                {
3104              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3105                  RRETURN(MATCH_NOMATCH);
3106                  }
3107                GETCHARINCTEST(c, eptr);
3108                prop_chartype = UCD_CHARTYPE(c);
3109              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3110                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3111                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2675  for (;;) Line 3116  for (;;)
3116            case PT_GC:            case PT_GC:
3117            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3118              {              {
3119              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3120              GETCHARINC(c, eptr);                {
3121              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3122                  RRETURN(MATCH_NOMATCH);
3123                  }
3124                GETCHARINCTEST(c, eptr);
3125                prop_category = UCD_CATEGORY(c);
3126              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3127                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3128              }              }
# Line 2686  for (;;) Line 3131  for (;;)
3131            case PT_PC:            case PT_PC:
3132            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3133              {              {
3134              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3135              GETCHARINC(c, eptr);                {
3136              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3137                  RRETURN(MATCH_NOMATCH);
3138                  }
3139                GETCHARINCTEST(c, eptr);
3140                prop_chartype = UCD_CHARTYPE(c);
3141              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3142                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3143              }              }
# Line 2697  for (;;) Line 3146  for (;;)
3146            case PT_SC:            case PT_SC:
3147            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3148              {              {
3149              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3150              GETCHARINC(c, eptr);                {
3151              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3152                  RRETURN(MATCH_NOMATCH);
3153                  }
3154                GETCHARINCTEST(c, eptr);
3155                prop_script = UCD_SCRIPT(c);
3156              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3157                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3158              }              }
# Line 2717  for (;;) Line 3170  for (;;)
3170          {          {
3171          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3172            {            {
3173              if (eptr >= md->end_subject)
3174                {
3175                SCHECK_PARTIAL();
3176                RRETURN(MATCH_NOMATCH);
3177                }
3178            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3179            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3180            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3181            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3182              {              {
3183              int len = 1;              int len = 1;
3184              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3185                {                else { GETCHARLEN(c, eptr, len); }
3186                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3187              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3188              eptr += len;              eptr += len;
3189              }              }
# Line 2745  for (;;) Line 3201  for (;;)
3201          case OP_ANY:          case OP_ANY:
3202          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3203            {            {
3204            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3205                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))              {
3206                SCHECK_PARTIAL();
3207                RRETURN(MATCH_NOMATCH);
3208                }
3209              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3210              eptr++;
3211              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3212              }
3213            break;
3214    
3215            case OP_ALLANY:
3216            for (i = 1; i <= min; i++)
3217              {
3218              if (eptr >= md->end_subject)
3219                {
3220                SCHECK_PARTIAL();
3221              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3222                }
3223            eptr++;            eptr++;
3224            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3225            }            }
3226          break;          break;
3227    
3228          case OP_ANYBYTE:          case OP_ANYBYTE:
3229            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3230          eptr += min;          eptr += min;
3231          break;          break;
3232    
3233          case OP_ANYNL:          case OP_ANYNL:
3234          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3235            {            {
3236            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3237                {
3238                SCHECK_PARTIAL();
3239                RRETURN(MATCH_NOMATCH);
3240                }
3241            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3242            switch(c)            switch(c)
3243              {              {
# Line 2768  for (;;) Line 3245  for (;;)
3245              case 0x000d:              case 0x000d:
3246              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3247              break;              break;
3248    
3249              case 0x000a:              case 0x000a:
3250                break;
3251    
3252              case 0x000b:              case 0x000b:
3253              case 0x000c:              case 0x000c:
3254              case 0x0085:              case 0x0085:
3255              case 0x2028:              case 0x2028:
3256              case 0x2029:              case 0x2029:
3257                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3258                break;
3259                }
3260              }
3261            break;
3262    
3263            case OP_NOT_HSPACE:
3264            for (i = 1; i <= min; i++)
3265              {
3266              if (eptr >= md->end_subject)
3267                {
3268                SCHECK_PARTIAL();
3269                RRETURN(MATCH_NOMATCH);
3270                }
3271              GETCHARINC(c, eptr);
3272              switch(c)
3273                {
3274                default: break;
3275                case 0x09:      /* HT */
3276                case 0x20:      /* SPACE */
3277                case 0xa0:      /* NBSP */
3278                case 0x1680:    /* OGHAM SPACE MARK */
3279                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3280                case 0x2000:    /* EN QUAD */
3281                case 0x2001:    /* EM QUAD */
3282                case 0x2002:    /* EN SPACE */
3283                case 0x2003:    /* EM SPACE */
3284                case 0x2004:    /* THREE-PER-EM SPACE */
3285                case 0x2005:    /* FOUR-PER-EM SPACE */
3286                case 0x2006:    /* SIX-PER-EM SPACE */
3287                case 0x2007:    /* FIGURE SPACE */
3288                case 0x2008:    /* PUNCTUATION SPACE */
3289                case 0x2009:    /* THIN SPACE */
3290                case 0x200A:    /* HAIR SPACE */
3291                case 0x202f:    /* NARROW NO-BREAK SPACE */
3292                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3293                case 0x3000:    /* IDEOGRAPHIC SPACE */
3294                RRETURN(MATCH_NOMATCH);
3295                }
3296              }
3297            break;
3298    
3299            case OP_HSPACE:
3300            for (i = 1; i <= min; i++)
3301              {
3302              if (eptr >= md->end_subject)
3303                {
3304                SCHECK_PARTIAL();
3305                RRETURN(MATCH_NOMATCH);
3306                }
3307              GETCHARINC(c, eptr);
3308              switch(c)
3309                {
3310                default: RRETURN(MATCH_NOMATCH);
3311                case 0x09:      /* HT */
3312                case 0x20:      /* SPACE */
3313                case 0xa0:      /* NBSP */
3314                case 0x1680:    /* OGHAM SPACE MARK */
3315                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3316                case 0x2000:    /* EN QUAD */
3317                case 0x2001:    /* EM QUAD */
3318                case 0x2002:    /* EN SPACE */
3319                case 0x2003:    /* EM SPACE */
3320                case 0x2004:    /* THREE-PER-EM SPACE */
3321                case 0x2005:    /* FOUR-PER-EM SPACE */
3322                case 0x2006:    /* SIX-PER-EM SPACE */
3323                case 0x2007:    /* FIGURE SPACE */
3324                case 0x2008:    /* PUNCTUATION SPACE */
3325                case 0x2009:    /* THIN SPACE */
3326                case 0x200A:    /* HAIR SPACE */
3327                case 0x202f:    /* NARROW NO-BREAK SPACE */
3328                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3329                case 0x3000:    /* IDEOGRAPHIC SPACE */
3330                break;
3331                }
3332              }
3333            break;
3334    
3335            case OP_NOT_VSPACE:
3336            for (i = 1; i <= min; i++)
3337              {
3338              if (eptr >= md->end_subject)
3339                {
3340                SCHECK_PARTIAL();
3341                RRETURN(MATCH_NOMATCH);
3342                }
3343              GETCHARINC(c, eptr);
3344              switch(c)
3345                {
3346                default: break;
3347                case 0x0a:      /* LF */
3348                case 0x0b:      /* VT */
3349                case 0x0c:      /* FF */
3350                case 0x0d:      /* CR */
3351                case 0x85:      /* NEL */
3352                case 0x2028:    /* LINE SEPARATOR */
3353                case 0x2029:    /* PARAGRAPH SEPARATOR */
3354                RRETURN(MATCH_NOMATCH);
3355                }
3356              }
3357            break;
3358    
3359            case OP_VSPACE:
3360            for (i = 1; i <= min; i++)
3361              {
3362              if (eptr >= md->end_subject)
3363                {
3364                SCHECK_PARTIAL();
3365                RRETURN(MATCH_NOMATCH);
3366                }
3367              GETCHARINC(c, eptr);
3368              switch(c)
3369                {
3370                default: RRETURN(MATCH_NOMATCH);
3371                case 0x0a:      /* LF */
3372                case 0x0b:      /* VT */
3373                case 0x0c:      /* FF */
3374                case 0x0d:      /* CR */
3375                case 0x85:      /* NEL */
3376                case 0x2028:    /* LINE SEPARATOR */
3377                case 0x2029:    /* PARAGRAPH SEPARATOR */
3378              break;              break;
3379              }              }
3380            }            }
# Line 2782  for (;;) Line 3383  for (;;)
3383          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3384          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3385            {            {
3386            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3387                {
3388                SCHECK_PARTIAL();
3389                RRETURN(MATCH_NOMATCH);
3390                }
3391            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3392            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3393              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 2792  for (;;) Line 3397  for (;;)
3397          case OP_DIGIT:          case OP_DIGIT:
3398          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3399            {            {
3400            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3401               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3402                SCHECK_PARTIAL();
3403                RRETURN(MATCH_NOMATCH);
3404                }
3405              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3406              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3407            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3408            }            }
# Line 2802  for (;;) Line 3411  for (;;)
3411          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3412          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3413            {            {
3414            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3415               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3416                SCHECK_PARTIAL();
3417              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3418            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              }
3419              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3420                RRETURN(MATCH_NOMATCH);
3421              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3422            }            }
3423          break;          break;
3424    
3425          case OP_WHITESPACE:          case OP_WHITESPACE:
3426          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3427            {            {
3428            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3429               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3430                SCHECK_PARTIAL();
3431                RRETURN(MATCH_NOMATCH);
3432                }
3433              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3434              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3435            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3436            }            }
# Line 2823  for (;;) Line 3440  for (;;)
3440          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3441            {            {
3442            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
3443               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))               (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3444              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3445            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3446            }            }
3447          break;          break;
3448    
3449          case OP_WORDCHAR:          case OP_WORDCHAR:
3450          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3451            {            {
3452            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3453               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3454                SCHECK_PARTIAL();
3455                RRETURN(MATCH_NOMATCH);
3456                }
3457              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3458              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3459            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3460            }            }
# Line 2847  for (;;) Line 3468  for (;;)
3468  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
3469    
3470        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
3471        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum        than OP_PROP and OP_NOTPROP. */
       number of bytes present, as this was tested above. */  
3472    
3473        switch(ctype)        switch(ctype)
3474          {          {
3475          case OP_ANY:          case OP_ANY:
3476          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3477            {            {
3478            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
3479              {              {
3480              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3481              eptr++;              RRETURN(MATCH_NOMATCH);
3482              }              }
3483              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3484              eptr++;
3485            }            }
         else eptr += min;  
3486          break;          break;
3487    
3488          case OP_ANYBYTE:          case OP_ALLANY:
3489            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3490          eptr += min;          eptr += min;
3491          break;          break;
3492    
3493          /* Because of the CRLF case, we can't assume the minimum number of          case OP_ANYBYTE:
3494          bytes are present in this case. */          if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3495            eptr += min;
3496            break;
3497    
3498          case OP_ANYNL:          case OP_ANYNL:
3499          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3500            {            {
3501            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3502                {
3503                SCHECK_PARTIAL();
3504                RRETURN(MATCH_NOMATCH);
3505                }
3506            switch(*eptr++)            switch(*eptr++)
3507              {              {
3508              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
# Line 2882  for (;;) Line 3510  for (;;)
3510              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3511              break;              break;
3512              case 0x000a:              case 0x000a:
3513                break;
3514    
3515              case 0x000b:              case 0x000b:
3516              case 0x000c:              case 0x000c:
3517              case 0x0085:              case 0x0085:
3518                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3519                break;
3520                }
3521              }
3522            break;
3523    
3524            case OP_NOT_HSPACE:
3525            for (i = 1; i <= min; i++)
3526              {
3527              if (eptr >= md->end_subject)
3528                {
3529                SCHECK_PARTIAL();
3530                RRETURN(MATCH_NOMATCH);
3531                }
3532              switch(*eptr++)
3533                {
3534                default: break;
3535                case 0x09:      /* HT */
3536                case 0x20:      /* SPACE */
3537                case 0xa0:      /* NBSP */
3538                RRETURN(MATCH_NOMATCH);
3539                }
3540              }
3541            break;
3542    
3543            case OP_HSPACE:
3544            for (i = 1; i <= min; i++)
3545              {
3546              if (eptr >= md->end_subject)
3547                {
3548                SCHECK_PARTIAL();
3549                RRETURN(MATCH_NOMATCH);
3550                }
3551              switch(*eptr++)
3552                {
3553                default: RRETURN(MATCH_NOMATCH);
3554                case 0x09:      /* HT */
3555                case 0x20:      /* SPACE */
3556                case 0xa0:      /* NBSP */
3557                break;
3558                }
3559              }
3560            break;
3561    
3562            case OP_NOT_VSPACE:
3563            for (i = 1; i <= min; i++)
3564              {
3565              if (eptr >= md->end_subject)
3566                {
3567                SCHECK_PARTIAL();
3568                RRETURN(MATCH_NOMATCH);
3569                }
3570              switch(*eptr++)
3571                {
3572                default: break;
3573                case 0x0a:      /* LF */
3574                case 0x0b:      /* VT */
3575                case 0x0c:      /* FF */
3576                case 0x0d:      /* CR */
3577                case 0x85:      /* NEL */
3578                RRETURN(MATCH_NOMATCH);
3579                }
3580              }
3581            break;
3582    
3583            case OP_VSPACE:
3584            for (i = 1; i <= min; i++)
3585              {
3586              if (eptr >= md->end_subject)
3587                {
3588                SCHECK_PARTIAL();
3589                RRETURN(MATCH_NOMATCH);
3590                }
3591              switch(*eptr++)
3592                {
3593                default: RRETURN(MATCH_NOMATCH);
3594                case 0x0a:      /* LF */
3595                case 0x0b:      /* VT */
3596                case 0x0c:      /* FF */
3597                case 0x0d:      /* CR */
3598                case 0x85:      /* NEL */
3599              break;              break;
3600              }              }
3601            }            }
# Line 2892  for (;;) Line 3603  for (;;)
3603    
3604          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3605          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3606              {
3607              if (eptr >= md->end_subject)
3608                {
3609                SCHECK_PARTIAL();
3610                RRETURN(MATCH_NOMATCH);
3611                }
3612            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3613              }
3614          break;          break;
3615    
3616          case OP_DIGIT:          case OP_DIGIT:
3617          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3618              {
3619              if (eptr >= md->end_subject)
3620                {
3621                SCHECK_PARTIAL();
3622                RRETURN(MATCH_NOMATCH);
3623                }
3624            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3625              }
3626          break;          break;
3627    
3628          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3629          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3630              {
3631              if (eptr >= md->end_subject)
3632                {
3633                SCHECK_PARTIAL();
3634                RRETURN(MATCH_NOMATCH);
3635                }
3636            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3637              }
3638          break;          break;
3639    
3640          case OP_WHITESPACE:          case OP_WHITESPACE:
3641          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3642              {
3643              if (eptr >= md->end_subject)
3644                {
3645                SCHECK_PARTIAL();
3646                RRETURN(MATCH_NOMATCH);
3647                }
3648            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3649              }
3650          break;          break;
3651    
3652          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3653          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3654              {
3655              if (eptr >= md->end_subject)
3656                {
3657                SCHECK_PARTIAL();
3658                RRETURN(MATCH_NOMATCH);
3659                }
3660            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if ((md->ctypes[*eptr++] & ctype_word) != 0)
3661              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3662              }
3663          break;          break;
3664    
3665          case OP_WORDCHAR:          case OP_WORDCHAR:
3666          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3667              {
3668              if (eptr >= md->end_subject)
3669                {
3670                SCHECK_PARTIAL();
3671                RRETURN(MATCH_NOMATCH);
3672                }
3673            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if ((md->ctypes[*eptr++] & ctype_word) == 0)
3674              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3675              }
3676          break;          break;
3677    
3678          default:          default:
# Line 2945  for (;;) Line 3698  for (;;)
3698            case PT_ANY:            case PT_ANY:
3699            for (fi = min;; fi++)            for (fi = min;; fi++)
3700              {              {
3701              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3702              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3703              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
3704                  {
3705                  CHECK_PARTIAL();
3706                  RRETURN(MATCH_NOMATCH);
3707                  }
3708                if (eptr >= md->end_subject)
3709                  {
3710                  SCHECK_PARTIAL();
3711                  RRETURN(MATCH_NOMATCH);
3712                  }
3713              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3714              if (prop_fail_result) RRETURN(MATCH_NOMATCH);              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3715              }              }
# Line 2956  for (;;) Line 3718  for (;;)
3718            case PT_LAMP:            case PT_LAMP:
3719            for (fi = min;; fi++)            for (fi = min;; fi++)
3720              {              {
3721              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3722              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3723              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
3724                  {
3725                  CHECK_PARTIAL();
3726                  RRETURN(MATCH_NOMATCH);
3727                  }
3728                if (eptr >= md->end_subject)
3729                  {
3730                  SCHECK_PARTIAL();
3731                  RRETURN(MATCH_NOMATCH);
3732                  }
3733              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3734              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3735              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3736                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3737                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2971  for (;;) Line 3742  for (;;)
3742            case PT_GC:            case PT_GC:
3743            for (fi = min;; fi++)            for (fi = min;; fi++)
3744              {              {
3745              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3746              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3747              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
3748                  {
3749                  CHECK_PARTIAL();
3750                  RRETURN(MATCH_NOMATCH);
3751                  }
3752                if (eptr >= md->end_subject)
3753                  {
3754                  SCHECK_PARTIAL();
3755                  RRETURN(MATCH_NOMATCH);
3756                  }
3757              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3758              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3759              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3760                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3761              }              }
# Line 2984  for (;;) Line 3764  for (;;)
3764            case PT_PC:            case PT_PC:
3765            for (fi = min;; fi++)            for (fi = min;; fi++)
3766              {              {
3767              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3768              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3769              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
3770                  {
3771                  CHECK_PARTIAL();
3772                  RRETURN(MATCH_NOMATCH);
3773                  }
3774                if (eptr >= md->end_subject)
3775                  {
3776                  SCHECK_PARTIAL();
3777                  RRETURN(MATCH_NOMATCH);
3778                  }
3779              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3780              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3781              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3782                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3783              }              }
# Line 2997  for (;;) Line 3786  for (;;)
3786            case PT_SC:            case PT_SC:
3787            for (fi = min;; fi++)            for (fi = min;; fi++)
3788              {              {
3789              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3790              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3791              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
3792                  {
3793                  CHECK_PARTIAL();
3794                  RRETURN(MATCH_NOMATCH);
3795                  }
3796                if (eptr >= md->end_subject)
3797                  {
3798                  SCHECK_PARTIAL();
3799                  RRETURN(MATCH_NOMATCH);
3800                  }
3801              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3802              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3803              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3804                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3805              }              }
# Line 3019  for (;;) Line 3817  for (;;)
3817          {          {
3818          for (fi = min;; fi++)          for (fi = min;; fi++)
3819            {            {
3820            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3821            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3822            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max)
3823                {
3824                CHECK_PARTIAL();
3825                RRETURN(MATCH_NOMATCH);
3826                }
3827              if (eptr >= md->end_subject)
3828                {
3829                SCHECK_PARTIAL();
3830                RRETURN(MATCH_NOMATCH);
3831                }
3832            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3833            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3834            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3835            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3836              {              {
3837              int len = 1;              int len = 1;
3838              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3839                {                else { GETCHARLEN(c, eptr, len); }
3840                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3841              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3842              eptr += len;              eptr += len;
3843              }              }
# Line 3048  for (;;) Line 3853  for (;;)
3853          {          {
3854          for (fi = min;; fi++)          for (fi = min;; fi++)
3855            {            {
3856            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3857            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3858            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max)
3859                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&              {
3860                  IS_NEWLINE(eptr)))              CHECK_PARTIAL();
3861                RRETURN(MATCH_NOMATCH);
3862                }
3863              if (eptr >= md->end_subject)
3864                {
3865                SCHECK_PARTIAL();
3866                RRETURN(MATCH_NOMATCH);
3867                }
3868              if (ctype == OP_ANY && IS_NEWLINE(eptr))
3869              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
   
3870            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3871            switch(ctype)            switch(ctype)
3872              {              {
3873              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
3874              break;              case OP_ALLANY:
   
3875              case OP_ANYBYTE:              case OP_ANYBYTE:
3876              break;              break;
3877    
# Line 3072  for (;;) Line 3883  for (;;)
3883                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3884                break;                break;
3885                case 0x000a:                case 0x000a:
3886                  break;
3887    
3888                case 0x000b:                case 0x000b:
3889                case 0x000c:                case 0x000c:
3890                case 0x0085:                case 0x0085:
3891                case 0x2028:                case 0x2028:
3892                case 0x2029:                case 0x2029:
3893                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3894                  break;
3895                  }
3896                break;
3897    
3898                case OP_NOT_HSPACE:
3899                switch(c)
3900                  {
3901                  default: break;
3902                  case 0x09:      /* HT */
3903                  case 0x20:      /* SPACE */
3904                  case 0xa0:      /* NBSP */
3905                  case 0x1680:    /* OGHAM SPACE MARK */
3906                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3907                  case 0x2000:    /* EN QUAD */
3908                  case 0x2001:    /* EM QUAD */
3909                  case 0x2002:    /* EN SPACE */
3910                  case 0x2003:    /* EM SPACE */
3911                  case 0x2004:    /* THREE-PER-EM SPACE */
3912                  case 0x2005:    /* FOUR-PER-EM SPACE */
3913                  case 0x2006:    /* SIX-PER-EM SPACE */
3914                  case 0x2007:    /* FIGURE SPACE */
3915                  case 0x2008:    /* PUNCTUATION SPACE */
3916                  case 0x2009:    /* THIN SPACE */
3917                  case 0x200A:    /* HAIR SPACE */
3918                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3919                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3920                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3921                  RRETURN(MATCH_NOMATCH);
3922                  }
3923                break;
3924    
3925                case OP_HSPACE:
3926                switch(c)
3927                  {
3928                  default: RRETURN(MATCH_NOMATCH);
3929                  case 0x09:      /* HT */
3930                  case 0x20:      /* SPACE */
3931                  case 0xa0:      /* NBSP */
3932                  case 0x1680:    /* OGHAM SPACE MARK */
3933                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3934                  case 0x2000:    /* EN QUAD */
3935                  case 0x2001:    /* EM QUAD */
3936                  case 0x2002:    /* EN SPACE */
3937                  case 0x2003:    /* EM SPACE */
3938                  case 0x2004:    /* THREE-PER-EM SPACE */
3939                  case 0x2005:    /* FOUR-PER-EM SPACE */
3940                  case 0x2006:    /* SIX-PER-EM SPACE */
3941                  case 0x2007:    /* FIGURE SPACE */
3942                  case 0x2008:    /* PUNCTUATION SPACE */
3943                  case 0x2009:    /* THIN SPACE */
3944                  case 0x200A:    /* HAIR SPACE */
3945                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3946                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3947                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3948                  break;
3949                  }
3950                break;
3951    
3952                case OP_NOT_VSPACE:
3953                switch(c)
3954                  {
3955                  default: break;
3956                  case 0x0a:      /* LF */
3957                  case 0x0b:      /* VT */
3958                  case 0x0c:      /* FF */
3959                  case 0x0d:      /* CR */
3960                  case 0x85:      /* NEL */
3961                  case 0x2028:    /* LINE SEPARATOR */
3962                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3963                  RRETURN(MATCH_NOMATCH);
3964                  }
3965                break;
3966    
3967                case OP_VSPACE:
3968                switch(c)
3969                  {
3970                  default: RRETURN(MATCH_NOMATCH);
3971                  case 0x0a:      /* LF */
3972                  case 0x0b:      /* VT */
3973                  case 0x0c:      /* FF */
3974                  case 0x0d:      /* CR */
3975                  case 0x85:      /* NEL */
3976                  case 0x2028:    /* LINE SEPARATOR */
3977                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3978                break;                break;
3979                }                }
3980              break;              break;
# Line 3122  for (;;) Line 4020  for (;;)
4020          {          {
4021          for (fi = min;; fi++)          for (fi = min;; fi++)
4022            {            {
4023            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4024            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4025            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max)
4026                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))              {
4027                CHECK_PARTIAL();
4028                RRETURN(MATCH_NOMATCH);
4029                }
4030              if (eptr >= md->end_subject)
4031                {
4032                SCHECK_PARTIAL();
4033                RRETURN(MATCH_NOMATCH);
4034                }
4035              if (ctype == OP_ANY && IS_NEWLINE(eptr))
4036              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
   
4037            c = *eptr++;            c = *eptr++;
4038            switch(ctype)            switch(ctype)
4039              {              {
4040              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
4041              break;              case OP_ALLANY:
   
4042              case OP_ANYBYTE:              case OP_ANYBYTE:
4043              break;              break;
4044    
# Line 3144  for (;;) Line 4049  for (;;)
4049                case 0x000d:                case 0x000d:
4050                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4051                break;                break;
4052    
4053                case 0x000a:                case 0x000a:
4054                  break;
4055    
4056                case 0x000b:                case 0x000b:
4057                case 0x000c:                case 0x000c:
4058                case 0x0085:                case 0x0085:
4059                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4060                  break;
4061                  }
4062                break;
4063    
4064                case OP_NOT_HSPACE:
4065                switch(c)
4066                  {
4067                  default: break;
4068                  case 0x09:      /* HT */
4069                  case 0x20:      /* SPACE */
4070                  case 0xa0:      /* NBSP */
4071                  RRETURN(MATCH_NOMATCH);
4072                  }
4073                break;
4074    
4075                case OP_HSPACE:
4076                switch(c)
4077                  {
4078                  default: RRETURN(MATCH_NOMATCH);
4079                  case 0x09:      /* HT */
4080                  case 0x20:      /* SPACE */
4081                  case 0xa0:      /* NBSP */
4082                  break;
4083                  }
4084                break;
4085    
4086                case OP_NOT_VSPACE:
4087                switch(c)
4088                  {
4089                  default: break;
4090                  case 0x0a:      /* LF */
4091                  case 0x0b:      /* VT */
4092                  case 0x0c:      /* FF */
4093                  case 0x0d:      /* CR */
4094                  case 0x85:      /* NEL */
4095                  RRETURN(MATCH_NOMATCH);
4096                  }
4097                break;
4098    
4099                case OP_VSPACE:
4100                switch(c)
4101                  {
4102                  default: RRETURN(MATCH_NOMATCH);
4103                  case 0x0a:      /* LF */
4104                  case 0x0b:      /* VT */
4105                  case 0x0c:      /* FF */
4106                  case 0x0d:      /* CR */
4107                  case 0x85:      /* NEL */
4108                break;                break;
4109                }                }
4110              break;              break;
# Line 3214  for (;;) Line 4171  for (;;)
4171              int len = 1;              int len = 1;
4172              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
4173              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4174              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
4175              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4176                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
4177                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3229  for (;;) Line 4186  for (;;)
4186              int len = 1;              int len = 1;
4187              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
4188              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4189              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4190              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4191                break;                break;
4192              eptr+= len;              eptr+= len;
# Line 3242  for (;;) Line 4199  for (;;)
4199              int len = 1;              int len = 1;
4200              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
4201              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4202              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
4203              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4204                break;                break;
4205              eptr+= len;              eptr+= len;
# Line 3255  for (;;) Line 4212  for (;;)
4212              int len = 1;              int len = 1;
4213              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
4214              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4215              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
4216              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4217                break;                break;
4218              eptr+= len;              eptr+= len;
# Line 3265  for (;;) Line 4222  for (;;)
4222    
4223          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4224    
4225            CHECK_PARTIAL();
4226          if (possessive) continue;          if (possessive) continue;
4227          for(;;)          for(;;)
4228            {            {
4229            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
4230            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4231            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
4232            BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
4233            }            }
4234          }          }
4235    
# Line 3284  for (;;) Line 4242  for (;;)
4242            {            {
4243            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
4244            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4245            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
4246            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
4247            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4248              {              {
# Line 3293  for (;;) Line 4251  for (;;)
4251                {                {
4252                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
4253                }                }
4254              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4255              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
4256              eptr += len;              eptr += len;
4257              }              }
# Line 3301  for (;;) Line 4259  for (;;)
4259    
4260          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4261    
4262            CHECK_PARTIAL();
4263          if (possessive) continue;          if (possessive) continue;
4264          for(;;)          for(;;)
4265            {            {
4266            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
4267            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4268            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
4269            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
4270              {              {
4271              int len = 1;              int len = 1;
             BACKCHAR(eptr);  
4272              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr; else
4273                {                {
4274                  BACKCHAR(eptr);
4275                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
4276                }                }
4277              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4278              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
4279              eptr--;              eptr--;
4280              }              }
# Line 3333  for (;;) Line 4292  for (;;)
4292          switch(ctype)          switch(ctype)
4293            {            {
4294            case OP_ANY:            case OP_ANY:
   
           /* Special code is required for UTF8, but when the maximum is  
           unlimited we don't need it, so we repeat the non-UTF8 code. This is  
           probably worth it, because .* is quite a common idiom. */  
   
4295            if (max < INT_MAX)            if (max < INT_MAX)
4296              {              {
4297              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
4298                {                {
4299                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4300                  {                eptr++;
4301                  if (eptr >= md->end_subject) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
4302                }                }
4303              }              }
4304    
# Line 3364  for (;;) Line 4306  for (;;)
4306    
4307            else            else
4308              {              {
4309              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
4310                {                {
4311                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4312                  {                eptr++;
4313                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 }  
               break;  
4314                }                }
4315              else              }
4316              break;
4317    
4318              case OP_ALLANY:
4319              if (max < INT_MAX)
4320                {
4321                for (i = min; i < max; i++)
4322                {                {
4323                c = max - min;                if (eptr >= md->end_subject) break;
4324                if (c > (unsigned int)(md->end_subject - eptr))                eptr++;
4325                  c = md->end_subject - eptr;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
               eptr += c;  
4326                }                }
4327              }              }
4328              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
4329            break;            break;
4330    
4331            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 3405  for (;;) Line 4350  for (;;)
4350                }                }
4351              else              else
4352                {                {
4353                if (c != 0x000a && c != 0x000b && c != 0x000c &&                if (c != 0x000a &&
4354                    c != 0x0085 && c != 0x2028 && c != 0x2029)                    (md->bsr_anycrlf ||
4355                       (c != 0x000b && c != 0x000c &&
4356                        c != 0x0085 && c != 0x2028 && c != 0x2029)))
4357                  break;                  break;
4358                eptr += len;                eptr += len;
4359                }                }
4360              }              }
4361            break;            break;
4362    
4363              case OP_NOT_HSPACE:
4364              case OP_HSPACE:
4365              for (i = min; i < max; i++)
4366                {
4367                BOOL gotspace;
4368                int len = 1;
4369                if (eptr >= md->end_subject) break;
4370                GETCHARLEN(c, eptr, len);
4371                switch(c)
4372                  {
4373                  default: gotspace = FALSE; break;
4374                  case 0x09:      /* HT */
4375                  case 0x20:      /* SPACE */
4376                  case 0xa0:      /* NBSP */
4377                  case 0x1680:    /* OGHAM SPACE MARK */
4378                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4379                  case 0x2000:    /* EN QUAD */
4380                  case 0x2001:    /* EM QUAD */
4381                  case 0x2002:    /* EN SPACE */
4382                  case 0x2003:    /* EM SPACE */
4383                  case 0x2004:    /* THREE-PER-EM SPACE */
4384                  case 0x2005:    /* FOUR-PER-EM SPACE */
4385                  case 0x2006:    /* SIX-PER-EM SPACE */
4386                  case 0x2007:    /* FIGURE SPACE */
4387                  case 0x2008:    /* PUNCTUATION SPACE */
4388                  case 0x2009:    /* THIN SPACE */
4389                  case 0x200A:    /* HAIR SPACE */
4390                  case 0x202f:    /* NARROW NO-BREAK SPACE */
4391                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4392                  case 0x3000:    /* IDEOGRAPHIC SPACE */
4393                  gotspace = TRUE;
4394                  break;
4395                  }
4396                if (gotspace == (ctype == OP_NOT_HSPACE)) break;
4397                eptr += len;
4398                }
4399              break;
4400    
4401              case OP_NOT_VSPACE:
4402              case OP_VSPACE:
4403              for (i = min; i < max; i++)
4404                {
4405                BOOL gotspace;
4406                int len = 1;
4407                if (eptr >= md->end_subject) break;
4408                GETCHARLEN(c, eptr, len);
4409                switch(c)
4410                  {
4411                  default: gotspace = FALSE; break;
4412                  case 0x0a:      /* LF */
4413                  case 0x0b:      /* VT */
4414                  case 0x0c:      /* FF */
4415                  case 0x0d:      /* CR */
4416                  case 0x85:      /* NEL */
4417                  case 0x2028:    /* LINE SEPARATOR */
4418                  case 0x2029:    /* PARAGRAPH SEPARATOR */
4419                  gotspace = TRUE;
4420                  break;
4421                  }
4422                if (gotspace == (ctype == OP_NOT_VSPACE)) break;
4423                eptr += len;
4424                }
4425              break;
4426    
4427            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
4428            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4429              {              {
# Line 3485  for (;;) Line 4496  for (;;)
4496    
4497          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4498    
4499            CHECK_PARTIAL();
4500          if (possessive) continue;          if (possessive) continue;
4501          for(;;)          for(;;)
4502            {            {
4503            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
4504            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4505            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
4506            BACKCHAR(eptr);            BACKCHAR(eptr);
4507            }            }
4508          }          }
4509        else        else
4510  #endif  #endif  /* SUPPORT_UTF8 */
4511    
4512        /* Not UTF-8 mode */        /* Not UTF-8 mode */
4513          {          {
4514          switch(ctype)          switch(ctype)
4515            {            {
4516            case OP_ANY:            case OP_ANY:
4517            if ((ims & PCRE_DOTALL) == 0)            for (i = min; i < max; i++)
4518              {              {
4519              for (i = min; i < max; i++)              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4520                {              eptr++;
               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
               eptr++;  
               }  
             break;  
4521              }              }
4522            /* For DOTALL case, fall through and treat as \C */            break;
4523    
4524              case OP_ALLANY:
4525            case OP_ANYBYTE:            case OP_ANYBYTE:
4526            c = max - min;            c = max - min;
4527            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
# Line 3532  for (;;) Line 4541  for (;;)
4541                }                }
4542              else              else
4543                {                {
4544                if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)                if (c != 0x000a &&
4545                      (md->bsr_anycrlf ||
4546                        (c != 0x000b && c != 0x000c && c != 0x0085)))
4547                  break;                  break;
4548                eptr++;                eptr++;
4549                }                }
4550              }              }
4551            break;            break;
4552    
4553              case OP_NOT_HSPACE:
4554              for (i = min; i < max; i++)
4555                {
4556                if (eptr >= md->end_subject) break;
4557                c = *eptr;
4558                if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4559                eptr++;
4560                }
4561              break;
4562    
4563              case OP_HSPACE:
4564              for (i = min; i < max; i++)
4565                {
4566                if (eptr >= md->end_subject) break;
4567                c = *eptr;
4568                if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4569                eptr++;
4570                }
4571              break;
4572    
4573              case OP_NOT_VSPACE:
4574              for (i = min; i < max; i++)
4575                {
4576                if (eptr >= md->end_subject) break;
4577                c = *eptr;
4578                if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4579                  break;
4580                eptr++;
4581                }
4582              break;
4583    
4584              case OP_VSPACE:
4585              for (i = min; i < max; i++)
4586                {
4587                if (eptr >= md->end_subject) break;
4588                c = *eptr;
4589                if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4590                  break;
4591                eptr++;
4592                }
4593              break;
4594    
4595            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
4596            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4597              {              {
# Line 3599  for (;;) Line 4652  for (;;)
4652    
4653          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4654    
4655            CHECK_PARTIAL();
4656          if (possessive) continue;          if (possessive) continue;
4657          while (eptr >= pp)          while (eptr >= pp)
4658            {            {
4659            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4660            eptr--;            eptr--;
4661            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4662            }            }
# Line 3628  for (;;) Line 4682  for (;;)
4682    
4683    }             /* End of main loop */    }             /* End of main loop */
4684  /* Control never reaches here */  /* Control never reaches here */
4685    
4686    
4687    /* When compiling to use the heap rather than the stack for recursive calls to
4688    match(), the RRETURN() macro jumps here. The number that is saved in
4689    frame->Xwhere indicates which label we actually want to return to. */
4690    
4691    #ifdef NO_RECURSE
4692    #define LBL(val) case val: goto L_RM##val;
4693    HEAP_RETURN:
4694    switch (frame->Xwhere)
4695      {
4696      LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4697      LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
4698      LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
4699      LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
4700      LBL(53) LBL(54)
4701    #ifdef SUPPORT_UTF8
4702      LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
4703      LBL(32) LBL(34) LBL(42) LBL(46)
4704    #ifdef SUPPORT_UCP
4705      LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
4706    #endif  /* SUPPORT_UCP */
4707    #endif  /* SUPPORT_UTF8 */
4708      default:
4709      DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4710      return PCRE_ERROR_INTERNAL;
4711      }
4712    #undef LBL
4713    #endif  /* NO_RECURSE */
4714  }  }
4715    
4716    
# Line 3640  Undefine all the macros that were define Line 4723  Undefine all the macros that were define
4723  #ifdef NO_RECURSE  #ifdef NO_RECURSE
4724  #undef eptr  #undef eptr
4725  #undef ecode  #undef ecode
4726    #undef mstart
4727  #undef offset_top  #undef offset_top
4728  #undef ims  #undef ims
4729  #undef eptrb  #undef eptrb
# Line 3712  Returns:          > 0 => success; value Line 4796  Returns:          > 0 => success; value
4796                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4797  */  */
4798    
4799  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
4800  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4801    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4802    int offsetcount)    int offsetcount)
# Line 3736  const uschar *tables; Line 4820  const uschar *tables;
4820  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4821  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
4822  USPTR end_subject;  USPTR end_subject;
4823    USPTR start_partial = NULL;
4824  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
 eptrblock eptrchain[EPTR_WORK_SIZE];  
4825    
4826  pcre_study_data internal_study;  pcre_study_data internal_study;
4827  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3800  if (re->magic_number != MAGIC_NUMBER) Line 4884  if (re->magic_number != MAGIC_NUMBER)
4884  /* Set up other data */  /* Set up other data */
4885    
4886  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4887  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
4888  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
4889    
4890  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
# Line 3815  end_subject = md->end_subject; Line 4899  end_subject = md->end_subject;
4899    
4900  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4901  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4902    md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4903    
4904  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4905  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
4906  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
4907  md->partial = (options & PCRE_PARTIAL) != 0;  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
4908                  ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
4909  md->hitend = FALSE;  md->hitend = FALSE;
4910    
4911  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
 md->eptrchain = eptrchain;              /* Make workspace generally available */  
4912    
4913  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
4914  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
4915    
4916    /* Handle different \R options. */
4917    
4918    switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
4919      {
4920      case 0:
4921      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
4922        md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
4923      else
4924    #ifdef BSR_ANYCRLF
4925      md->bsr_anycrlf = TRUE;
4926    #else
4927      md->bsr_anycrlf = FALSE;
4928    #endif
4929      break;
4930    
4931      case PCRE_BSR_ANYCRLF:
4932      md->bsr_anycrlf = TRUE;
4933      break;
4934    
4935      case PCRE_BSR_UNICODE:
4936      md->bsr_anycrlf = FALSE;
4937      break;
4938    
4939      default: return PCRE_ERROR_BADNEWLINE;
4940      }
4941    
4942  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
4943  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
4944    
4945  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
4946         PCRE_NEWLINE_BITS)          (pcre_uint32)options) & PCRE_NEWLINE_BITS)
4947    {    {
4948    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
4949    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
4950    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
4951    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
4952         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
4953    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
4954      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4955    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
4956    }    }
4957    
4958  if (newline < 0)  if (newline == -2)
4959      {
4960      md->nltype = NLTYPE_ANYCRLF;
4961      }
4962    else if (newline < 0)
4963    {    {
4964    md->nltype = NLTYPE_ANY;    md->nltype = NLTYPE_ANY;
4965    }    }
# Line 3863  else Line 4979  else
4979      }      }
4980    }    }
4981    
4982  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching was originally supported only for a restricted set of
4983  moment. */  regexes; from release 8.00 there are no restrictions, but the bits are still
4984    defined (though never set). So there's no harm in leaving this code. */
4985    
4986  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
4987    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
4988    
4989  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
# Line 3875  back the character offset. */ Line 4992  back the character offset. */
4992  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4993  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4994    {    {
4995    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
4996      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
4997    if (start_offset > 0 && start_offset < length)    if (start_offset > 0 && start_offset < length)
4998      {      {
4999      int tb = ((uschar *)subject)[start_offset];      int tb = ((USPTR)subject)[start_offset];
5000      if (tb > 127)      if (tb > 127)
5001        {        {
5002        tb &= 0xc0;        tb &= 0xc0;
# Line 3943  studied, there may be a bitmap of possib Line 5060  studied, there may be a bitmap of possib
5060    
5061  if (!anchored)  if (!anchored)
5062    {    {
5063    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
5064      {      {
5065      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
5066      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 3958  if (!anchored) Line 5075  if (!anchored)
5075  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
5076  character" set. */  character" set. */
5077    
5078  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
5079    {    {
5080    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
5081    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 3974  the loop runs just once. */ Line 5091  the loop runs just once. */
5091  for(;;)  for(;;)
5092    {    {
5093    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
5094      USPTR new_start_match;
5095    
5096    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
5097    
# Line 3984  for(;;) Line 5102  for(;;)
5102      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
5103      }      }
5104    
5105    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* If firstline is TRUE, the start of the match is constrained to the first
5106    start of the match is constrained to the first line of a multiline string.    line of a multiline string. That is, the match must be before or at the first
5107    That is, the match must be before or at the first newline. Implement this by    newline. Implement this by temporarily adjusting end_subject so that we stop
5108    temporarily adjusting end_subject so that we stop scanning at a newline. If    scanning at a newline. If the match fails at the newline, later code breaks
5109    the match fails at the newline, later code breaks this loop. */    this loop. */
5110    
5111    if (firstline)    if (firstline)
5112      {      {
5113      USPTR t = start_match;      USPTR t = start_match;
5114    #ifdef SUPPORT_UTF8
5115        if (utf8)
5116          {
5117          while (t < md->end_subject && !IS_NEWLINE(t))
5118            {
5119            t++;
5120            while (t < end_subject && (*t & 0xc0) == 0x80) t++;
5121            }
5122          }
5123        else
5124    #endif
5125      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
5126      end_subject = t;      end_subject = t;
5127      }      }
5128    
5129    /* Now test for a unique first byte */    /* There are some optimizations that avoid running the match if a known
5130      starting point is not found, or if a known later character is not present.
5131      However, there is an option that disables these, for testing and for ensuring
5132      that all callouts do actually occur. */
5133    
5134    if (first_byte >= 0)    if ((options & PCRE_NO_START_OPTIMIZE) == 0)
5135      {      {
5136      if (first_byte_caseless)      /* Advance to a unique first byte if there is one. */
5137        while (start_match < end_subject &&  
5138               md->lcc[*start_match] != first_byte)      if (first_byte >= 0)
5139          start_match++;        {
5140      else        if (first_byte_caseless)
5141        while (start_match < end_subject && *start_match != first_byte)          while (start_match < end_subject && md->lcc[*start_match] != first_byte)
5142          start_match++;            start_match++;
5143      }        else
5144            while (start_match < end_subject && *start_match != first_byte)
5145              start_match++;
5146          }
5147    
5148    /* Or to just after a linebreak for a multiline match if possible */      /* Or to just after a linebreak for a multiline match */
5149    
5150    else if (startline)      else if (startline)
     {  
     if (start_match > md->start_subject + start_offset)  
5151        {        {
5152        while (start_match <= end_subject && !WAS_NEWLINE(start_match))        if (start_match > md->start_subject + start_offset)
5153          start_match++;          {
5154    #ifdef SUPPORT_UTF8
5155            if (utf8)
5156              {
5157              while (start_match < end_subject && !WAS_NEWLINE(start_match))
5158                {
5159                start_match++;
5160                while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5161                  start_match++;
5162                }
5163              }
5164            else
5165    #endif
5166            while (start_match < end_subject && !WAS_NEWLINE(start_match))
5167              start_match++;
5168    
5169        /* If we have just passed a CR and the newline option is ANY, and we are          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
5170        now at a LF, advance the match position by one more character. */          and we are now at a LF, advance the match position by one more character.
5171            */
5172    
5173        if (start_match[-1] == '\r' &&          if (start_match[-1] == CHAR_CR &&
5174             md->nltype == NLTYPE_ANY &&               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
5175             start_match < end_subject &&               start_match < end_subject &&
5176             *start_match == '\n')               *start_match == CHAR_NL)
5177          start_match++;            start_match++;
5178            }
5179        }        }
     }  
5180    
5181    /* Or to a non-unique first char after study */      /* Or to a non-unique first byte after study */
5182    
5183    else if (start_bits != NULL)      else if (start_bits != NULL)
     {  
     while (start_match < end_subject)  
5184        {        {
5185        register unsigned int c = *start_match;        while (start_match < end_subject)
5186        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;          {
5187            register unsigned int c = *start_match;
5188            if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
5189              else break;
5190            }
5191        }        }
5192      }      }   /* Starting optimizations */
5193    
5194    /* Restore fudged end_subject */    /* Restore fudged end_subject */
5195    
# Line 4051  for(;;) Line 5201  for(;;)
5201    printf("\n");    printf("\n");
5202  #endif  #endif
5203    
5204    /* If req_byte is set, we know that that character must appear in the subject    /* If req_byte is set, we know that that character must appear in the
5205    for the match to succeed. If the first character is set, req_byte must be    subject for the match to succeed. If the first character is set, req_byte
5206    later in the subject; otherwise the test starts at the match point. This    must be later in the subject; otherwise the test starts at the match point.
5207    optimization can save a huge amount of backtracking in patterns with nested    This optimization can save a huge amount of backtracking in patterns with
5208    unlimited repeats that aren't going to match. Writing separate code for    nested unlimited repeats that aren't going to match. Writing separate code
5209    cased/caseless versions makes it go faster, as does using an autoincrement    for cased/caseless versions makes it go faster, as does using an
5210    and backing off on a match.    autoincrement and backing off on a match.
5211    
5212    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end
5213    take a long time, and give bad performance on quite ordinary patterns. This    can take a long time, and give bad performance on quite ordinary patterns.
5214    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte    This showed up when somebody was matching something like /^\d+C/ on a
5215    string... so we don't do this when the string is sufficiently long.    32-megabyte string... so we don't do this when the string is sufficiently
5216      long.
5217    
5218    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested, or if
5219    */    disabling is explicitly requested. */
5220    
5221    if (req_byte >= 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
5222          req_byte >= 0 &&
5223        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
5224        !md->partial)        !md->partial)
5225      {      {
# Line 4111  for(;;) Line 5263  for(;;)
5263     &nbs