/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 427 by ph10, Fri Aug 28 09:55:54 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45  #define NLBLOCK md           /* The block containing newline information */  #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49    #define NLBLOCK md             /* Block containing newline information */
50    #define PSSTART start_subject  /* Field containing processed string start */
51    #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    /* Undefine some potentially clashing cpp symbols */
56    
57  /* Structure for building a chain of data that actually lives on the  #undef min
58  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   USPTR epb_saved_eptr;  
 } eptrblock;  
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_THEN         (-996)
78    
79  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
80  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 101  Returns:     nothing Line 109  Returns:     nothing
109  static void  static void
110  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
111  {  {
112  int c;  unsigned int c;
113  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
114  while (length-- > 0)  while (length-- > 0)
115    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 150  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 186  calls by keeping local variables that ne Line 220  calls by keeping local variables that ne
220  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
221  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
222  always used to.  always used to.
223    
224    The original heap-recursive code used longjmp(). However, it seems that this
225    can be very slow on some operating systems. Following a suggestion from Stan
226    Switzer, the use of longjmp() has been abolished, at the cost of having to
227    provide a unique number for each call to RMATCH. There is no way of generating
228    a sequence of numbers at compile time in C. I have given them names, to make
229    them stand out more clearly.
230    
231    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
232    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
233    tests. Furthermore, not using longjmp() means that local dynamic variables
234    don't have indeterminate values; this has meant that the frame size can be
235    reduced because the result can be "passed back" by straight setting of the
236    variable instead of being passed in the frame.
237  ****************************************************************************  ****************************************************************************
238  ***************************************************************************/  ***************************************************************************/
239    
240    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
241    below must be updated in sync.  */
242    
243    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
244           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
245           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
246           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
247           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
248           RM51,  RM52, RM53, RM54 };
249    
250  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
251  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
252    actually used in this definition. */
253    
254  #ifndef NO_RECURSE  #ifndef NO_RECURSE
255  #define REGISTER register  #define REGISTER register
256    
257  #ifdef DEBUG  #ifdef DEBUG
258  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
259    { \    { \
260    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
261    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
262    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
263    }    }
264  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 208  versions and production versions. */ Line 267  versions and production versions. */
267    return ra; \    return ra; \
268    }    }
269  #else  #else
270  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
271    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
272  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
273  #endif  #endif
274    
275  #else  #else
276    
277    
278  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
279  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
280  match(), which never changes. */  argument of match(), which never changes. */
281    
282  #define REGISTER  #define REGISTER
283    
284  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
285    {\    {\
286    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
287    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
288      {\    newframe->Xeptr = ra;\
289      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
290      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
291      newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
292      newframe->Xims = re;\    newframe->Xims = re;\
293      newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
294      newframe->Xflags = rg;\    newframe->Xflags = rg;\
295      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
296      newframe->Xprevframe = frame;\    newframe->Xprevframe = frame;\
297      frame = newframe;\    frame = newframe;\
298      DPRINTF(("restarting from line %d\n", __LINE__));\    DPRINTF(("restarting from line %d\n", __LINE__));\
299      goto HEAP_RECURSE;\    goto HEAP_RECURSE;\
300      }\    L_##rw:\
301    else\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     {\  
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
302    }    }
303    
304  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 254  match(), which never changes. */ Line 308  match(), which never changes. */
308    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
309    if (frame != NULL)\    if (frame != NULL)\
310      {\      {\
311      frame->Xresult = ra;\      rrc = ra;\
312      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
313      }\      }\
314    return ra;\    return ra;\
315    }    }
# Line 269  typedef struct heapframe { Line 322  typedef struct heapframe {
322    
323    /* Function arguments that may change */    /* Function arguments that may change */
324    
325    const uschar *Xeptr;    USPTR Xeptr;
326    const uschar *Xecode;    const uschar *Xecode;
327      USPTR Xmstart;
328    int Xoffset_top;    int Xoffset_top;
329    long int Xims;    long int Xims;
330    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 279  typedef struct heapframe { Line 333  typedef struct heapframe {
333    
334    /* Function local variables */    /* Function local variables */
335    
336    const uschar *Xcallpat;    USPTR Xcallpat;
337    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
338    const uschar *Xdata;    USPTR Xcharptr;
339    const uschar *Xnext;  #endif
340    const uschar *Xpp;    USPTR Xdata;
341    const uschar *Xprev;    USPTR Xnext;
342    const uschar *Xsaved_eptr;    USPTR Xpp;
343      USPTR Xprev;
344      USPTR Xsaved_eptr;
345    
346    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
347    
348    BOOL Xcur_is_word;    BOOL Xcur_is_word;
349    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
350    BOOL Xprev_is_word;    BOOL Xprev_is_word;
351    
352    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
# Line 303  typedef struct heapframe { Line 358  typedef struct heapframe {
358    int Xprop_category;    int Xprop_category;
359    int Xprop_chartype;    int Xprop_chartype;
360    int Xprop_script;    int Xprop_script;
361    int *Xprop_test_variable;    int Xoclength;
362      uschar Xocchars[8];
363  #endif  #endif
364    
365      int Xcodelink;
366    int Xctype;    int Xctype;
367    int Xfc;    unsigned int Xfc;
368    int Xfi;    int Xfi;
369    int Xlength;    int Xlength;
370    int Xmax;    int Xmax;
# Line 321  typedef struct heapframe { Line 378  typedef struct heapframe {
378    
379    eptrblock Xnewptrb;    eptrblock Xnewptrb;
380    
381    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
382    
383    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
384    
385  } heapframe;  } heapframe;
386    
# Line 340  typedef struct heapframe { Line 396  typedef struct heapframe {
396  *         Match from current position            *  *         Match from current position            *
397  *************************************************/  *************************************************/
398    
399  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
400  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
401  same response.  same response. */
402    
403    /* These macros pack up tests that are used for partial matching, and which
404    appears several times in the code. We set the "hit end" flag if the pointer is
405    at the end of the subject and also past the start of the subject (i.e.
406    something has been matched). For hard partial matching, we then return
407    immediately. The second one is used when we already know we are past the end of
408    the subject. */
409    
410    #define CHECK_PARTIAL()\
411      if (md->partial && eptr >= md->end_subject && eptr > mstart)\
412        {\
413        md->hitend = TRUE;\
414        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
415        }
416    
417    #define SCHECK_PARTIAL()\
418      if (md->partial && eptr > mstart)\
419        {\
420        md->hitend = TRUE;\
421        md->hitend = TRUE;\
422        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
423        }
424    
425  Performance note: It might be tempting to extract commonly used fields from the  
426  md structure (e.g. utf8, end_subject) into individual variables to improve  /* Performance note: It might be tempting to extract commonly used fields from
427    the md structure (e.g. utf8, end_subject) into individual variables to improve
428  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
429  made performance worse.  made performance worse.
430    
431  Arguments:  Arguments:
432     eptr        pointer in subject     eptr        pointer to current character in subject
433     ecode       position in code     ecode       pointer to current position in compiled code
434       mstart      pointer to the current match start position (can be modified
435                     by encountering \K)
436     offset_top  current top pointer     offset_top  current top pointer
437     md          pointer to "static" info for the match     md          pointer to "static" info for the match
438     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 362  Arguments: Line 440  Arguments:
440                   brackets - for testing for empty matches                   brackets - for testing for empty matches
441     flags       can contain     flags       can contain
442                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
443                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
444                       group that can match an empty string
445     rdepth      the recursion depth     rdepth      the recursion depth
446    
447  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 372  Returns:       MATCH_MATCH if matched Line 451  Returns:       MATCH_MATCH if matched
451  */  */
452    
453  static int  static int
454  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
455    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
456    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
457  {  {
458  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
459  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
460  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
461    
462  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
463  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
464  register unsigned int  c;  /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
465  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
466    
467    BOOL minimize, possessive; /* Quantifier options */
468    int condcode;
469    
470  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
471  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
472  heap storage. Set up the top-level frame here; others are obtained from the  heap storage. Set up the top-level frame here; others are obtained from the
# Line 398  frame->Xprevframe = NULL;            /* Line 480  frame->Xprevframe = NULL;            /*
480    
481  frame->Xeptr = eptr;  frame->Xeptr = eptr;
482  frame->Xecode = ecode;  frame->Xecode = ecode;
483    frame->Xmstart = mstart;
484  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
485  frame->Xims = ims;  frame->Xims = ims;
486  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 412  HEAP_RECURSE: Line 495  HEAP_RECURSE:
495    
496  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
497  #define ecode              frame->Xecode  #define ecode              frame->Xecode
498    #define mstart             frame->Xmstart
499  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
500  #define ims                frame->Xims  #define ims                frame->Xims
501  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 424  HEAP_RECURSE: Line 508  HEAP_RECURSE:
508  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
509  #endif  #endif
510  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
511    #define codelink           frame->Xcodelink
512  #define data               frame->Xdata  #define data               frame->Xdata
513  #define next               frame->Xnext  #define next               frame->Xnext
514  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 434  HEAP_RECURSE: Line 519  HEAP_RECURSE:
519    
520  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
521  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
522  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
523    
524  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
# Line 446  HEAP_RECURSE: Line 530  HEAP_RECURSE:
530  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
531  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
532  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
533  #define prop_test_variable frame->Xprop_test_variable  #define oclength           frame->Xoclength
534    #define occhars            frame->Xocchars
535  #endif  #endif
536    
537  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 470  HEAP_RECURSE: Line 555  HEAP_RECURSE:
555  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
556  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
557    
558  #else  #else         /* NO_RECURSE not defined */
559  #define fi i  #define fi i
560  #define fc c  #define fc c
561    
# Line 489  recursion_info new_recursive;      /* wi Line 574  recursion_info new_recursive;      /* wi
574                                     /* that do not have to be preserved over  */                                     /* that do not have to be preserved over  */
575  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
576  BOOL condition;  BOOL condition;
 BOOL minimize;  
577  BOOL prev_is_word;  BOOL prev_is_word;
578    
579  unsigned long int original_ims;  unsigned long int original_ims;
# Line 501  int prop_fail_result; Line 585  int prop_fail_result;
585  int prop_category;  int prop_category;
586  int prop_chartype;  int prop_chartype;
587  int prop_script;  int prop_script;
588  int *prop_test_variable;  int oclength;
589    uschar occhars[8];
590  #endif  #endif
591    
592    int codelink;
593  int ctype;  int ctype;
594  int length;  int length;
595  int max;  int max;
# Line 516  int save_offset1, save_offset2, save_off Line 602  int save_offset1, save_offset2, save_off
602  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
603    
604  eptrblock newptrb;  eptrblock newptrb;
605  #endif  #endif     /* NO_RECURSE */
606    
607  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
608  variables. */  variables. */
# Line 524  variables. */ Line 610  variables. */
610  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
611  prop_value = 0;  prop_value = 0;
612  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_variable = NULL;  
613  #endif  #endif
614    
615    
616  /* This label is used for tail recursion, which is used in a few cases even  /* This label is used for tail recursion, which is used in a few cases even
617  when NO_RECURSE is not defined, in order to reduce the amount of stack that is  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
618  used. Thanks to Ian Taylor for noticing this possibility and sending the  used. Thanks to Ian Taylor for noticing this possibility and sending the
# Line 542  defined). However, RMATCH isn't like a f Line 628  defined). However, RMATCH isn't like a f
628  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
629  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
630    
631    #ifdef SUPPORT_UTF8
632    utf8 = md->utf8;       /* Local copy of the flag */
633    #else
634    utf8 = FALSE;
635    #endif
636    
637  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
638  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
639    
# Line 550  if (rdepth >= md->match_limit_recursion) Line 642  if (rdepth >= md->match_limit_recursion)
642    
643  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
644    
645  #ifdef SUPPORT_UTF8  /* At the start of a group with an unlimited repeat that may match an empty
646  utf8 = md->utf8;       /* Local copy of the flag */  string, the match_cbegroup flag is set. When this is the case, add the current
647  #else  subject pointer to the chain of such remembered pointers, to be checked when we
648  utf8 = FALSE;  hit the closing ket, in order to break infinite loops that match no characters.
649  #endif  When match() is called in other circumstances, don't add to the chain. The
650    match_cbegroup flag must NOT be used with tail recursion, because the memory
651  /* At the start of a bracketed group, add the current subject pointer to the  block that is used is on the stack, so a new one may be required for each
652  stack of such pointers, to be re-instated at the end of the group when we hit  match(). */
 the closing ket. When match() is called in other circumstances, we don't add to  
 this stack. */  
653    
654  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
655    {    {
   newptrb.epb_prev = eptrb;  
656    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
657      newptrb.epb_prev = eptrb;
658    eptrb = &newptrb;    eptrb = &newptrb;
659    }    }
660    
661  /* Now start processing the operations. */  /* Now start processing the opcodes. */
662    
663  for (;;)  for (;;)
664    {    {
665      minimize = possessive = FALSE;
666    op = *ecode;    op = *ecode;
   minimize = FALSE;  
667    
668    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
669    matching at least one subject character. */    matching at least one subject character. This code is now wrapped in a macro
670      because it appears several times below. */
671    
672    if (md->partial &&    CHECK_PARTIAL();
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
   /* Opening capturing bracket. If there is space in the offset vector, save  
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
673    
674    if (op > OP_BRA)    switch(op)
675      {      {
676      number = op - OP_BRA;      case OP_FAIL:
677        RRETURN(MATCH_NOMATCH);
678    
679      /* For extended extraction brackets (large number), we have to fish out the      case OP_PRUNE:
680      number from a dummy opcode at the start. */      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
681          ims, eptrb, flags, RM51);
682      if (number > EXTRACT_BASIC_MAX)      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
683        number = GET2(ecode, 2+LINK_SIZE);      RRETURN(MATCH_PRUNE);
684    
685        case OP_COMMIT:
686        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
687          ims, eptrb, flags, RM52);
688        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
689        RRETURN(MATCH_COMMIT);
690    
691        case OP_SKIP:
692        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
693          ims, eptrb, flags, RM53);
694        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
695        md->start_match_ptr = eptr;   /* Pass back current position */
696        RRETURN(MATCH_SKIP);
697    
698        case OP_THEN:
699        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
700          ims, eptrb, flags, RM54);
701        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
702        RRETURN(MATCH_THEN);
703    
704        /* Handle a capturing bracket. If there is space in the offset vector, save
705        the current subject position in the working slot at the top of the vector.
706        We mustn't change the current values of the data slot, because they may be
707        set from a previous iteration of this group, and be referred to by a
708        reference inside the group.
709    
710        If the bracket fails to match, we need to restore this value and also the
711        values of the final offsets, in case they were set by a previous iteration
712        of the same bracket.
713    
714        If there isn't enough space in the offset vector, treat this as if it were
715        a non-capturing bracket. Don't worry about setting the flag for the error
716        case here; that is handled in the code for KET. */
717    
718        case OP_CBRA:
719        case OP_SCBRA:
720        number = GET2(ecode, 1+LINK_SIZE);
721      offset = number << 1;      offset = number << 1;
722    
723  #ifdef DEBUG  #ifdef DEBUG
724      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
725        printf("subject=");
726      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
727      printf("\n");      printf("\n");
728  #endif  #endif
# Line 624  for (;;) Line 737  for (;;)
737        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
738        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
739    
740          flags = (op == OP_SCBRA)? match_cbegroup : 0;
741        do        do
742          {          {
743          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
744            match_isgroup);            ims, eptrb, flags, RM1);
745          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
746          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
747          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
748          }          }
# Line 643  for (;;) Line 757  for (;;)
757        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
758        }        }
759    
760      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
761        as a non-capturing bracket. */
     else op = OP_BRA;  
     }  
   
   /* Other types of node can be handled by a switch */  
762    
763    switch(op)      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
764      {      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
     case OP_BRA:     /* Non-capturing bracket: optimized */  
     DPRINTF(("start bracket 0\n"));  
765    
766      /* Loop for all the alternatives */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
767    
768        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
769        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
770    
771        /* Non-capturing bracket. Loop for all the alternatives. When we get to the
772        final alternative within the brackets, we would return the result of a
773        recursive call to match() whatever happened. We can reduce stack usage by
774        turning this into a tail recursion, except in the case when match_cbegroup
775        is set. */
776    
777        case OP_BRA:
778        case OP_SBRA:
779        DPRINTF(("start non-capturing bracket\n"));
780        flags = (op >= OP_SBRA)? match_cbegroup : 0;
781      for (;;)      for (;;)
782        {        {
783        /* When we get to the final alternative within the brackets, we would        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
784        return the result of a recursive call to match() whatever happened. We          {
785        can reduce stack usage by turning this into a tail recursion. */          if (flags == 0)    /* Not a possibly empty group */
786              {
787        if (ecode[GET(ecode, 1)] != OP_ALT)            ecode += _pcre_OP_lengths[*ecode];
788         {            DPRINTF(("bracket 0 tail recursion\n"));
789         ecode += 1 + LINK_SIZE;            goto TAIL_RECURSE;
790         flags = match_isgroup;            }
791         DPRINTF(("bracket 0 tail recursion\n"));  
792         goto TAIL_RECURSE;          /* Possibly empty group; can't use tail recursion. */
793         }  
794            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
795              eptrb, flags, RM48);
796            RRETURN(rrc);
797            }
798    
799        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
800        otherwise return. */        otherwise return. */
801    
802        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
803          match_isgroup);          eptrb, flags, RM2);
804        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
805        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
806        }        }
807      /* Control never reaches here. */      /* Control never reaches here. */
# Line 688  for (;;) Line 813  for (;;)
813      obeyed, we can use tail recursion to avoid using another stack frame. */      obeyed, we can use tail recursion to avoid using another stack frame. */
814    
815      case OP_COND:      case OP_COND:
816      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
817        codelink= GET(ecode, 1);
818    
819        /* Because of the way auto-callout works during compile, a callout item is
820        inserted between OP_COND and an assertion condition. */
821    
822        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
823          {
824          if (pcre_callout != NULL)
825            {
826            pcre_callout_block cb;
827            cb.version          = 1;   /* Version 1 of the callout block */
828            cb.callout_number   = ecode[LINK_SIZE+2];
829            cb.offset_vector    = md->offset_vector;
830            cb.subject          = (PCRE_SPTR)md->start_subject;
831            cb.subject_length   = md->end_subject - md->start_subject;
832            cb.start_match      = mstart - md->start_subject;
833            cb.current_position = eptr - md->start_subject;
834            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
835            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
836            cb.capture_top      = offset_top/2;
837            cb.capture_last     = md->capture_last;
838            cb.callout_data     = md->callout_data;
839            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
840            if (rrc < 0) RRETURN(rrc);
841            }
842          ecode += _pcre_OP_lengths[OP_CALLOUT];
843          }
844    
845        condcode = ecode[LINK_SIZE+1];
846    
847        /* Now see what the actual condition is */
848    
849        if (condcode == OP_RREF)         /* Recursion test */
850          {
851          offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
852          condition = md->recursive != NULL &&
853            (offset == RREF_ANY || offset == md->recursive->group_num);
854          ecode += condition? 3 : GET(ecode, 1);
855          }
856    
857        else if (condcode == OP_CREF)    /* Group used test */
858        {        {
859        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
860        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
861          (md->recursive != NULL) :        ecode += condition? 3 : GET(ecode, 1);
862          (offset < offset_top && md->offset_vector[offset] >= 0);        }
863        ecode += condition? (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1));  
864        flags = match_isgroup;      else if (condcode == OP_DEF)     /* DEFINE - always false */
865        goto TAIL_RECURSE;        {
866          condition = FALSE;
867          ecode += GET(ecode, 1);
868        }        }
869    
870      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
871      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
872        assertion. */
873    
874      else      else
875        {        {
876        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
877            match_condassert | match_isgroup);            match_condassert, RM3);
878        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
879          {          {
880          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
881            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
882          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
883          }          }
884        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
885          {          {
886          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
887          }          }
888        else ecode += GET(ecode, 1);        else
889            {
890            condition = FALSE;
891            ecode += codelink;
892            }
893          }
894    
895        /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
896        we can use tail recursion to avoid using another stack frame. */      we can use tail recursion to avoid using another stack frame, except when
897        match_cbegroup is required for an unlimited repeat of a possibly empty
898        group. If the second alternative doesn't exist, we can just plough on. */
899    
900        if (condition || *ecode == OP_ALT)
901          {
902          ecode += 1 + LINK_SIZE;
903          if (op == OP_SCOND)        /* Possibly empty group */
904            {
905            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
906            RRETURN(rrc);
907            }
908          else                       /* Group must match something */
909            {
910            flags = 0;
911            goto TAIL_RECURSE;
912            }
913          }
914        else                         /* Condition false & no alternative */
915          {
916        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
       flags = match_isgroup;  
       goto TAIL_RECURSE;  
917        }        }
     /* Control never reaches here */  
   
     /* Skip over conditional reference or large extraction number data if  
     encountered. */  
   
     case OP_CREF:  
     case OP_BRANUMBER:  
     ecode += 3;  
918      break;      break;
919    
     /* End of the pattern. If we are in a recursion, we should restore the  
     offsets appropriately and continue from after the call. */  
920    
921        /* End of the pattern, either real or forced. If we are in a top-level
922        recursion, we should restore the offsets appropriately and continue from
923        after the call. */
924    
925        case OP_ACCEPT:
926      case OP_END:      case OP_END:
927      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
928        {        {
# Line 745  for (;;) Line 931  for (;;)
931        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
932        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
933          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
934        md->start_match = rec->save_start;        mstart = rec->save_start;
935        ims = original_ims;        ims = original_ims;
936        ecode = rec->after_call;        ecode = rec->after_call;
937        break;        break;
# Line 754  for (;;) Line 940  for (;;)
940      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
941      string - backtracking will then try other alternatives, if any. */      string - backtracking will then try other alternatives, if any. */
942    
943      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
944      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
945      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
946        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
947      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
948    
949      /* Change option settings */      /* Change option settings */
# Line 777  for (;;) Line 964  for (;;)
964      case OP_ASSERTBACK:      case OP_ASSERTBACK:
965      do      do
966        {        {
967        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
968          match_isgroup);          RM4);
969        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
970        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
971        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
972        }        }
973      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 804  for (;;) Line 991  for (;;)
991      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
992      do      do
993        {        {
994        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
995          match_isgroup);          RM5);
996        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
997        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
998        ecode += GET(ecode,1);        ecode += GET(ecode,1);
999        }        }
1000      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 826  for (;;) Line 1013  for (;;)
1013  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1014      if (utf8)      if (utf8)
1015        {        {
1016        c = GET(ecode,1);        i = GET(ecode, 1);
1017        for (i = 0; i < c; i++)        while (i-- > 0)
1018          {          {
1019          eptr--;          eptr--;
1020          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1021          BACKCHAR(eptr)          BACKCHAR(eptr);
1022          }          }
1023        }        }
1024      else      else
# Line 840  for (;;) Line 1027  for (;;)
1027      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1028    
1029        {        {
1030        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
1031        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1032        }        }
1033    
# Line 862  for (;;) Line 1049  for (;;)
1049        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1050        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1051        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
1052        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
1053        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
1054        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1055        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
# Line 897  for (;;) Line 1084  for (;;)
1084      case OP_RECURSE:      case OP_RECURSE:
1085        {        {
1086        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1087        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
1088            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
1089    
1090        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1091    
# Line 929  for (;;) Line 1111  for (;;)
1111    
1112        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1113              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1114        new_recursive.save_start = md->start_match;        new_recursive.save_start = mstart;
1115        md->start_match = eptr;        mstart = eptr;
1116    
1117        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1118        restore the offset and recursion data. */        restore the offset and recursion data. */
1119    
1120        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1121          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1122        do        do
1123          {          {
1124          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1125              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
1126          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
1127            {            {
1128            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
# Line 948  for (;;) Line 1131  for (;;)
1131              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1132            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
1133            }            }
1134          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1135            {            {
1136            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1137              if (new_recursive.offset_save != stacksave)
1138                (pcre_free)(new_recursive.offset_save);
1139            RRETURN(rrc);            RRETURN(rrc);
1140            }            }
1141    
# Line 982  for (;;) Line 1167  for (;;)
1167    
1168      do      do
1169        {        {
1170        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
         eptrb, match_isgroup);  
1171        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1172        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1173        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1174        }        }
1175      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 997  for (;;) Line 1181  for (;;)
1181      /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1182      mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1183    
1184      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1185    
1186      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1187      eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
# Line 1028  for (;;) Line 1212  for (;;)
1212    
1213      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1214        {        {
1215        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1216        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1217        ecode = prev;        ecode = prev;
1218        flags = match_isgroup;        flags = 0;
1219        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1220        }        }
1221      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1222        {        {
1223        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1224        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1225        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1226        flags = 0;        flags = 0;
# Line 1051  for (;;) Line 1235  for (;;)
1235      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1236      break;      break;
1237    
1238      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1239      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1240      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1241      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1242      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1243    
1244      case OP_BRAZERO:      case OP_BRAZERO:
1245        {        {
1246        next = ecode+1;        next = ecode+1;
1247        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1248        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1249        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1250        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1251        }        }
1252      break;      break;
1253    
1254      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1255        {        {
1256        next = ecode+1;        next = ecode+1;
1257        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1258        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1259        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1260        ecode++;        ecode++;
1261        }        }
1262      break;      break;
1263    
1264      /* End of a group, repeated or non-repeating. If we are at the end of      case OP_SKIPZERO:
1265      an assertion "group", stop matching and return MATCH_MATCH, but record the        {
1266      current high water mark for use by positive assertions. Do this also        next = ecode+1;
1267      for the "once" (not-backup up) groups. */        do next += GET(next,1); while (*next == OP_ALT);
1268          ecode = next + 1 + LINK_SIZE;
1269          }
1270        break;
1271    
1272        /* End of a group, repeated or non-repeating. */
1273    
1274      case OP_KET:      case OP_KET:
1275      case OP_KETRMIN:      case OP_KETRMIN:
1276      case OP_KETRMAX:      case OP_KETRMAX:
1277      prev = ecode - GET(ecode, 1);      prev = ecode - GET(ecode, 1);
     saved_eptr = eptrb->epb_saved_eptr;  
1278    
1279      /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1280        infinite repeats of empty string matches, retrieve the subject start from
1281        the chain. Otherwise, set it NULL. */
1282    
1283      eptrb = eptrb->epb_prev;      if (*prev >= OP_SBRA)
1284          {
1285          saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1286          eptrb = eptrb->epb_prev;              /* Backup to previous group */
1287          }
1288        else saved_eptr = NULL;
1289    
1290        /* If we are at the end of an assertion group, stop matching and return
1291        MATCH_MATCH, but record the current high water mark for use by positive
1292        assertions. Do this also for the "once" (atomic) groups. */
1293    
1294      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1295          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1102  for (;;) Line 1300  for (;;)
1300        RRETURN(MATCH_MATCH);        RRETURN(MATCH_MATCH);
1301        }        }
1302    
1303      /* In all other cases except a conditional group we have to check the      /* For capturing groups we have to check the group number back at the start
1304      group number back at the start and if necessary complete handling an      and if necessary complete handling an extraction by setting the offsets and
1305      extraction by setting the offsets and bumping the high water mark. */      bumping the high water mark. Note that whole-pattern recursion is coded as
1306        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1307        when the OP_END is reached. Other recursion is handled here. */
1308    
1309      if (*prev != OP_COND)      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1310        {        {
1311        number = *prev - OP_BRA;        number = GET2(prev, 1+LINK_SIZE);
   
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);  
1312        offset = number << 1;        offset = number << 1;
1313    
1314  #ifdef DEBUG  #ifdef DEBUG
# Line 1121  for (;;) Line 1316  for (;;)
1316        printf("\n");        printf("\n");
1317  #endif  #endif
1318    
1319        /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1320        of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
       into group 0, so it won't be picked up here. Instead, we catch it when  
       the OP_END is reached. */  
   
       if (number > 0)  
1321          {          {
1322          md->capture_last = number;          md->offset_vector[offset] =
1323          if (offset >= md->offset_max) md->offset_overflow = TRUE; else            md->offset_vector[md->offset_end - number];
1324            {          md->offset_vector[offset+1] = eptr - md->start_subject;
1325            md->offset_vector[offset] =          if (offset_top <= offset) offset_top = offset + 2;
1326              md->offset_vector[md->offset_end - number];          }
1327            md->offset_vector[offset+1] = eptr - md->start_subject;  
1328            if (offset_top <= offset) offset_top = offset + 2;        /* Handle a recursively called group. Restore the offsets
1329            }        appropriately and continue from after the call. */
1330    
1331          /* Handle a recursively called group. Restore the offsets        if (md->recursive != NULL && md->recursive->group_num == number)
1332          appropriately and continue from after the call. */          {
1333            recursion_info *rec = md->recursive;
1334          if (md->recursive != NULL && md->recursive->group_num == number)          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1335            {          md->recursive = rec->prevrec;
1336            recursion_info *rec = md->recursive;          mstart = rec->save_start;
1337            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          memcpy(md->offset_vector, rec->offset_save,
1338            md->recursive = rec->prevrec;            rec->saved_max * sizeof(int));
1339            md->start_match = rec->save_start;          ecode = rec->after_call;
1340            memcpy(md->offset_vector, rec->offset_save,          ims = original_ims;
1341              rec->saved_max * sizeof(int));          break;
           ecode = rec->after_call;  
           ims = original_ims;  
           break;  
           }  
1342          }          }
1343        }        }
1344    
1345      /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1346      the group. */      flags, in case they got changed during the group. */
1347    
1348      ims = original_ims;      ims = original_ims;
1349      DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
# Line 1175  for (;;) Line 1362  for (;;)
1362    
1363      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1364      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1365      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1366        unlimited repeat of a group that can match an empty string. */
1367    
1368        flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1369    
1370      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1371        {        {
1372        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1373        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1374          if (flags != 0)    /* Could match an empty string */
1375            {
1376            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1377            RRETURN(rrc);
1378            }
1379        ecode = prev;        ecode = prev;
       flags = match_isgroup;  
1380        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1381        }        }
1382      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1383        {        {
1384        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1385        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1386        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1387        flags = 0;        flags = 0;
# Line 1202  for (;;) Line 1396  for (;;)
1396      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1397        {        {
1398        if (eptr != md->start_subject &&        if (eptr != md->start_subject &&
1399            (eptr == md->end_subject ||            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
            eptr < md->start_subject + md->nllen ||  
            !IS_NEWLINE(eptr - md->nllen)))  
1400          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1401        ecode++;        ecode++;
1402        break;        break;
# Line 1225  for (;;) Line 1417  for (;;)
1417      ecode++;      ecode++;
1418      break;      break;
1419    
1420        /* Reset the start of match point */
1421    
1422        case OP_SET_SOM:
1423        mstart = eptr;
1424        ecode++;
1425        break;
1426    
1427      /* Assert before internal newline if multiline, or before a terminating      /* Assert before internal newline if multiline, or before a terminating
1428      newline unless endonly is set, else end of subject unless noteol is set. */      newline unless endonly is set, else end of subject unless noteol is set. */
1429    
# Line 1244  for (;;) Line 1443  for (;;)
1443        if (!md->endonly)        if (!md->endonly)
1444          {          {
1445          if (eptr != md->end_subject &&          if (eptr != md->end_subject &&
1446              (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1447            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1448          ecode++;          ecode++;
1449          break;          break;
# Line 1263  for (;;) Line 1462  for (;;)
1462    
1463      case OP_EODN:      case OP_EODN:
1464      if (eptr != md->end_subject &&      if (eptr != md->end_subject &&
1465          (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1466        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1467      ecode++;      ecode++;
1468      break;      break;
# Line 1283  for (;;) Line 1482  for (;;)
1482          {          {
1483          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1484            {            {
1485            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1486            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1487            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1488            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1317  for (;;) Line 1516  for (;;)
1516      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1517    
1518      case OP_ANY:      case OP_ANY:
1519      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1520        {      /* Fall through */
1521        if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))  
1522          RRETURN(MATCH_NOMATCH);      case OP_ALLANY:
       }  
1523      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1524      if (utf8)      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1525      ecode++;      ecode++;
1526      break;      break;
1527    
# Line 1414  for (;;) Line 1611  for (;;)
1611      ecode++;      ecode++;
1612      break;      break;
1613    
1614        case OP_ANYNL:
1615        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1616        GETCHARINCTEST(c, eptr);
1617        switch(c)
1618          {
1619          default: RRETURN(MATCH_NOMATCH);
1620          case 0x000d:
1621          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1622          break;
1623    
1624          case 0x000a:
1625          break;
1626    
1627          case 0x000b:
1628          case 0x000c:
1629          case 0x0085:
1630          case 0x2028:
1631          case 0x2029:
1632          if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1633          break;
1634          }
1635        ecode++;
1636        break;
1637    
1638        case OP_NOT_HSPACE:
1639        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1640        GETCHARINCTEST(c, eptr);
1641        switch(c)
1642          {
1643          default: break;
1644          case 0x09:      /* HT */
1645          case 0x20:      /* SPACE */
1646          case 0xa0:      /* NBSP */
1647          case 0x1680:    /* OGHAM SPACE MARK */
1648          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1649          case 0x2000:    /* EN QUAD */
1650          case 0x2001:    /* EM QUAD */
1651          case 0x2002:    /* EN SPACE */
1652          case 0x2003:    /* EM SPACE */
1653          case 0x2004:    /* THREE-PER-EM SPACE */
1654          case 0x2005:    /* FOUR-PER-EM SPACE */
1655          case 0x2006:    /* SIX-PER-EM SPACE */
1656          case 0x2007:    /* FIGURE SPACE */
1657          case 0x2008:    /* PUNCTUATION SPACE */
1658          case 0x2009:    /* THIN SPACE */
1659          case 0x200A:    /* HAIR SPACE */
1660          case 0x202f:    /* NARROW NO-BREAK SPACE */
1661          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1662          case 0x3000:    /* IDEOGRAPHIC SPACE */
1663          RRETURN(MATCH_NOMATCH);
1664          }
1665        ecode++;
1666        break;
1667    
1668        case OP_HSPACE:
1669        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1670        GETCHARINCTEST(c, eptr);
1671        switch(c)
1672          {
1673          default: RRETURN(MATCH_NOMATCH);
1674          case 0x09:      /* HT */
1675          case 0x20:      /* SPACE */
1676          case 0xa0:      /* NBSP */
1677          case 0x1680:    /* OGHAM SPACE MARK */
1678          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1679          case 0x2000:    /* EN QUAD */
1680          case 0x2001:    /* EM QUAD */
1681          case 0x2002:    /* EN SPACE */
1682          case 0x2003:    /* EM SPACE */
1683          case 0x2004:    /* THREE-PER-EM SPACE */
1684          case 0x2005:    /* FOUR-PER-EM SPACE */
1685          case 0x2006:    /* SIX-PER-EM SPACE */
1686          case 0x2007:    /* FIGURE SPACE */
1687          case 0x2008:    /* PUNCTUATION SPACE */
1688          case 0x2009:    /* THIN SPACE */
1689          case 0x200A:    /* HAIR SPACE */
1690          case 0x202f:    /* NARROW NO-BREAK SPACE */
1691          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1692          case 0x3000:    /* IDEOGRAPHIC SPACE */
1693          break;
1694          }
1695        ecode++;
1696        break;
1697    
1698        case OP_NOT_VSPACE:
1699        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1700        GETCHARINCTEST(c, eptr);
1701        switch(c)
1702          {
1703          default: break;
1704          case 0x0a:      /* LF */
1705          case 0x0b:      /* VT */
1706          case 0x0c:      /* FF */
1707          case 0x0d:      /* CR */
1708          case 0x85:      /* NEL */
1709          case 0x2028:    /* LINE SEPARATOR */
1710          case 0x2029:    /* PARAGRAPH SEPARATOR */
1711          RRETURN(MATCH_NOMATCH);
1712          }
1713        ecode++;
1714        break;
1715    
1716        case OP_VSPACE:
1717        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1718        GETCHARINCTEST(c, eptr);
1719        switch(c)
1720          {
1721          default: RRETURN(MATCH_NOMATCH);
1722          case 0x0a:      /* LF */
1723          case 0x0b:      /* VT */
1724          case 0x0c:      /* FF */
1725          case 0x0d:      /* CR */
1726          case 0x85:      /* NEL */
1727          case 0x2028:    /* LINE SEPARATOR */
1728          case 0x2029:    /* PARAGRAPH SEPARATOR */
1729          break;
1730          }
1731        ecode++;
1732        break;
1733    
1734  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1735      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1736      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1423  for (;;) Line 1740  for (;;)
1740      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1741      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1742        {        {
1743        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1744    
1745        switch(ecode[1])        switch(ecode[1])
1746          {          {
# Line 1433  for (;;) Line 1749  for (;;)
1749          break;          break;
1750    
1751          case PT_LAMP:          case PT_LAMP:
1752          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
1753               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
1754               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1755            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1756           break;           break;
1757    
1758          case PT_GC:          case PT_GC:
1759          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1760            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1761          break;          break;
1762    
1763          case PT_PC:          case PT_PC:
1764          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1765            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1766          break;          break;
1767    
1768          case PT_SC:          case PT_SC:
1769          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
1770            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1771          break;          break;
1772    
1773          default:          default:
1774          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
         break;  
1775          }          }
1776    
1777        ecode += 3;        ecode += 3;
# Line 1470  for (;;) Line 1785  for (;;)
1785      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1786      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1787        {        {
1788        int chartype, script;        int category = UCD_CATEGORY(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1789        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1790        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1791          {          {
# Line 1480  for (;;) Line 1794  for (;;)
1794            {            {
1795            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1796            }            }
1797          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
1798          if (category != ucp_M) break;          if (category != ucp_M) break;
1799          eptr += len;          eptr += len;
1800          }          }
# Line 1501  for (;;) Line 1815  for (;;)
1815      case OP_REF:      case OP_REF:
1816        {        {
1817        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1818        ecode += 3;                                 /* Advance past item */        ecode += 3;
1819    
1820        /* If the reference is unset, set the length to be longer than the amount        /* If the reference is unset, there are two possibilities:
1821        of subject left; this ensures that every attempt at a match fails. We  
1822        can't just fail here, because of the possibility of quantifiers with zero        (a) In the default, Perl-compatible state, set the length to be longer
1823        minima. */        than the amount of subject left; this ensures that every attempt at a
1824          match fails. We can't just fail here, because of the possibility of
1825        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        quantifiers with zero minima.
1826          md->end_subject - eptr + 1 :  
1827          md->offset_vector[offset+1] - md->offset_vector[offset];        (b) If the JavaScript compatibility flag is set, set the length to zero
1828          so that the back reference matches an empty string.
1829    
1830          Otherwise, set the length to the length of what was matched by the
1831          referenced subpattern. */
1832    
1833          if (offset >= offset_top || md->offset_vector[offset] < 0)
1834            length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1835          else
1836            length = md->offset_vector[offset+1] - md->offset_vector[offset];
1837    
1838        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1839    
# Line 1555  for (;;) Line 1878  for (;;)
1878    
1879        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
1880          {          {
1881          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
1882              {
1883              CHECK_PARTIAL();
1884              RRETURN(MATCH_NOMATCH);
1885              }
1886          eptr += length;          eptr += length;
1887          }          }
1888    
# Line 1570  for (;;) Line 1897  for (;;)
1897          {          {
1898          for (fi = min;; fi++)          for (fi = min;; fi++)
1899            {            {
1900            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1901            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1902            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1903                {
1904                CHECK_PARTIAL();
1905              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
1906                }
1907            eptr += length;            eptr += length;
1908            }            }
1909          /* Control never gets here */          /* Control never gets here */
# Line 1589  for (;;) Line 1919  for (;;)
1919            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims)) break;
1920            eptr += length;            eptr += length;
1921            }            }
1922            CHECK_PARTIAL();
1923          while (eptr >= pp)          while (eptr >= pp)
1924            {            {
1925            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1926            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1927            eptr -= length;            eptr -= length;
1928            }            }
# Line 1656  for (;;) Line 1987  for (;;)
1987          {          {
1988          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
1989            {            {
1990            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
1991                {
1992                CHECK_PARTIAL();
1993                RRETURN(MATCH_NOMATCH);
1994                }
1995            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
1996            if (c > 255)            if (c > 255)
1997              {              {
# Line 1674  for (;;) Line 2009  for (;;)
2009          {          {
2010          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2011            {            {
2012            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2013                {
2014                CHECK_PARTIAL();
2015                RRETURN(MATCH_NOMATCH);
2016                }
2017            c = *eptr++;            c = *eptr++;
2018            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2019            }            }
# Line 1696  for (;;) Line 2035  for (;;)
2035            {            {
2036            for (fi = min;; fi++)            for (fi = min;; fi++)
2037              {              {
2038              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2039              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2040              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
2041                  {
2042                  CHECK_PARTIAL();
2043                  RRETURN(MATCH_NOMATCH);
2044                  }
2045                if (eptr >= md->end_subject)
2046                  {
2047                  SCHECK_PARTIAL();
2048                  RRETURN(MATCH_NOMATCH);
2049                  }
2050              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2051              if (c > 255)              if (c > 255)
2052                {                {
# Line 1716  for (;;) Line 2064  for (;;)
2064            {            {
2065            for (fi = min;; fi++)            for (fi = min;; fi++)
2066              {              {
2067              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2068              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2069              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
2070                  {
2071                  CHECK_PARTIAL();
2072                  RRETURN(MATCH_NOMATCH);
2073                  }
2074                if (eptr >= md->end_subject)
2075                  {
2076                  SCHECK_PARTIAL();
2077                  RRETURN(MATCH_NOMATCH);
2078                  }
2079              c = *eptr++;              c = *eptr++;
2080              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2081              }              }
# Line 1751  for (;;) Line 2108  for (;;)
2108                }                }
2109              eptr += len;              eptr += len;
2110              }              }
2111              CHECK_PARTIAL();
2112            for (;;)            for (;;)
2113              {              {
2114              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2115              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2116              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2117              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1770  for (;;) Line 2128  for (;;)
2128              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2129              eptr++;              eptr++;
2130              }              }
2131              CHECK_PARTIAL();
2132            while (eptr >= pp)            while (eptr >= pp)
2133              {              {
2134              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2135              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2136              eptr--;              eptr--;
2137              }              }
# Line 1785  for (;;) Line 2144  for (;;)
2144    
2145    
2146      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2147      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2148        mode, because Unicode properties are supported in non-UTF-8 mode. */
2149    
2150  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2151      case OP_XCLASS:      case OP_XCLASS:
# Line 1826  for (;;) Line 2186  for (;;)
2186    
2187        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2188          {          {
2189          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2190          GETCHARINC(c, eptr);            {
2191              SCHECK_PARTIAL();
2192              RRETURN(MATCH_NOMATCH);
2193              }
2194            GETCHARINCTEST(c, eptr);
2195          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2196          }          }
2197    
# Line 1843  for (;;) Line 2207  for (;;)
2207          {          {
2208          for (fi = min;; fi++)          for (fi = min;; fi++)
2209            {            {
2210            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2211            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2212            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max)
2213            GETCHARINC(c, eptr);              {
2214                CHECK_PARTIAL();
2215                RRETURN(MATCH_NOMATCH);
2216                }
2217              if (eptr >= md->end_subject)
2218                {
2219                SCHECK_PARTIAL();
2220                RRETURN(MATCH_NOMATCH);
2221                }
2222              GETCHARINCTEST(c, eptr);
2223            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2224            }            }
2225          /* Control never gets here */          /* Control never gets here */
# Line 1861  for (;;) Line 2234  for (;;)
2234            {            {
2235            int len = 1;            int len = 1;
2236            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2237            GETCHARLEN(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
2238            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2239            eptr += len;            eptr += len;
2240            }            }
2241            CHECK_PARTIAL();
2242          for(;;)          for(;;)
2243            {            {
2244            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2245            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2246            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2247            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2248            }            }
2249          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2250          }          }
# Line 1926  for (;;) Line 2300  for (;;)
2300    
2301        else        else
2302          {          {
2303          int dc;          unsigned int dc;
2304          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2305          ecode += length;          ecode += length;
2306    
# Line 1936  for (;;) Line 2310  for (;;)
2310          if (fc != dc)          if (fc != dc)
2311            {            {
2312  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2313            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2314  #endif  #endif
2315              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2316            }            }
# Line 1953  for (;;) Line 2327  for (;;)
2327        }        }
2328      break;      break;
2329    
2330      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2331    
2332      case OP_EXACT:      case OP_EXACT:
2333      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2334      ecode += 3;      ecode += 3;
2335      goto REPEATCHAR;      goto REPEATCHAR;
2336    
2337        case OP_POSUPTO:
2338        possessive = TRUE;
2339        /* Fall through */
2340    
2341      case OP_UPTO:      case OP_UPTO:
2342      case OP_MINUPTO:      case OP_MINUPTO:
2343      min = 0;      min = 0;
# Line 1968  for (;;) Line 2346  for (;;)
2346      ecode += 3;      ecode += 3;
2347      goto REPEATCHAR;      goto REPEATCHAR;
2348    
2349        case OP_POSSTAR:
2350        possessive = TRUE;
2351        min = 0;
2352        max = INT_MAX;
2353        ecode++;
2354        goto REPEATCHAR;
2355    
2356        case OP_POSPLUS:
2357        possessive = TRUE;
2358        min = 1;
2359        max = INT_MAX;
2360        ecode++;
2361        goto REPEATCHAR;
2362    
2363        case OP_POSQUERY:
2364        possessive = TRUE;
2365        min = 0;
2366        max = 1;
2367        ecode++;
2368        goto REPEATCHAR;
2369    
2370      case OP_STAR:      case OP_STAR:
2371      case OP_MINSTAR:      case OP_MINSTAR:
2372      case OP_PLUS:      case OP_PLUS:
# Line 1980  for (;;) Line 2379  for (;;)
2379      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2380      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2381    
2382      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2383    
2384      REPEATCHAR:      REPEATCHAR:
2385  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1991  for (;;) Line 2388  for (;;)
2388        length = 1;        length = 1;
2389        charptr = ecode;        charptr = ecode;
2390        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2391        ecode += length;        ecode += length;
2392    
2393        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 1999  for (;;) Line 2395  for (;;)
2395    
2396        if (length > 1)        if (length > 1)
2397          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2398  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2399          int othercase;          unsigned int othercase;
2400          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2401              (othercase = _pcre_ucp_othercase(fc)) >= 0 &&              (othercase = UCD_OTHERCASE(fc)) != fc)
              othercase >= 0)  
2402            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2403            else oclength = 0;
2404  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2405    
2406          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2407            {            {
2408            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2409            /* Need braces because of following else */              memcmp(eptr, charptr, length) == 0) eptr += length;
2410            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2411              else if (oclength > 0 &&
2412                       eptr <= md->end_subject - oclength &&
2413                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2414    #endif  /* SUPPORT_UCP */
2415            else            else
2416              {              {
2417              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2418              eptr += oclength;              RRETURN(MATCH_NOMATCH);
2419              }              }
2420            }            }
2421    
# Line 2028  for (;;) Line 2425  for (;;)
2425            {            {
2426            for (fi = min;; fi++)            for (fi = min;; fi++)
2427              {              {
2428              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2429              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2430              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
2431              if (memcmp(eptr, charptr, length) == 0) eptr += length;                {
2432              /* Need braces because of following else */                CHECK_PARTIAL();
2433              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                RRETURN(MATCH_NOMATCH);
2434                  }
2435                if (eptr <= md->end_subject - length &&
2436                  memcmp(eptr, charptr, length) == 0) eptr += length;
2437    #ifdef SUPPORT_UCP
2438                else if (oclength > 0 &&
2439                         eptr <= md->end_subject - oclength &&
2440                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2441    #endif  /* SUPPORT_UCP */
2442              else              else
2443                {                {
2444                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2445                eptr += oclength;                RRETURN(MATCH_NOMATCH);
2446                }                }
2447              }              }
2448            /* Control never gets here */            /* Control never gets here */
2449            }            }
2450          else  
2451            else  /* Maximize */
2452            {            {
2453            pp = eptr;            pp = eptr;
2454            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2455              {              {
2456              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2457              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2458              else if (oclength == 0) break;  #ifdef SUPPORT_UCP
2459              else              else if (oclength > 0 &&
2460                {                       eptr <= md->end_subject - oclength &&
2461                if (memcmp(eptr, occhars, oclength) != 0) break;                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2462                eptr += oclength;  #endif  /* SUPPORT_UCP */
2463                }              else break;
2464                }
2465    
2466              CHECK_PARTIAL();
2467              if (possessive) continue;
2468    
2469              for(;;)
2470                {
2471                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2472                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2473                if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2474    #ifdef SUPPORT_UCP
2475                eptr--;
2476                BACKCHAR(eptr);
2477    #else   /* without SUPPORT_UCP */
2478                eptr -= length;
2479    #endif  /* SUPPORT_UCP */
2480              }              }
           while (eptr >= pp)  
            {  
            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);  
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
            eptr -= length;  
            }  
           RRETURN(MATCH_NOMATCH);  
2481            }            }
2482          /* Control never gets here */          /* Control never gets here */
2483          }          }
# Line 2075  for (;;) Line 2490  for (;;)
2490  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2491    
2492      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2493        {  
2494        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2495    
2496      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2497      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2096  for (;;) Line 2509  for (;;)
2509        {        {
2510        fc = md->lcc[fc];        fc = md->lcc[fc];
2511        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2512            {
2513            if (eptr >= md->end_subject)
2514              {
2515              SCHECK_PARTIAL();
2516              RRETURN(MATCH_NOMATCH);
2517              }
2518          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2519            }
2520        if (min == max) continue;        if (min == max) continue;
2521        if (minimize)        if (minimize)
2522          {          {
2523          for (fi = min;; fi++)          for (fi = min;; fi++)
2524            {            {
2525            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2526            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2527            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max)
2528                fc != md->lcc[*eptr++])              {
2529                CHECK_PARTIAL();
2530              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2531                }
2532              if (eptr >= md->end_subject)
2533                {
2534                SCHECK_PARTIAL();
2535                RRETURN(MATCH_NOMATCH);
2536                }
2537              if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2538            }            }
2539          /* Control never gets here */          /* Control never gets here */
2540          }          }
2541        else        else  /* Maximize */
2542          {          {
2543          pp = eptr;          pp = eptr;
2544          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2118  for (;;) Line 2546  for (;;)
2546            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2547            eptr++;            eptr++;
2548            }            }
2549    
2550            CHECK_PARTIAL();
2551            if (possessive) continue;
2552    
2553          while (eptr >= pp)          while (eptr >= pp)
2554            {            {
2555            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2556            eptr--;            eptr--;
2557            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2558            }            }
# Line 2133  for (;;) Line 2565  for (;;)
2565    
2566      else      else
2567        {        {
2568        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2569            {
2570            if (eptr >= md->end_subject)
2571              {
2572              SCHECK_PARTIAL();
2573              RRETURN(MATCH_NOMATCH);
2574              }
2575            if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2576            }
2577        if (min == max) continue;        if (min == max) continue;
2578        if (minimize)        if (minimize)
2579          {          {
2580          for (fi = min;; fi++)          for (fi = min;; fi++)
2581            {            {
2582            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2583            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2584            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max)
2585                {
2586                CHECK_PARTIAL();
2587                RRETURN(MATCH_NOMATCH);
2588                }
2589              if (eptr >= md->end_subject)
2590                {
2591                SCHECK_PARTIAL();
2592              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2593                }
2594              if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2595            }            }
2596          /* Control never gets here */          /* Control never gets here */
2597          }          }
2598        else        else  /* Maximize */
2599          {          {
2600          pp = eptr;          pp = eptr;
2601          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2154  for (;;) Line 2603  for (;;)
2603            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2604            eptr++;            eptr++;
2605            }            }
2606            CHECK_PARTIAL();
2607            if (possessive) continue;
2608          while (eptr >= pp)          while (eptr >= pp)
2609            {            {
2610            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2611            eptr--;            eptr--;
2612            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2613            }            }
# Line 2206  for (;;) Line 2657  for (;;)
2657      ecode += 3;      ecode += 3;
2658      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2659    
2660        case OP_NOTPOSSTAR:
2661        possessive = TRUE;
2662        min = 0;
2663        max = INT_MAX;
2664        ecode++;
2665        goto REPEATNOTCHAR;
2666    
2667        case OP_NOTPOSPLUS:
2668        possessive = TRUE;
2669        min = 1;
2670        max = INT_MAX;
2671        ecode++;
2672        goto REPEATNOTCHAR;
2673    
2674        case OP_NOTPOSQUERY:
2675        possessive = TRUE;
2676        min = 0;
2677        max = 1;
2678        ecode++;
2679        goto REPEATNOTCHAR;
2680    
2681        case OP_NOTPOSUPTO:
2682        possessive = TRUE;
2683        min = 0;
2684        max = GET2(ecode, 1);
2685        ecode += 3;
2686        goto REPEATNOTCHAR;
2687    
2688      case OP_NOTSTAR:      case OP_NOTSTAR:
2689      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2690      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2218  for (;;) Line 2697  for (;;)
2697      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2698      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2699    
2700      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
2701    
2702      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2703      fc = *ecode++;      fc = *ecode++;
2704    
2705      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2245  for (;;) Line 2721  for (;;)
2721        /* UTF-8 mode */        /* UTF-8 mode */
2722        if (utf8)        if (utf8)
2723          {          {
2724          register int d;          register unsigned int d;
2725          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2726            {            {
2727              if (eptr >= md->end_subject)
2728                {
2729                SCHECK_PARTIAL();
2730                RRETURN(MATCH_NOMATCH);
2731                }
2732            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
2733            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
2734            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
# Line 2259  for (;;) Line 2740  for (;;)
2740        /* Not UTF-8 mode */        /* Not UTF-8 mode */
2741          {          {
2742          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2743              {
2744              if (eptr >= md->end_subject)
2745                {
2746                SCHECK_PARTIAL();
2747                RRETURN(MATCH_NOMATCH);
2748                }
2749            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2750              }
2751          }          }
2752    
2753        if (min == max) continue;        if (min == max) continue;
# Line 2270  for (;;) Line 2758  for (;;)
2758          /* UTF-8 mode */          /* UTF-8 mode */
2759          if (utf8)          if (utf8)
2760            {            {
2761            register int d;            register unsigned int d;
2762            for (fi = min;; fi++)            for (fi = min;; fi++)
2763              {              {
2764              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2765              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2766                if (fi >= max)
2767                  {
2768                  CHECK_PARTIAL();
2769                  RRETURN(MATCH_NOMATCH);
2770                  }
2771                if (eptr >= md->end_subject)
2772                  {
2773                  SCHECK_PARTIAL();
2774                  RRETURN(MATCH_NOMATCH);
2775                  }
2776              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2777              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2778              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2779              }              }
2780            }            }
2781          else          else
# Line 2287  for (;;) Line 2784  for (;;)
2784            {            {
2785            for (fi = min;; fi++)            for (fi = min;; fi++)
2786              {              {
2787              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2788              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2789              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max)
2790                  {
2791                  CHECK_PARTIAL();
2792                  RRETURN(MATCH_NOMATCH);
2793                  }
2794                if (eptr >= md->end_subject)
2795                  {
2796                  SCHECK_PARTIAL();
2797                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2798                  }
2799                if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2800              }              }
2801            }            }
2802          /* Control never gets here */          /* Control never gets here */
# Line 2306  for (;;) Line 2812  for (;;)
2812          /* UTF-8 mode */          /* UTF-8 mode */
2813          if (utf8)          if (utf8)
2814            {            {
2815            register int d;            register unsigned int d;
2816            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2817              {              {
2818              int len = 1;              int len = 1;
# Line 2316  for (;;) Line 2822  for (;;)
2822              if (fc == d) break;              if (fc == d) break;
2823              eptr += len;              eptr += len;
2824              }              }
2825            for(;;)          CHECK_PARTIAL();
2826            if (possessive) continue;
2827            for(;;)
2828              {              {
2829              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2830              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2831              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2832              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2333  for (;;) Line 2841  for (;;)
2841              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2842              eptr++;              eptr++;
2843              }              }
2844              CHECK_PARTIAL();
2845              if (possessive) continue;
2846            while (eptr >= pp)            while (eptr >= pp)
2847              {              {
2848              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2849              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2850              eptr--;              eptr--;
2851              }              }
# Line 2354  for (;;) Line 2864  for (;;)
2864        /* UTF-8 mode */        /* UTF-8 mode */
2865        if (utf8)        if (utf8)
2866          {          {
2867          register int d;          register unsigned int d;
2868          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2869            {            {
2870              if (eptr >= md->end_subject)
2871                {
2872                SCHECK_PARTIAL();
2873                RRETURN(MATCH_NOMATCH);
2874                }
2875            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
2876            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
2877            }            }
# Line 2366  for (;;) Line 2881  for (;;)
2881        /* Not UTF-8 mode */        /* Not UTF-8 mode */
2882          {          {
2883          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2884              {
2885              if (eptr >= md->end_subject)
2886                {
2887                SCHECK_PARTIAL();
2888                RRETURN(MATCH_NOMATCH);
2889                }
2890            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2891              }
2892          }          }
2893    
2894        if (min == max) continue;        if (min == max) continue;
# Line 2377  for (;;) Line 2899  for (;;)
2899          /* UTF-8 mode */          /* UTF-8 mode */
2900          if (utf8)          if (utf8)
2901            {            {
2902            register int d;            register unsigned int d;
2903            for (fi = min;; fi++)            for (fi = min;; fi++)
2904              {              {
2905              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2906              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2907              GETCHARINC(d, eptr);              if (fi >= max)
2908              if (fi >= max || eptr >= md->end_subject || fc == d)                {
2909                  CHECK_PARTIAL();
2910                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2911                  }
2912                if (eptr >= md->end_subject)
2913                  {
2914                  SCHECK_PARTIAL();
2915                  RRETURN(MATCH_NOMATCH);
2916                  }
2917                GETCHARINC(d, eptr);
2918                if (fc == d) RRETURN(MATCH_NOMATCH);
2919              }              }
2920            }            }
2921          else          else
# Line 2393  for (;;) Line 2924  for (;;)
2924            {            {
2925            for (fi = min;; fi++)            for (fi = min;; fi++)
2926              {              {
2927              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2928              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2929              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max)
2930                  {
2931                  CHECK_PARTIAL();
2932                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2933                  }
2934                if (eptr >= md->end_subject)
2935                  {
2936                  SCHECK_PARTIAL();
2937                  RRETURN(MATCH_NOMATCH);
2938                  }
2939                if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2940              }              }
2941            }            }
2942          /* Control never gets here */          /* Control never gets here */
# Line 2412  for (;;) Line 2952  for (;;)
2952          /* UTF-8 mode */          /* UTF-8 mode */
2953          if (utf8)          if (utf8)
2954            {            {
2955            register int d;            register unsigned int d;
2956            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2957              {              {
2958              int len = 1;              int len = 1;
# Line 2421  for (;;) Line 2961  for (;;)
2961              if (fc == d) break;              if (fc == d) break;
2962              eptr += len;              eptr += len;
2963              }              }
2964              CHECK_PARTIAL();
2965              if (possessive) continue;
2966            for(;;)            for(;;)
2967              {              {
2968              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2969              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2970              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2971              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2438  for (;;) Line 2980  for (;;)
2980              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2981              eptr++;              eptr++;
2982              }              }
2983              CHECK_PARTIAL();
2984              if (possessive) continue;
2985            while (eptr >= pp)            while (eptr >= pp)
2986              {              {
2987              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2988              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2989              eptr--;              eptr--;
2990              }              }
# Line 2469  for (;;) Line 3013  for (;;)
3013      ecode += 3;      ecode += 3;
3014      goto REPEATTYPE;      goto REPEATTYPE;
3015    
3016        case OP_TYPEPOSSTAR:
3017        possessive = TRUE;
3018        min = 0;
3019        max = INT_MAX;
3020        ecode++;
3021        goto REPEATTYPE;
3022    
3023        case OP_TYPEPOSPLUS:
3024        possessive = TRUE;
3025        min = 1;
3026        max = INT_MAX;
3027        ecode++;
3028        goto REPEATTYPE;
3029    
3030        case OP_TYPEPOSQUERY:
3031        possessive = TRUE;
3032        min = 0;
3033        max = 1;
3034        ecode++;
3035        goto REPEATTYPE;
3036    
3037        case OP_TYPEPOSUPTO:
3038        possessive = TRUE;
3039        min = 0;
3040        max = GET2(ecode, 1);
3041        ecode += 3;
3042        goto REPEATTYPE;
3043    
3044      case OP_TYPESTAR:      case OP_TYPESTAR:
3045      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
3046      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2500  for (;;) Line 3072  for (;;)
3072    
3073      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3074      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3075      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3076      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3077      and single-bytes. */      and single-bytes. */
3078    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3079      if (min > 0)      if (min > 0)
3080        {        {
3081  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2518  for (;;) Line 3087  for (;;)
3087            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3088            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3089              {              {
3090              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3091              GETCHARINC(c, eptr);                {
3092                  SCHECK_PARTIAL();
3093                  RRETURN(MATCH_NOMATCH);
3094                  }
3095                GETCHARINCTEST(c, eptr);
3096              }              }
3097            break;            break;
3098    
3099            case PT_LAMP:            case PT_LAMP:
3100            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3101              {              {
3102              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3103              GETCHARINC(c, eptr);                {
3104              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3105                  RRETURN(MATCH_NOMATCH);
3106                  }
3107                GETCHARINCTEST(c, eptr);
3108                prop_chartype = UCD_CHARTYPE(c);
3109              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3110                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3111                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2539  for (;;) Line 3116  for (;;)
3116            case PT_GC:            case PT_GC:
3117            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3118              {              {
3119              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3120              GETCHARINC(c, eptr);                {
3121              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3122                  RRETURN(MATCH_NOMATCH);
3123                  }
3124                GETCHARINCTEST(c, eptr);
3125                prop_category = UCD_CATEGORY(c);
3126              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3127                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3128              }              }
# Line 2550  for (;;) Line 3131  for (;;)
3131            case PT_PC:            case PT_PC:
3132            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3133              {              {
3134              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3135              GETCHARINC(c, eptr);                {
3136              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3137                  RRETURN(MATCH_NOMATCH);
3138                  }
3139                GETCHARINCTEST(c, eptr);
3140                prop_chartype = UCD_CHARTYPE(c);
3141              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3142                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3143              }              }
# Line 2561  for (;;) Line 3146  for (;;)
3146            case PT_SC:            case PT_SC:
3147            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3148              {              {
3149              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3150              GETCHARINC(c, eptr);                {
3151              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3152                  RRETURN(MATCH_NOMATCH);
3153                  }
3154                GETCHARINCTEST(c, eptr);
3155                prop_script = UCD_SCRIPT(c);
3156              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3157                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3158              }              }
# Line 2571  for (;;) Line 3160  for (;;)
3160    
3161            default:            default:
3162            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
3163            }            }
3164          }          }
3165    
# Line 2582  for (;;) Line 3170  for (;;)
3170          {          {
3171          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3172            {            {
3173              if (eptr >= md->end_subject)
3174                {
3175                SCHECK_PARTIAL();
3176                RRETURN(MATCH_NOMATCH);
3177                }
3178            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3179            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3180            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3181            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3182              {              {
3183              int len = 1;              int len = 1;
3184              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3185                {                else { GETCHARLEN(c, eptr, len); }
3186                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3187              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3188              eptr += len;              eptr += len;
3189              }              }
# Line 2610  for (;;) Line 3201  for (;;)
3201          case OP_ANY:          case OP_ANY:
3202          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3203            {            {
3204            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3205                 ((ims & PCRE_DOTALL) == 0 &&              {
3206                   eptr <= md->end_subject - md->nllen &&              SCHECK_PARTIAL();
                  IS_NEWLINE(eptr)))  
3207              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3208                }
3209              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3210              eptr++;
3211              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3212              }
3213            break;
3214    
3215            case OP_ALLANY:
3216            for (i = 1; i <= min; i++)
3217              {
3218              if (eptr >= md->end_subject)
3219                {
3220                SCHECK_PARTIAL();
3221                RRETURN(MATCH_NOMATCH);
3222                }
3223            eptr++;            eptr++;
3224            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3225            }            }
3226          break;          break;
3227    
3228          case OP_ANYBYTE:          case OP_ANYBYTE:
3229            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3230          eptr += min;          eptr += min;
3231          break;          break;
3232    
3233            case OP_ANYNL:
3234            for (i = 1; i <= min; i++)
3235              {
3236              if (eptr >= md->end_subject)
3237                {
3238                SCHECK_PARTIAL();
3239                RRETURN(MATCH_NOMATCH);
3240                }
3241              GETCHARINC(c, eptr);
3242              switch(c)
3243                {
3244                default: RRETURN(MATCH_NOMATCH);
3245                case 0x000d:
3246                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3247                break;
3248    
3249                case 0x000a:
3250                break;
3251    
3252                case 0x000b:
3253                case 0x000c:
3254                case 0x0085:
3255                case 0x2028:
3256                case 0x2029:
3257                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3258                break;
3259                }
3260              }
3261            break;
3262    
3263            case OP_NOT_HSPACE:
3264            for (i = 1; i <= min; i++)
3265              {
3266              if (eptr >= md->end_subject)
3267                {
3268                SCHECK_PARTIAL();
3269                RRETURN(MATCH_NOMATCH);
3270                }
3271              GETCHARINC(c, eptr);
3272              switch(c)
3273                {
3274                default: break;
3275                case 0x09:      /* HT */
3276                case 0x20:      /* SPACE */
3277                case 0xa0:      /* NBSP */
3278                case 0x1680:    /* OGHAM SPACE MARK */
3279                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3280                case 0x2000:    /* EN QUAD */
3281                case 0x2001:    /* EM QUAD */
3282                case 0x2002:    /* EN SPACE */
3283                case 0x2003:    /* EM SPACE */
3284                case 0x2004:    /* THREE-PER-EM SPACE */
3285                case 0x2005:    /* FOUR-PER-EM SPACE */
3286                case 0x2006:    /* SIX-PER-EM SPACE */
3287                case 0x2007:    /* FIGURE SPACE */
3288                case 0x2008:    /* PUNCTUATION SPACE */
3289                case 0x2009:    /* THIN SPACE */
3290                case 0x200A:    /* HAIR SPACE */
3291                case 0x202f:    /* NARROW NO-BREAK SPACE */
3292                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3293                case 0x3000:    /* IDEOGRAPHIC SPACE */
3294                RRETURN(MATCH_NOMATCH);
3295                }
3296              }
3297            break;
3298    
3299            case OP_HSPACE:
3300            for (i = 1; i <= min; i++)
3301              {
3302              if (eptr >= md->end_subject)
3303                {
3304                SCHECK_PARTIAL();
3305                RRETURN(MATCH_NOMATCH);
3306                }
3307              GETCHARINC(c, eptr);
3308              switch(c)
3309                {
3310                default: RRETURN(MATCH_NOMATCH);
3311                case 0x09:      /* HT */
3312                case 0x20:      /* SPACE */
3313                case 0xa0:      /* NBSP */
3314                case 0x1680:    /* OGHAM SPACE MARK */
3315                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3316                case 0x2000:    /* EN QUAD */
3317                case 0x2001:    /* EM QUAD */
3318                case 0x2002:    /* EN SPACE */
3319                case 0x2003:    /* EM SPACE */
3320                case 0x2004:    /* THREE-PER-EM SPACE */
3321                case 0x2005:    /* FOUR-PER-EM SPACE */
3322                case 0x2006:    /* SIX-PER-EM SPACE */
3323                case 0x2007:    /* FIGURE SPACE */
3324                case 0x2008:    /* PUNCTUATION SPACE */
3325                case 0x2009:    /* THIN SPACE */
3326                case 0x200A:    /* HAIR SPACE */
3327                case 0x202f:    /* NARROW NO-BREAK SPACE */
3328                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3329                case 0x3000:    /* IDEOGRAPHIC SPACE */
3330                break;
3331                }
3332              }
3333            break;
3334    
3335            case OP_NOT_VSPACE:
3336            for (i = 1; i <= min; i++)
3337              {
3338              if (eptr >= md->end_subject)
3339                {
3340                SCHECK_PARTIAL();
3341                RRETURN(MATCH_NOMATCH);
3342                }
3343              GETCHARINC(c, eptr);
3344              switch(c)
3345                {
3346                default: break;
3347                case 0x0a:      /* LF */
3348                case 0x0b:      /* VT */
3349                case 0x0c:      /* FF */
3350                case 0x0d:      /* CR */
3351                case 0x85:      /* NEL */
3352                case 0x2028:    /* LINE SEPARATOR */
3353                case 0x2029:    /* PARAGRAPH SEPARATOR */
3354                RRETURN(MATCH_NOMATCH);
3355                }
3356              }
3357            break;
3358    
3359            case OP_VSPACE:
3360            for (i = 1; i <= min; i++)
3361              {
3362              if (eptr >= md->end_subject)
3363                {
3364                SCHECK_PARTIAL();
3365                RRETURN(MATCH_NOMATCH);
3366                }
3367              GETCHARINC(c, eptr);
3368              switch(c)
3369                {
3370                default: RRETURN(MATCH_NOMATCH);
3371                case 0x0a:      /* LF */
3372                case 0x0b:      /* VT */
3373                case 0x0c:      /* FF */
3374                case 0x0d:      /* CR */
3375                case 0x85:      /* NEL */
3376                case 0x2028:    /* LINE SEPARATOR */
3377                case 0x2029:    /* PARAGRAPH SEPARATOR */
3378                break;
3379                }
3380              }
3381            break;
3382    
3383          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3384          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3385            {            {
3386            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3387                {
3388                SCHECK_PARTIAL();
3389                RRETURN(MATCH_NOMATCH);
3390                }
3391            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3392            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3393              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 2637  for (;;) Line 3397  for (;;)
3397          case OP_DIGIT:          case OP_DIGIT:
3398          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3399            {            {
3400            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3401               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3402                SCHECK_PARTIAL();
3403                RRETURN(MATCH_NOMATCH);
3404                }
3405              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3406              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3407            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3408            }            }
# Line 2647  for (;;) Line 3411  for (;;)
3411          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3412          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3413            {            {
3414            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3415               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3416                SCHECK_PARTIAL();
3417              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3418            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              }
3419              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3420                RRETURN(MATCH_NOMATCH);
3421              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3422            }            }
3423          break;          break;
3424    
3425          case OP_WHITESPACE:          case OP_WHITESPACE:
3426          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3427            {            {
3428            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3429               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3430                SCHECK_PARTIAL();
3431                RRETURN(MATCH_NOMATCH);
3432                }
3433              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3434              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3435            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3436            }            }
# Line 2668  for (;;) Line 3440  for (;;)
3440          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3441            {            {
3442            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
3443               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))               (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3444              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3445            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3446            }            }
3447          break;          break;
3448    
3449          case OP_WORDCHAR:          case OP_WORDCHAR:
3450          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3451            {            {
3452            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3453               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3454                SCHECK_PARTIAL();
3455                RRETURN(MATCH_NOMATCH);
3456                }
3457              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3458              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3459            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3460            }            }
# Line 2697  for (;;) Line 3473  for (;;)
3473        switch(ctype)        switch(ctype)
3474          {          {
3475          case OP_ANY:          case OP_ANY:
3476          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3477            {            {
3478            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
3479              {              {
3480              if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))              SCHECK_PARTIAL();
3481                RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
             eptr++;  
3482              }              }
3483              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3484              eptr++;
3485            }            }
3486          else eptr += min;          break;
3487    
3488            case OP_ALLANY:
3489            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3490            eptr += min;
3491          break;          break;
3492    
3493          case OP_ANYBYTE:          case OP_ANYBYTE:
3494            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3495          eptr += min;          eptr += min;
3496          break;          break;
3497    
3498            case OP_ANYNL:
3499            for (i = 1; i <= min; i++)
3500              {
3501              if (eptr >= md->end_subject)
3502                {
3503                SCHECK_PARTIAL();
3504                RRETURN(MATCH_NOMATCH);
3505                }
3506              switch(*eptr++)
3507                {
3508                default: RRETURN(MATCH_NOMATCH);
3509                case 0x000d:
3510                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3511                break;
3512                case 0x000a:
3513                break;
3514    
3515                case 0x000b:
3516                case 0x000c:
3517                case 0x0085:
3518                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3519                break;
3520                }
3521              }
3522            break;
3523    
3524            case OP_NOT_HSPACE:
3525            for (i = 1; i <= min; i++)
3526              {
3527              if (eptr >= md->end_subject)
3528                {
3529                SCHECK_PARTIAL();
3530                RRETURN(MATCH_NOMATCH);
3531                }
3532              switch(*eptr++)
3533                {
3534                default: break;
3535                case 0x09:      /* HT */
3536                case 0x20:      /* SPACE */
3537                case 0xa0:      /* NBSP */
3538                RRETURN(MATCH_NOMATCH);
3539                }
3540              }
3541            break;
3542    
3543            case OP_HSPACE:
3544            for (i = 1; i <= min; i++)
3545              {
3546              if (eptr >= md->end_subject)
3547                {
3548                SCHECK_PARTIAL();
3549                RRETURN(MATCH_NOMATCH);
3550                }
3551              switch(*eptr++)
3552                {
3553                default: RRETURN(MATCH_NOMATCH);
3554                case 0x09:      /* HT */
3555                case 0x20:      /* SPACE */
3556                case 0xa0:      /* NBSP */
3557                break;
3558                }
3559              }
3560            break;
3561    
3562            case OP_NOT_VSPACE:
3563            for (i = 1; i <= min; i++)
3564              {
3565              if (eptr >= md->end_subject)
3566                {
3567                SCHECK_PARTIAL();
3568                RRETURN(MATCH_NOMATCH);
3569                }
3570              switch(*eptr++)
3571                {
3572                default: break;
3573                case 0x0a:      /* LF */
3574                case 0x0b:      /* VT */
3575                case 0x0c:      /* FF */
3576                case 0x0d:      /* CR */
3577                case 0x85:      /* NEL */
3578                RRETURN(MATCH_NOMATCH);
3579                }
3580              }
3581            break;
3582    
3583            case OP_VSPACE:
3584            for (i = 1; i <= min; i++)
3585              {
3586              if (eptr >= md->end_subject)
3587                {
3588                SCHECK_PARTIAL();
3589                RRETURN(MATCH_NOMATCH);
3590                }
3591              switch(*eptr++)
3592                {
3593                default: RRETURN(MATCH_NOMATCH);
3594                case 0x0a:      /* LF */
3595                case 0x0b:      /* VT */
3596                case 0x0c:      /* FF */
3597                case 0x0d:      /* CR */
3598                case 0x85:      /* NEL */
3599                break;
3600                }
3601              }
3602            break;
3603    
3604          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3605          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3606              {
3607              if (eptr >= md->end_subject)
3608                {
3609                SCHECK_PARTIAL();
3610                RRETURN(MATCH_NOMATCH);
3611                }
3612            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3613              }
3614          break;          break;
3615    
3616          case OP_DIGIT:          case OP_DIGIT:
3617          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3618              {
3619              if (eptr >= md->end_subject)
3620                {
3621                SCHECK_PARTIAL();
3622                RRETURN(MATCH_NOMATCH);
3623                }
3624            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3625              }
3626          break;          break;
3627    
3628          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3629          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3630              {
3631              if (eptr >= md->end_subject)
3632                {
3633                SCHECK_PARTIAL();
3634                RRETURN(MATCH_NOMATCH);
3635                }
3636            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3637              }
3638          break;          break;
3639    
3640          case OP_WHITESPACE:          case OP_WHITESPACE:
3641          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3642              {
3643              if (eptr >= md->end_subject)
3644                {
3645                SCHECK_PARTIAL();
3646                RRETURN(MATCH_NOMATCH);
3647                }
3648            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3649              }
3650          break;          break;
3651    
3652          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3653          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3654              {
3655              if (eptr >= md->end_subject)
3656                {
3657                SCHECK_PARTIAL();
3658                RRETURN(MATCH_NOMATCH);
3659                }
3660            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if ((md->ctypes[*eptr++] & ctype_word) != 0)
3661              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3662              }
3663          break;          break;
3664    
3665          case OP_WORDCHAR:          case OP_WORDCHAR:
3666          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3667              {
3668              if (eptr >= md->end_subject)
3669                {
3670                SCHECK_PARTIAL();
3671                RRETURN(MATCH_NOMATCH);
3672                }
3673            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if ((md->ctypes[*eptr++] & ctype_word) == 0)
3674              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3675              }
3676          break;          break;
3677    
3678          default:          default:
# Line 2768  for (;;) Line 3698  for (;;)
3698            case PT_ANY:            case PT_ANY:
3699            for (fi = min;; fi++)            for (fi = min;; fi++)
3700              {              {
3701              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3702              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3703              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
3704                  {
3705                  CHECK_PARTIAL();
3706                  RRETURN(MATCH_NOMATCH);
3707                  }
3708                if (eptr >= md->end_subject)
3709                  {
3710                  SCHECK_PARTIAL();
3711                  RRETURN(MATCH_NOMATCH);
3712                  }
3713              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3714              if (prop_fail_result) RRETURN(MATCH_NOMATCH);              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3715              }              }
3716            break;            /* Control never gets here */
3717    
3718            case PT_LAMP:            case PT_LAMP:
3719            for (fi = min;; fi++)            for (fi = min;; fi++)
3720              {              {
3721              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3722              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3723              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
3724                  {
3725                  CHECK_PARTIAL();
3726                  RRETURN(MATCH_NOMATCH);
3727                  }
3728                if (eptr >= md->end_subject)
3729                  {
3730                  SCHECK_PARTIAL();
3731                  RRETURN(MATCH_NOMATCH);
3732                  }
3733              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3734              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3735              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3736                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3737                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3738                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3739              }              }
3740            break;            /* Control never gets here */
3741    
3742            case PT_GC:            case PT_GC:
3743            for (fi = min;; fi++)            for (fi = min;; fi++)
3744              {              {
3745              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3746              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3747              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
3748                  {
3749                  CHECK_PARTIAL();
3750                  RRETURN(MATCH_NOMATCH);
3751                  }
3752                if (eptr >= md->end_subject)
3753                  {
3754                  SCHECK_PARTIAL();
3755                  RRETURN(MATCH_NOMATCH);
3756                  }
3757              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3758              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3759              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3760                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3761              }              }
3762            break;            /* Control never gets here */
3763    
3764            case PT_PC:            case PT_PC:
3765            for (fi = min;; fi++)            for (fi = min;; fi++)
3766              {              {
3767              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3768              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3769              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
3770                  {
3771                  CHECK_PARTIAL();
3772                  RRETURN(MATCH_NOMATCH);
3773                  }
3774                if (eptr >= md->end_subject)
3775                  {
3776                  SCHECK_PARTIAL();
3777                  RRETURN(MATCH_NOMATCH);
3778                  }
3779              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3780              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3781              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3782                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3783              }              }
3784            break;            /* Control never gets here */
3785    
3786            case PT_SC:            case PT_SC:
3787            for (fi = min;; fi++)            for (fi = min;; fi++)
3788              {              {
3789              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3790              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3791              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
3792                  {
3793                  CHECK_PARTIAL();
3794                  RRETURN(MATCH_NOMATCH);
3795                  }
3796                if (eptr >= md->end_subject)
3797                  {
3798                  SCHECK_PARTIAL();
3799                  RRETURN(MATCH_NOMATCH);
3800                  }
3801              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3802              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3803              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3804                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3805              }              }
3806            break;            /* Control never gets here */
3807    
3808            default:            default:
3809            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
3810            }            }
3811          }          }
3812    
# Line 2843  for (;;) Line 3817  for (;;)
3817          {          {
3818          for (fi = min;; fi++)          for (fi = min;; fi++)
3819            {            {
3820            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3821            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3822            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max)
3823                {
3824                CHECK_PARTIAL();
3825                RRETURN(MATCH_NOMATCH);
3826                }
3827              if (eptr >= md->end_subject)
3828                {
3829                SCHECK_PARTIAL();
3830                RRETURN(MATCH_NOMATCH);
3831                }
3832            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3833            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3834            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3835            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3836              {              {
3837              int len = 1;              int len = 1;
3838              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3839                {                else { GETCHARLEN(c, eptr, len); }
3840                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3841              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3842              eptr += len;              eptr += len;
3843              }              }
# Line 2872  for (;;) Line 3853  for (;;)
3853          {          {
3854          for (fi = min;; fi++)          for (fi = min;; fi++)
3855            {            {
3856            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3857            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3858            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max)
3859                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&              {
3860                  eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))              CHECK_PARTIAL();
3861                RRETURN(MATCH_NOMATCH);
3862                }
3863              if (eptr >= md->end_subject)
3864                {
3865                SCHECK_PARTIAL();
3866                RRETURN(MATCH_NOMATCH);
3867                }
3868              if (ctype == OP_ANY && IS_NEWLINE(eptr))
3869              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
   
3870            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3871            switch(ctype)            switch(ctype)
3872              {              {
3873              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
3874                case OP_ALLANY:
3875                case OP_ANYBYTE:
3876              break;              break;
3877    
3878              case OP_ANYBYTE:              case OP_ANYNL:
3879                switch(c)
3880                  {
3881                  default: RRETURN(MATCH_NOMATCH);
3882                  case 0x000d:
3883                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3884                  break;
3885                  case 0x000a:
3886                  break;
3887    
3888                  case 0x000b:
3889                  case 0x000c:
3890                  case 0x0085:
3891                  case 0x2028:
3892                  case 0x2029:
3893                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3894                  break;
3895                  }
3896                break;
3897    
3898                case OP_NOT_HSPACE:
3899                switch(c)
3900                  {
3901                  default: break;
3902                  case 0x09:      /* HT */
3903                  case 0x20:      /* SPACE */
3904                  case 0xa0:      /* NBSP */
3905                  case 0x1680:    /* OGHAM SPACE MARK */
3906                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3907                  case 0x2000:    /* EN QUAD */
3908                  case 0x2001:    /* EM QUAD */
3909                  case 0x2002:    /* EN SPACE */
3910                  case 0x2003:    /* EM SPACE */
3911                  case 0x2004:    /* THREE-PER-EM SPACE */
3912                  case 0x2005:    /* FOUR-PER-EM SPACE */
3913                  case 0x2006:    /* SIX-PER-EM SPACE */
3914                  case 0x2007:    /* FIGURE SPACE */
3915                  case 0x2008:    /* PUNCTUATION SPACE */
3916                  case 0x2009:    /* THIN SPACE */
3917                  case 0x200A:    /* HAIR SPACE */
3918                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3919                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3920                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3921                  RRETURN(MATCH_NOMATCH);
3922                  }
3923                break;
3924    
3925                case OP_HSPACE:
3926                switch(c)
3927                  {
3928                  default: RRETURN(MATCH_NOMATCH);
3929                  case 0x09:      /* HT */
3930                  case 0x20:      /* SPACE */
3931                  case 0xa0:      /* NBSP */
3932                  case 0x1680:    /* OGHAM SPACE MARK */
3933                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3934                  case 0x2000:    /* EN QUAD */
3935                  case 0x2001:    /* EM QUAD */
3936                  case 0x2002:    /* EN SPACE */
3937                  case 0x2003:    /* EM SPACE */
3938                  case 0x2004:    /* THREE-PER-EM SPACE */
3939                  case 0x2005:    /* FOUR-PER-EM SPACE */
3940                  case 0x2006:    /* SIX-PER-EM SPACE */
3941                  case 0x2007:    /* FIGURE SPACE */
3942                  case 0x2008:    /* PUNCTUATION SPACE */
3943                  case 0x2009:    /* THIN SPACE */
3944                  case 0x200A:    /* HAIR SPACE */
3945                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3946                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3947                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3948                  break;
3949                  }
3950                break;
3951    
3952                case OP_NOT_VSPACE:
3953                switch(c)
3954                  {
3955                  default: break;
3956                  case 0x0a:      /* LF */
3957                  case 0x0b:      /* VT */
3958                  case 0x0c:      /* FF */
3959                  case 0x0d:      /* CR */
3960                  case 0x85:      /* NEL */
3961                  case 0x2028:    /* LINE SEPARATOR */
3962                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3963                  RRETURN(MATCH_NOMATCH);
3964                  }
3965                break;
3966    
3967                case OP_VSPACE:
3968                switch(c)
3969                  {
3970                  default: RRETURN(MATCH_NOMATCH);
3971                  case 0x0a:      /* LF */
3972                  case 0x0b:      /* VT */
3973                  case 0x0c:      /* FF */
3974                  case 0x0d:      /* CR */
3975                  case 0x85:      /* NEL */
3976                  case 0x2028:    /* LINE SEPARATOR */
3977                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3978                  break;
3979                  }
3980              break;              break;
3981    
3982              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
# Line 2913  for (;;) Line 4004  for (;;)
4004                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4005              break;              break;
4006    
4007              case OP_WORDCHAR:              case OP_WORDCHAR:
4008              if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)              if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
4009                  RRETURN(MATCH_NOMATCH);
4010                break;
4011    
4012                default:
4013                RRETURN(PCRE_ERROR_INTERNAL);
4014                }
4015              }
4016            }
4017          else
4018    #endif
4019          /* Not UTF-8 mode */
4020            {
4021            for (fi = min;; fi++)
4022              {
4023              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4024              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4025              if (fi >= max)
4026                {
4027                CHECK_PARTIAL();
4028                RRETURN(MATCH_NOMATCH);
4029                }
4030              if (eptr >= md->end_subject)
4031                {
4032                SCHECK_PARTIAL();
4033                RRETURN(MATCH_NOMATCH);
4034                }
4035              if (ctype == OP_ANY && IS_NEWLINE(eptr))
4036                RRETURN(MATCH_NOMATCH);
4037              c = *eptr++;
4038              switch(ctype)
4039                {
4040                case OP_ANY:     /* This is the non-NL case */
4041                case OP_ALLANY:
4042                case OP_ANYBYTE:
4043                break;
4044    
4045                case OP_ANYNL:
4046                switch(c)
4047                  {
4048                  default: RRETURN(MATCH_NOMATCH);
4049                  case 0x000d:
4050                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4051                  break;
4052    
4053                  case 0x000a:
4054                  break;
4055    
4056                  case 0x000b:
4057                  case 0x000c:
4058                  case 0x0085:
4059                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4060                  break;
4061                  }
4062                break;
4063    
4064                case OP_NOT_HSPACE:
4065                switch(c)
4066                  {
4067                  default: break;
4068                  case 0x09:      /* HT */
4069                  case 0x20:      /* SPACE */
4070                  case 0xa0:      /* NBSP */
4071                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4072                  }
4073              break;              break;
4074    
4075              default:              case OP_HSPACE:
4076              RRETURN(PCRE_ERROR_INTERNAL);              switch(c)
4077              }                {
4078            }                default: RRETURN(MATCH_NOMATCH);
4079          }                case 0x09:      /* HT */
4080        else                case 0x20:      /* SPACE */
4081  #endif                case 0xa0:      /* NBSP */
4082        /* Not UTF-8 mode */                break;
4083          {                }
4084          for (fi = min;; fi++)              break;
           {  
           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);  
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
           if (fi >= max || eptr >= md->end_subject ||  
                ((ims & PCRE_DOTALL) == 0 &&  
                  eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))  
             RRETURN(MATCH_NOMATCH);  
4085    
4086            c = *eptr++;              case OP_NOT_VSPACE:
4087            switch(ctype)              switch(c)
4088              {                {
4089              case OP_ANY:   /* This is the DOTALL case */                default: break;
4090                  case 0x0a:      /* LF */
4091                  case 0x0b:      /* VT */
4092                  case 0x0c:      /* FF */
4093                  case 0x0d:      /* CR */
4094                  case 0x85:      /* NEL */
4095                  RRETURN(MATCH_NOMATCH);
4096                  }
4097              break;              break;
4098    
4099              case OP_ANYBYTE:              case OP_VSPACE:
4100                switch(c)
4101                  {
4102                  default: RRETURN(MATCH_NOMATCH);
4103                  case 0x0a:      /* LF */
4104                  case 0x0b:      /* VT */
4105                  case 0x0c:      /* FF */
4106                  case 0x0d:      /* CR */
4107                  case 0x85:      /* NEL */
4108                  break;
4109                  }
4110              break;              break;
4111    
4112              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
# Line 2977  for (;;) Line 4141  for (;;)
4141        /* Control never gets here */        /* Control never gets here */
4142        }        }
4143    
4144      /* If maximizing it is worth using inline code for speed, doing the type      /* If maximizing, it is worth using inline code for speed, doing the type
4145      test once at the start (i.e. keep it out of the loop). Again, keep the      test once at the start (i.e. keep it out of the loop). Again, keep the
4146      UTF-8 and UCP stuff separate. */      UTF-8 and UCP stuff separate. */
4147    
# Line 3007  for (;;) Line 4171  for (;;)
4171              int len = 1;              int len = 1;
4172              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
4173              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4174              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
4175              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4176                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
4177                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3022  for (;;) Line 4186  for (;;)
4186              int len = 1;              int len = 1;
4187              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
4188              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4189              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4190              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4191                break;                break;
4192              eptr+= len;              eptr+= len;
# Line 3035  for (;;) Line 4199  for (;;)
4199              int len = 1;              int len = 1;
4200              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
4201              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4202              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
4203              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4204                break;                break;
4205              eptr+= len;              eptr+= len;
# Line 3048  for (;;) Line 4212  for (;;)
4212              int len = 1;              int len = 1;
4213              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
4214              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4215              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
4216              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4217                break;                break;
4218              eptr+= len;              eptr+= len;
# Line 3058  for (;;) Line 4222  for (;;)
4222    
4223          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4224    
4225            CHECK_PARTIAL();
4226            if (possessive) continue;
4227          for(;;)          for(;;)
4228            {            {
4229            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
4230            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4231            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
4232            BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
4233            }            }
4234          }          }
4235    
# Line 3076  for (;;) Line 4242  for (;;)
4242            {            {
4243            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
4244            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4245            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
4246            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
4247            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4248              {              {
# Line 3085  for (;;) Line 4251  for (;;)
4251                {                {
4252                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
4253                }                }
4254              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4255              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
4256              eptr += len;              eptr += len;
4257              }              }
# Line 3093  for (;;) Line 4259  for (;;)
4259    
4260          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4261    
4262            CHECK_PARTIAL();
4263            if (possessive) continue;
4264          for(;;)          for(;;)
4265            {            {
4266            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
4267            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4268            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
4269            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
4270              {              {
4271              int len = 1;              int len = 1;
             BACKCHAR(eptr);  
4272              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr; else
4273                {                {
4274                  BACKCHAR(eptr);
4275                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
4276                }                }
4277              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4278              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
4279              eptr--;              eptr--;
4280              }              }
# Line 3124  for (;;) Line 4292  for (;;)
4292          switch(ctype)          switch(ctype)
4293            {            {
4294            case OP_ANY:            case OP_ANY:
   
           /* Special code is required for UTF8, but when the maximum is  
           unlimited we don't need it, so we repeat the non-UTF8 code. This is  
           probably worth it, because .* is quite a common idiom. */  
   
4295            if (max < INT_MAX)            if (max < INT_MAX)
4296              {              {
4297              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject ||  
                     (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))  
                   break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
4298                {                {
4299                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4300                  {                eptr++;
4301                  if (eptr >= md->end_subject) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
4302                }                }
4303              }              }
4304    
# Line 3157  for (;;) Line 4306  for (;;)
4306    
4307            else            else
4308              {              {
4309              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
4310                {                {
4311                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4312                  {                eptr++;
4313                  if (eptr >= md->end_subject ||                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                     (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))  
                   break;  
                 eptr++;  
                 }  
               break;  
4314                }                }
4315              else              }
4316              break;
4317    
4318              case OP_ALLANY:
4319              if (max < INT_MAX)
4320                {
4321                for (i = min; i < max; i++)
4322                {                {
4323                c = max - min;                if (eptr >= md->end_subject) break;
4324                if (c > md->end_subject - eptr) c = md->end_subject - eptr;                eptr++;
4325                eptr += c;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4326                }                }
4327              }              }
4328              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
4329            break;            break;
4330    
4331            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
4332    
4333            case OP_ANYBYTE:            case OP_ANYBYTE:
4334            c = max - min;            c = max - min;
4335            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
4336                c = md->end_subject - eptr;
4337            eptr += c;            eptr += c;
4338            break;            break;
4339    
4340              case OP_ANYNL:
4341              for (i = min; i < max; i++)
4342                {
4343                int len = 1;
4344                if (eptr >= md->end_subject) bre