/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 600 by ph10, Mon May 9 08:54:11 2011 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2011 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45  #define NLBLOCK md           /* The block containing newline information */  #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49    #define NLBLOCK md             /* Block containing newline information */
50    #define PSSTART start_subject  /* Field containing processed string start */
51    #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    /* Undefine some potentially clashing cpp symbols */
56    
57  /* Structure for building a chain of data that actually lives on the  #undef min
58  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   USPTR epb_saved_eptr;  
 } eptrblock;  
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_ACCEPT       (-999)
75    #define MATCH_COMMIT       (-998)
76    #define MATCH_PRUNE        (-997)
77    #define MATCH_SKIP         (-996)
78    #define MATCH_SKIP_ARG     (-995)
79    #define MATCH_THEN         (-994)
80    
81    /* This is a convenience macro for code that occurs many times. */
82    
83    #define MRRETURN(ra) \
84      { \
85      md->mark = markptr; \
86      RRETURN(ra); \
87      }
88    
89  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
90  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 81  static const char rep_max[] = { 0, 0, 0, Line 99  static const char rep_max[] = { 0, 0, 0,
99    
100    
101    
102  #ifdef DEBUG  #ifdef PCRE_DEBUG
103  /*************************************************  /*************************************************
104  *        Debugging function to print chars       *  *        Debugging function to print chars       *
105  *************************************************/  *************************************************/
# Line 101  Returns:     nothing Line 119  Returns:     nothing
119  static void  static void
120  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121  {  {
122  int c;  unsigned int c;
123  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124  while (length-- > 0)  while (length-- > 0)
125    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 114  while (length-- > 0) Line 132  while (length-- > 0)
132  *          Match a back-reference                *  *          Match a back-reference                *
133  *************************************************/  *************************************************/
134    
135  /* If a back reference hasn't been set, the length that is passed is greater  /* Normally, if a back reference hasn't been set, the length that is passed is
136  than the number of characters left in the string, so the match fails.  negative, so the match always fails. However, in JavaScript compatibility mode,
137    the length passed is zero. Note that in caseless UTF-8 mode, the number of
138    subject bytes matched may be different to the number of reference bytes.
139    
140  Arguments:  Arguments:
141    offset      index into the offset vector    offset      index into the offset vector
142    eptr        points into the subject    eptr        pointer into the subject
143    length      length to be matched    length      length of reference to be matched (number of bytes)
144    md          points to match data block    md          points to match data block
145    ims         the ims flags    ims         the ims flags
146    
147  Returns:      TRUE if matched  Returns:      < 0 if not matched, otherwise the number of subject bytes matched
148  */  */
149    
150  static BOOL  static int
151  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
152    unsigned long int ims)    unsigned long int ims)
153  {  {
154  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR eptr_start = eptr;
155    register USPTR p = md->start_subject + md->offset_vector[offset];
156    
157  #ifdef DEBUG  #ifdef PCRE_DEBUG
158  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
159    printf("matching subject <null>");    printf("matching subject <null>");
160  else  else
# Line 146  pchars(p, length, FALSE, md); Line 167  pchars(p, length, FALSE, md);
167  printf("\n");  printf("\n");
168  #endif  #endif
169    
170  /* Always fail if not enough characters left */  /* Always fail if reference not set (and not JavaScript compatible). */
171    
172  if (length > md->end_subject - eptr) return FALSE;  if (length < 0) return -1;
173    
174  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175    properly if Unicode properties are supported. Otherwise, we can check only
176    ASCII characters. */
177    
178  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
179    {    {
180    while (length-- > 0)  #ifdef SUPPORT_UTF8
181      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;  #ifdef SUPPORT_UCP
182      if (md->utf8)
183        {
184        /* Match characters up to the end of the reference. NOTE: the number of
185        bytes matched may differ, because there are some characters whose upper and
186        lower case versions code as different numbers of bytes. For example, U+023A
187        (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188        a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189        the latter. It is important, therefore, to check the length along the
190        reference, not along the subject (earlier code did this wrong). */
191    
192        USPTR endptr = p + length;
193        while (p < endptr)
194          {
195          int c, d;
196          if (eptr >= md->end_subject) return -1;
197          GETCHARINC(c, eptr);
198          GETCHARINC(d, p);
199          if (c != d && c != UCD_OTHERCASE(d)) return -1;
200          }
201        }
202      else
203    #endif
204    #endif
205    
206      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
207      is no UCP support. */
208        {
209        if (eptr + length > md->end_subject) return -1;
210        while (length-- > 0)
211          { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
212        }
213    }    }
214    
215    /* In the caseful case, we can just compare the bytes, whether or not we
216    are in UTF-8 mode. */
217    
218  else  else
219    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    {
220      if (eptr + length > md->end_subject) return -1;
221      while (length-- > 0) if (*p++ != *eptr++) return -1;
222      }
223    
224  return TRUE;  return eptr - eptr_start;
225  }  }
226    
227    
# Line 186  calls by keeping local variables that ne Line 247  calls by keeping local variables that ne
247  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
248  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
249  always used to.  always used to.
250    
251    The original heap-recursive code used longjmp(). However, it seems that this
252    can be very slow on some operating systems. Following a suggestion from Stan
253    Switzer, the use of longjmp() has been abolished, at the cost of having to
254    provide a unique number for each call to RMATCH. There is no way of generating
255    a sequence of numbers at compile time in C. I have given them names, to make
256    them stand out more clearly.
257    
258    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
259    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
260    tests. Furthermore, not using longjmp() means that local dynamic variables
261    don't have indeterminate values; this has meant that the frame size can be
262    reduced because the result can be "passed back" by straight setting of the
263    variable instead of being passed in the frame.
264  ****************************************************************************  ****************************************************************************
265  ***************************************************************************/  ***************************************************************************/
266    
267    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
268    below must be updated in sync.  */
269    
270    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
271           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
272           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
273           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
274           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
275           RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
276           RM61,  RM62 };
277    
278  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
279  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
280    actually used in this definition. */
281    
282  #ifndef NO_RECURSE  #ifndef NO_RECURSE
283  #define REGISTER register  #define REGISTER register
284  #ifdef DEBUG  
285  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #ifdef PCRE_DEBUG
286    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
287    { \    { \
288    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
289    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
290    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
291    }    }
292  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 208  versions and production versions. */ Line 295  versions and production versions. */
295    return ra; \    return ra; \
296    }    }
297  #else  #else
298  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
299    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
300  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
301  #endif  #endif
302    
303  #else  #else
304    
305    
306  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
307  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
308  match(), which never changes. */  argument of match(), which never changes. */
309    
310  #define REGISTER  #define REGISTER
311    
312  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
313    {\    {\
314    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
315    if (setjmp(frame->Xwhere) == 0)\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
316      {\    frame->Xwhere = rw; \
317      newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
318      newframe->Xecode = rb;\    newframe->Xecode = rb;\
319      newframe->Xoffset_top = rc;\    newframe->Xmstart = mstart;\
320      newframe->Xims = re;\    newframe->Xmarkptr = markptr;\
321      newframe->Xeptrb = rf;\    newframe->Xoffset_top = rc;\
322      newframe->Xflags = rg;\    newframe->Xims = re;\
323      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xeptrb = rf;\
324      newframe->Xprevframe = frame;\    newframe->Xflags = rg;\
325      frame = newframe;\    newframe->Xrdepth = frame->Xrdepth + 1;\
326      DPRINTF(("restarting from line %d\n", __LINE__));\    newframe->Xprevframe = frame;\
327      goto HEAP_RECURSE;\    frame = newframe;\
328      }\    DPRINTF(("restarting from line %d\n", __LINE__));\
329    else\    goto HEAP_RECURSE;\
330      {\    L_##rw:\
331      DPRINTF(("longjumped back to line %d\n", __LINE__));\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
332    }    }
333    
334  #define RRETURN(ra)\  #define RRETURN(ra)\
335    {\    {\
336    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
337    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
338    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
339    if (frame != NULL)\    if (frame != NULL)\
340      {\      {\
341      frame->Xresult = ra;\      rrc = ra;\
342      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
343      }\      }\
344    return ra;\    return ra;\
345    }    }
# Line 269  typedef struct heapframe { Line 352  typedef struct heapframe {
352    
353    /* Function arguments that may change */    /* Function arguments that may change */
354    
355    const uschar *Xeptr;    USPTR Xeptr;
356    const uschar *Xecode;    const uschar *Xecode;
357      USPTR Xmstart;
358      USPTR Xmarkptr;
359    int Xoffset_top;    int Xoffset_top;
360    long int Xims;    long int Xims;
361    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 279  typedef struct heapframe { Line 364  typedef struct heapframe {
364    
365    /* Function local variables */    /* Function local variables */
366    
367    const uschar *Xcallpat;    USPTR Xcallpat;
368    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
369    const uschar *Xdata;    USPTR Xcharptr;
370    const uschar *Xnext;  #endif
371    const uschar *Xpp;    USPTR Xdata;
372    const uschar *Xprev;    USPTR Xnext;
373    const uschar *Xsaved_eptr;    USPTR Xpp;
374      USPTR Xprev;
375      USPTR Xsaved_eptr;
376    
377    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
378    
379    BOOL Xcur_is_word;    BOOL Xcur_is_word;
380    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
381    BOOL Xprev_is_word;    BOOL Xprev_is_word;
382    
383    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
# Line 303  typedef struct heapframe { Line 389  typedef struct heapframe {
389    int Xprop_category;    int Xprop_category;
390    int Xprop_chartype;    int Xprop_chartype;
391    int Xprop_script;    int Xprop_script;
392    int *Xprop_test_variable;    int Xoclength;
393      uschar Xocchars[8];
394  #endif  #endif
395    
396      int Xcodelink;
397    int Xctype;    int Xctype;
398    int Xfc;    unsigned int Xfc;
399    int Xfi;    int Xfi;
400    int Xlength;    int Xlength;
401    int Xmax;    int Xmax;
# Line 321  typedef struct heapframe { Line 409  typedef struct heapframe {
409    
410    eptrblock Xnewptrb;    eptrblock Xnewptrb;
411    
412    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
413    
414    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
415    
416  } heapframe;  } heapframe;
417    
# Line 340  typedef struct heapframe { Line 427  typedef struct heapframe {
427  *         Match from current position            *  *         Match from current position            *
428  *************************************************/  *************************************************/
429    
430  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
431  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
432  same response.  same response. */
433    
434    /* These macros pack up tests that are used for partial matching, and which
435    appear several times in the code. We set the "hit end" flag if the pointer is
436    at the end of the subject and also past the start of the subject (i.e.
437    something has been matched). For hard partial matching, we then return
438    immediately. The second one is used when we already know we are past the end of
439    the subject. */
440    
441    #define CHECK_PARTIAL()\
442      if (md->partial != 0 && eptr >= md->end_subject && \
443          eptr > md->start_used_ptr) \
444        { \
445        md->hitend = TRUE; \
446        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
447        }
448    
449  Performance note: It might be tempting to extract commonly used fields from the  #define SCHECK_PARTIAL()\
450  md structure (e.g. utf8, end_subject) into individual variables to improve    if (md->partial != 0 && eptr > md->start_used_ptr) \
451        { \
452        md->hitend = TRUE; \
453        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
454        }
455    
456    
457    /* Performance note: It might be tempting to extract commonly used fields from
458    the md structure (e.g. utf8, end_subject) into individual variables to improve
459  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
460  made performance worse.  made performance worse.
461    
462  Arguments:  Arguments:
463     eptr        pointer in subject     eptr        pointer to current character in subject
464     ecode       position in code     ecode       pointer to current position in compiled code
465       mstart      pointer to the current match start position (can be modified
466                     by encountering \K)
467       markptr     pointer to the most recent MARK name, or NULL
468     offset_top  current top pointer     offset_top  current top pointer
469     md          pointer to "static" info for the match     md          pointer to "static" info for the match
470     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 362  Arguments: Line 472  Arguments:
472                   brackets - for testing for empty matches                   brackets - for testing for empty matches
473     flags       can contain     flags       can contain
474                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
475                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
476                       group that can match an empty string
477     rdepth      the recursion depth     rdepth      the recursion depth
478    
479  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
480                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
481                   a negative MATCH_xxx value for PRUNE, SKIP, etc
482                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
483                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
484  */  */
485    
486  static int  static int
487  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
488    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
489    int flags, unsigned int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
490  {  {
491  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
492  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
493  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
494    
495  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
496  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
497  register unsigned int  c;  /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
498  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
499    
500    BOOL minimize, possessive; /* Quantifier options */
501    int condcode;
502    
503  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
504  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
505  heap storage. Set up the top-level frame here; others are obtained from the  heap storage. Set up the top-level frame here; others are obtained from the
506  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  heap whenever RMATCH() does a "recursion". See the macro definitions above. */
507    
508  #ifdef NO_RECURSE  #ifdef NO_RECURSE
509  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
510    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
511  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
512    
513  /* Copy in the original argument variables */  /* Copy in the original argument variables */
514    
515  frame->Xeptr = eptr;  frame->Xeptr = eptr;
516  frame->Xecode = ecode;  frame->Xecode = ecode;
517    frame->Xmstart = mstart;
518    frame->Xmarkptr = markptr;
519  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
520  frame->Xims = ims;  frame->Xims = ims;
521  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 412  HEAP_RECURSE: Line 530  HEAP_RECURSE:
530    
531  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
532  #define ecode              frame->Xecode  #define ecode              frame->Xecode
533    #define mstart             frame->Xmstart
534    #define markptr            frame->Xmarkptr
535  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
536  #define ims                frame->Xims  #define ims                frame->Xims
537  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 424  HEAP_RECURSE: Line 544  HEAP_RECURSE:
544  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
545  #endif  #endif
546  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
547    #define codelink           frame->Xcodelink
548  #define data               frame->Xdata  #define data               frame->Xdata
549  #define next               frame->Xnext  #define next               frame->Xnext
550  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 434  HEAP_RECURSE: Line 555  HEAP_RECURSE:
555    
556  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
557  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
558  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
559    
560  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
# Line 446  HEAP_RECURSE: Line 566  HEAP_RECURSE:
566  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
567  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
568  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
569  #define prop_test_variable frame->Xprop_test_variable  #define oclength           frame->Xoclength
570    #define occhars            frame->Xocchars
571  #endif  #endif
572    
573  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 470  HEAP_RECURSE: Line 591  HEAP_RECURSE:
591  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
592  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
593    
594  #else  #else         /* NO_RECURSE not defined */
595  #define fi i  #define fi i
596  #define fc c  #define fc c
597    
# Line 489  recursion_info new_recursive;      /* wi Line 610  recursion_info new_recursive;      /* wi
610                                     /* that do not have to be preserved over  */                                     /* that do not have to be preserved over  */
611  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
612  BOOL condition;  BOOL condition;
 BOOL minimize;  
613  BOOL prev_is_word;  BOOL prev_is_word;
614    
615  unsigned long int original_ims;  unsigned long int original_ims;
# Line 501  int prop_fail_result; Line 621  int prop_fail_result;
621  int prop_category;  int prop_category;
622  int prop_chartype;  int prop_chartype;
623  int prop_script;  int prop_script;
624  int *prop_test_variable;  int oclength;
625    uschar occhars[8];
626  #endif  #endif
627    
628    int codelink;
629  int ctype;  int ctype;
630  int length;  int length;
631  int max;  int max;
# Line 516  int save_offset1, save_offset2, save_off Line 638  int save_offset1, save_offset2, save_off
638  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
639    
640  eptrblock newptrb;  eptrblock newptrb;
641  #endif  #endif     /* NO_RECURSE */
642    
643  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
644  variables. */  variables. */
# Line 524  variables. */ Line 646  variables. */
646  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
647  prop_value = 0;  prop_value = 0;
648  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_variable = NULL;  
649  #endif  #endif
650    
651    
652  /* This label is used for tail recursion, which is used in a few cases even  /* This label is used for tail recursion, which is used in a few cases even
653  when NO_RECURSE is not defined, in order to reduce the amount of stack that is  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
654  used. Thanks to Ian Taylor for noticing this possibility and sending the  used. Thanks to Ian Taylor for noticing this possibility and sending the
# Line 537  TAIL_RECURSE: Line 659  TAIL_RECURSE:
659  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
660  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
661  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
662  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
663  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
664  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
665  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
666    
667    #ifdef SUPPORT_UTF8
668    utf8 = md->utf8;       /* Local copy of the flag */
669    #else
670    utf8 = FALSE;
671    #endif
672    
673  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
674  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
675    
# Line 550  if (rdepth >= md->match_limit_recursion) Line 678  if (rdepth >= md->match_limit_recursion)
678    
679  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
680    
681  #ifdef SUPPORT_UTF8  /* At the start of a group with an unlimited repeat that may match an empty
682  utf8 = md->utf8;       /* Local copy of the flag */  string, the match_cbegroup flag is set. When this is the case, add the current
683  #else  subject pointer to the chain of such remembered pointers, to be checked when we
684  utf8 = FALSE;  hit the closing ket, in order to break infinite loops that match no characters.
685  #endif  When match() is called in other circumstances, don't add to the chain. The
686    match_cbegroup flag must NOT be used with tail recursion, because the memory
687  /* At the start of a bracketed group, add the current subject pointer to the  block that is used is on the stack, so a new one may be required for each
688  stack of such pointers, to be re-instated at the end of the group when we hit  match(). */
 the closing ket. When match() is called in other circumstances, we don't add to  
 this stack. */  
689    
690  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
691    {    {
   newptrb.epb_prev = eptrb;  
692    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
693      newptrb.epb_prev = eptrb;
694    eptrb = &newptrb;    eptrb = &newptrb;
695    }    }
696    
697  /* Now start processing the operations. */  /* Now start processing the opcodes. */
698    
699  for (;;)  for (;;)
700    {    {
701      minimize = possessive = FALSE;
702    op = *ecode;    op = *ecode;
   minimize = FALSE;  
   
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
703    
704    if (md->partial &&    switch(op)
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
   /* Opening capturing bracket. If there is space in the offset vector, save  
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
705      {      {
706      number = op - OP_BRA;      case OP_MARK:
707        markptr = ecode + 2;
708      /* For extended extraction brackets (large number), we have to fish out the      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
709      number from a dummy opcode at the start. */        ims, eptrb, flags, RM55);
710    
711      if (number > EXTRACT_BASIC_MAX)      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
712        number = GET2(ecode, 2+LINK_SIZE);      argument, and we must check whether that argument matches this MARK's
713        argument. It is passed back in md->start_match_ptr (an overloading of that
714        variable). If it does match, we reset that variable to the current subject
715        position and return MATCH_SKIP. Otherwise, pass back the return code
716        unaltered. */
717    
718        if (rrc == MATCH_SKIP_ARG &&
719            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
720          {
721          md->start_match_ptr = eptr;
722          RRETURN(MATCH_SKIP);
723          }
724    
725        if (md->mark == NULL) md->mark = markptr;
726        RRETURN(rrc);
727    
728        case OP_FAIL:
729        MRRETURN(MATCH_NOMATCH);
730    
731        /* COMMIT overrides PRUNE, SKIP, and THEN */
732    
733        case OP_COMMIT:
734        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
735          ims, eptrb, flags, RM52);
736        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
737            rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
738            rrc != MATCH_THEN)
739          RRETURN(rrc);
740        MRRETURN(MATCH_COMMIT);
741    
742        /* PRUNE overrides THEN */
743    
744        case OP_PRUNE:
745        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
746          ims, eptrb, flags, RM51);
747        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
748        MRRETURN(MATCH_PRUNE);
749    
750        case OP_PRUNE_ARG:
751        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
752          ims, eptrb, flags, RM56);
753        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
754        md->mark = ecode + 2;
755        RRETURN(MATCH_PRUNE);
756    
757        /* SKIP overrides PRUNE and THEN */
758    
759        case OP_SKIP:
760        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
761          ims, eptrb, flags, RM53);
762        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
763          RRETURN(rrc);
764        md->start_match_ptr = eptr;   /* Pass back current position */
765        MRRETURN(MATCH_SKIP);
766    
767        case OP_SKIP_ARG:
768        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
769          ims, eptrb, flags, RM57);
770        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
771          RRETURN(rrc);
772    
773        /* Pass back the current skip name by overloading md->start_match_ptr and
774        returning the special MATCH_SKIP_ARG return code. This will either be
775        caught by a matching MARK, or get to the top, where it is treated the same
776        as PRUNE. */
777    
778        md->start_match_ptr = ecode + 2;
779        RRETURN(MATCH_SKIP_ARG);
780    
781        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
782        the alt that is at the start of the current branch. This makes it possible
783        to skip back past alternatives that precede the THEN within the current
784        branch. */
785    
786        case OP_THEN:
787        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
788          ims, eptrb, flags, RM54);
789        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
790        md->start_match_ptr = ecode - GET(ecode, 1);
791        MRRETURN(MATCH_THEN);
792    
793        case OP_THEN_ARG:
794        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
795          offset_top, md, ims, eptrb, flags, RM58);
796        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
797        md->start_match_ptr = ecode - GET(ecode, 1);
798        md->mark = ecode + LINK_SIZE + 2;
799        RRETURN(MATCH_THEN);
800    
801        /* Handle a capturing bracket. If there is space in the offset vector, save
802        the current subject position in the working slot at the top of the vector.
803        We mustn't change the current values of the data slot, because they may be
804        set from a previous iteration of this group, and be referred to by a
805        reference inside the group.
806    
807        If the bracket fails to match, we need to restore this value and also the
808        values of the final offsets, in case they were set by a previous iteration
809        of the same bracket.
810    
811        If there isn't enough space in the offset vector, treat this as if it were
812        a non-capturing bracket. Don't worry about setting the flag for the error
813        case here; that is handled in the code for KET. */
814    
815        case OP_CBRA:
816        case OP_SCBRA:
817        number = GET2(ecode, 1+LINK_SIZE);
818      offset = number << 1;      offset = number << 1;
819    
820  #ifdef DEBUG  #ifdef PCRE_DEBUG
821      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
822        printf("subject=");
823      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
824      printf("\n");      printf("\n");
825  #endif  #endif
# Line 622  for (;;) Line 832  for (;;)
832        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
833    
834        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
835        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
836            (int)(eptr - md->start_subject);
837    
838          flags = (op == OP_SCBRA)? match_cbegroup : 0;
839        do        do
840          {          {
841          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
842            match_isgroup);            ims, eptrb, flags, RM1);
843          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
844                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
845              RRETURN(rrc);
846          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
847          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
848          }          }
# Line 640  for (;;) Line 854  for (;;)
854        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
855        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
856    
857          if (rrc != MATCH_THEN) md->mark = markptr;
858        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
859        }        }
860    
861      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
862        as a non-capturing bracket. */
863    
864      else op = OP_BRA;      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
865      }      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   
   /* Other types of node can be handled by a switch */  
   
   switch(op)  
     {  
     case OP_BRA:     /* Non-capturing bracket: optimized */  
     DPRINTF(("start bracket 0\n"));  
866    
867      /* Loop for all the alternatives */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
868    
869        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
870        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
871    
872        /* Non-capturing bracket. Loop for all the alternatives. When we get to the
873        final alternative within the brackets, we would return the result of a
874        recursive call to match() whatever happened. We can reduce stack usage by
875        turning this into a tail recursion, except in the case when match_cbegroup
876      is set. */
877    
878        case OP_BRA:
879        case OP_SBRA:
880        DPRINTF(("start non-capturing bracket\n"));
881        flags = (op >= OP_SBRA)? match_cbegroup : 0;
882      for (;;)      for (;;)
883        {        {
884        /* When we get to the final alternative within the brackets, we would        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
885        return the result of a recursive call to match() whatever happened. We          {
886        can reduce stack usage by turning this into a tail recursion. */          if (flags == 0)    /* Not a possibly empty group */
887              {
888        if (ecode[GET(ecode, 1)] != OP_ALT)            ecode += _pcre_OP_lengths[*ecode];
889         {            DPRINTF(("bracket 0 tail recursion\n"));
890         ecode += 1 + LINK_SIZE;            goto TAIL_RECURSE;
891         flags = match_isgroup;            }
892         DPRINTF(("bracket 0 tail recursion\n"));  
893         goto TAIL_RECURSE;          /* Possibly empty group; can't use tail recursion. */
894         }  
895            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
896              eptrb, flags, RM48);
897            if (rrc == MATCH_NOMATCH) md->mark = markptr;
898            RRETURN(rrc);
899            }
900    
901        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
902        otherwise return. */        otherwise return. */
903    
904        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
905          match_isgroup);          eptrb, flags, RM2);
906        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
907              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
908            RRETURN(rrc);
909        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
910        }        }
911      /* Control never reaches here. */      /* Control never reaches here. */
# Line 688  for (;;) Line 917  for (;;)
917      obeyed, we can use tail recursion to avoid using another stack frame. */      obeyed, we can use tail recursion to avoid using another stack frame. */
918    
919      case OP_COND:      case OP_COND:
920      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
921        codelink= GET(ecode, 1);
922    
923        /* Because of the way auto-callout works during compile, a callout item is
924        inserted between OP_COND and an assertion condition. */
925    
926        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
927          {
928          if (pcre_callout != NULL)
929            {
930            pcre_callout_block cb;
931            cb.version          = 1;   /* Version 1 of the callout block */
932            cb.callout_number   = ecode[LINK_SIZE+2];
933            cb.offset_vector    = md->offset_vector;
934            cb.subject          = (PCRE_SPTR)md->start_subject;
935            cb.subject_length   = (int)(md->end_subject - md->start_subject);
936            cb.start_match      = (int)(mstart - md->start_subject);
937            cb.current_position = (int)(eptr - md->start_subject);
938            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
939            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
940            cb.capture_top      = offset_top/2;
941            cb.capture_last     = md->capture_last;
942            cb.callout_data     = md->callout_data;
943            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
944            if (rrc < 0) RRETURN(rrc);
945            }
946          ecode += _pcre_OP_lengths[OP_CALLOUT];
947          }
948    
949        condcode = ecode[LINK_SIZE+1];
950    
951        /* Now see what the actual condition is */
952    
953        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
954          {
955          if (md->recursive == NULL)                /* Not recursing => FALSE */
956            {
957            condition = FALSE;
958            ecode += GET(ecode, 1);
959            }
960          else
961            {
962            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
963            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
964    
965            /* If the test is for recursion into a specific subpattern, and it is
966            false, but the test was set up by name, scan the table to see if the
967            name refers to any other numbers, and test them. The condition is true
968            if any one is set. */
969    
970            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
971              {
972              uschar *slotA = md->name_table;
973              for (i = 0; i < md->name_count; i++)
974                {
975                if (GET2(slotA, 0) == recno) break;
976                slotA += md->name_entry_size;
977                }
978    
979              /* Found a name for the number - there can be only one; duplicate
980              names for different numbers are allowed, but not vice versa. First
981              scan down for duplicates. */
982    
983              if (i < md->name_count)
984                {
985                uschar *slotB = slotA;
986                while (slotB > md->name_table)
987                  {
988                  slotB -= md->name_entry_size;
989                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
990                    {
991                    condition = GET2(slotB, 0) == md->recursive->group_num;
992                    if (condition) break;
993                    }
994                  else break;
995                  }
996    
997                /* Scan up for duplicates */
998    
999                if (!condition)
1000                  {
1001                  slotB = slotA;
1002                  for (i++; i < md->name_count; i++)
1003                    {
1004                    slotB += md->name_entry_size;
1005                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1006                      {
1007                      condition = GET2(slotB, 0) == md->recursive->group_num;
1008                      if (condition) break;
1009                      }
1010                    else break;
1011                    }
1012                  }
1013                }
1014              }
1015    
1016            /* Choose branch according to the condition */
1017    
1018            ecode += condition? 3 : GET(ecode, 1);
1019            }
1020          }
1021    
1022        else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1023        {        {
1024        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
1025        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
1026          (md->recursive != NULL) :  
1027          (offset < offset_top && md->offset_vector[offset] >= 0);        /* If the numbered capture is unset, but the reference was by name,
1028        ecode += condition? (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1));        scan the table to see if the name refers to any other numbers, and test
1029        flags = match_isgroup;        them. The condition is true if any one is set. This is tediously similar
1030        goto TAIL_RECURSE;        to the code above, but not close enough to try to amalgamate. */
1031    
1032          if (!condition && condcode == OP_NCREF)
1033            {
1034            int refno = offset >> 1;
1035            uschar *slotA = md->name_table;
1036    
1037            for (i = 0; i < md->name_count; i++)
1038              {
1039              if (GET2(slotA, 0) == refno) break;
1040              slotA += md->name_entry_size;
1041              }
1042    
1043            /* Found a name for the number - there can be only one; duplicate names
1044            for different numbers are allowed, but not vice versa. First scan down
1045            for duplicates. */
1046    
1047            if (i < md->name_count)
1048              {
1049              uschar *slotB = slotA;
1050              while (slotB > md->name_table)
1051                {
1052                slotB -= md->name_entry_size;
1053                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1054                  {
1055                  offset = GET2(slotB, 0) << 1;
1056                  condition = offset < offset_top &&
1057                    md->offset_vector[offset] >= 0;
1058                  if (condition) break;
1059                  }
1060                else break;
1061                }
1062    
1063              /* Scan up for duplicates */
1064    
1065              if (!condition)
1066                {
1067                slotB = slotA;
1068                for (i++; i < md->name_count; i++)
1069                  {
1070                  slotB += md->name_entry_size;
1071                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1072                    {
1073                    offset = GET2(slotB, 0) << 1;
1074                    condition = offset < offset_top &&
1075                      md->offset_vector[offset] >= 0;
1076                    if (condition) break;
1077                    }
1078                  else break;
1079                  }
1080                }
1081              }
1082            }
1083    
1084          /* Choose branch according to the condition */
1085    
1086          ecode += condition? 3 : GET(ecode, 1);
1087          }
1088    
1089        else if (condcode == OP_DEF)     /* DEFINE - always false */
1090          {
1091          condition = FALSE;
1092          ecode += GET(ecode, 1);
1093        }        }
1094    
1095      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
1096      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
1097        assertion. */
1098    
1099      else      else
1100        {        {
1101        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1102            match_condassert | match_isgroup);            match_condassert, RM3);
1103        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1104          {          {
1105          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
1106            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1107          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1108          }          }
1109        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH &&
1110                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1111          {          {
1112          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1113          }          }
1114        else ecode += GET(ecode, 1);        else
1115            {
1116            condition = FALSE;
1117            ecode += codelink;
1118            }
1119          }
1120    
1121        /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1122        we can use tail recursion to avoid using another stack frame. */      we can use tail recursion to avoid using another stack frame, except when
1123        match_cbegroup is required for an unlimited repeat of a possibly empty
1124        group. If the second alternative doesn't exist, we can just plough on. */
1125    
1126        if (condition || *ecode == OP_ALT)
1127          {
1128        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1129        flags = match_isgroup;        if (op == OP_SCOND)        /* Possibly empty group */
1130        goto TAIL_RECURSE;          {
1131            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1132            RRETURN(rrc);
1133            }
1134          else                       /* Group must match something */
1135            {
1136            flags = 0;
1137            goto TAIL_RECURSE;
1138            }
1139        }        }
1140      /* Control never reaches here */      else                         /* Condition false & no alternative */
1141          {
1142          ecode += 1 + LINK_SIZE;
1143          }
1144        break;
1145    
1146    
1147        /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1148        to close any currently open capturing brackets. */
1149    
1150        case OP_CLOSE:
1151        number = GET2(ecode, 1);
1152        offset = number << 1;
1153    
1154      /* Skip over conditional reference or large extraction number data if  #ifdef PCRE_DEBUG
1155      encountered. */        printf("end bracket %d at *ACCEPT", number);
1156          printf("\n");
1157    #endif
1158    
1159      case OP_CREF:      md->capture_last = number;
1160      case OP_BRANUMBER:      if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1161          {
1162          md->offset_vector[offset] =
1163            md->offset_vector[md->offset_end - number];
1164          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1165          if (offset_top <= offset) offset_top = offset + 2;
1166          }
1167      ecode += 3;      ecode += 3;
1168      break;      break;
1169    
     /* End of the pattern. If we are in a recursion, we should restore the  
     offsets appropriately and continue from after the call. */  
1170    
1171        /* End of the pattern, either real or forced. If we are in a top-level
1172        recursion, we should restore the offsets appropriately and continue from
1173        after the call. */
1174    
1175        case OP_ACCEPT:
1176      case OP_END:      case OP_END:
1177      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1178        {        {
# Line 745  for (;;) Line 1181  for (;;)
1181        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1182        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1183          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1184        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1185        ims = original_ims;        ims = original_ims;
1186        ecode = rec->after_call;        ecode = rec->after_call;
1187        break;        break;
1188        }        }
1189    
1190      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1191      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1192        the subject. In both cases, backtracking will then try other alternatives,
1193        if any. */
1194    
1195        if (eptr == mstart &&
1196            (md->notempty ||
1197              (md->notempty_atstart &&
1198                mstart == md->start_subject + md->start_offset)))
1199          MRRETURN(MATCH_NOMATCH);
1200    
1201        /* Otherwise, we have a match. */
1202    
1203        md->end_match_ptr = eptr;           /* Record where we ended */
1204        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1205        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1206    
1207      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      /* For some reason, the macros don't work properly if an expression is
1208      md->end_match_ptr = eptr;          /* Record where we ended */      given as the argument to MRRETURN when the heap is in use. */
1209      md->end_offset_top = offset_top;   /* and how many extracts were taken */  
1210      RRETURN(MATCH_MATCH);      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1211        MRRETURN(rrc);
1212    
1213      /* Change option settings */      /* Change option settings */
1214    
# Line 777  for (;;) Line 1228  for (;;)
1228      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1229      do      do
1230        {        {
1231        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1232          match_isgroup);          RM4);
1233        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1234        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          {
1235            mstart = md->start_match_ptr;   /* In case \K reset it */
1236            break;
1237            }
1238          if (rrc != MATCH_NOMATCH &&
1239              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1240            RRETURN(rrc);
1241        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1242        }        }
1243      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1244      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1245    
1246      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1247    
# Line 798  for (;;) Line 1255  for (;;)
1255      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1256      continue;      continue;
1257    
1258      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1259        PRUNE, or COMMIT means we must assume failure without checking subsequent
1260        branches. */
1261    
1262      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1263      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1264      do      do
1265        {        {
1266        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1267          match_isgroup);          RM5);
1268        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1269        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1270            {
1271            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1272            break;
1273            }
1274          if (rrc != MATCH_NOMATCH &&
1275              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1276            RRETURN(rrc);
1277        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1278        }        }
1279      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 826  for (;;) Line 1292  for (;;)
1292  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1293      if (utf8)      if (utf8)
1294        {        {
1295        c = GET(ecode,1);        i = GET(ecode, 1);
1296        for (i = 0; i < c; i++)        while (i-- > 0)
1297          {          {
1298          eptr--;          eptr--;
1299          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1300          BACKCHAR(eptr)          BACKCHAR(eptr);
1301          }          }
1302        }        }
1303      else      else
# Line 840  for (;;) Line 1306  for (;;)
1306      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1307    
1308        {        {
1309        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
1310        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1311        }        }
1312    
1313      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1314    
1315        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1316      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1317      break;      break;
1318    
# Line 861  for (;;) Line 1328  for (;;)
1328        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1329        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1330        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1331        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1332        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1333        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1334        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1335        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1336        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1337        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1338        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1339        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1340        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1341        }        }
1342      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 897  for (;;) Line 1364  for (;;)
1364      case OP_RECURSE:      case OP_RECURSE:
1365        {        {
1366        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1367        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
1368            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
1369    
1370        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1371    
# Line 929  for (;;) Line 1391  for (;;)
1391    
1392        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1393              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1394        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1395    
1396        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1397        restore the offset and recursion data. */        restore the offset and recursion data. */
1398    
1399        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1400          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1401        do        do
1402          {          {
1403          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1404              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
1405          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1406            {            {
1407            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1408            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1409            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1410              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1411            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1412            }            }
1413          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH &&
1414                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1415            {            {
1416            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1417              if (new_recursive.offset_save != stacksave)
1418                (pcre_free)(new_recursive.offset_save);
1419            RRETURN(rrc);            RRETURN(rrc);
1420            }            }
1421    
# Line 965  for (;;) Line 1430  for (;;)
1430        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1431        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1432          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1433        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1434        }        }
1435      /* Control never reaches here */      /* Control never reaches here */
1436    
# Line 974  for (;;) Line 1439  for (;;)
1439      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1440      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1441      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1442      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1443        the start-of-match value in case it was changed by \K. */
1444    
1445      case OP_ONCE:      case OP_ONCE:
1446      prev = ecode;      prev = ecode;
# Line 982  for (;;) Line 1448  for (;;)
1448    
1449      do      do
1450        {        {
1451        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1452          eptrb, match_isgroup);        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1453        if (rrc == MATCH_MATCH) break;          {
1454        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          mstart = md->start_match_ptr;
1455            break;
1456            }
1457          if (rrc != MATCH_NOMATCH &&
1458              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1459            RRETURN(rrc);
1460        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1461        }        }
1462      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 997  for (;;) Line 1468  for (;;)
1468      /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1469      mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1470    
1471      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1472    
1473      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1474      eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
# Line 1028  for (;;) Line 1499  for (;;)
1499    
1500      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1501        {        {
1502        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1503        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1504        ecode = prev;        ecode = prev;
1505        flags = match_isgroup;        flags = 0;
1506        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1507        }        }
1508      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1509        {        {
1510        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1511        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1512        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1513        flags = 0;        flags = 0;
# Line 1051  for (;;) Line 1522  for (;;)
1522      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1523      break;      break;
1524    
1525      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1526      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1527      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1528      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1529      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1530    
1531      case OP_BRAZERO:      case OP_BRAZERO:
1532        {        {
1533        next = ecode+1;        next = ecode+1;
1534        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1535        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1536        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1537        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1538        }        }
1539      break;      break;
1540    
1541      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1542        {        {
1543        next = ecode+1;        next = ecode+1;
1544        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1545        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1546        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1547        ecode++;        ecode++;
1548        }        }
1549      break;      break;
1550    
1551      /* End of a group, repeated or non-repeating. If we are at the end of      case OP_SKIPZERO:
1552      an assertion "group", stop matching and return MATCH_MATCH, but record the        {
1553      current high water mark for use by positive assertions. Do this also        next = ecode+1;
1554      for the "once" (not-backup up) groups. */        do next += GET(next,1); while (*next == OP_ALT);
1555          ecode = next + 1 + LINK_SIZE;
1556          }
1557        break;
1558    
1559        /* End of a group, repeated or non-repeating. */
1560    
1561      case OP_KET:      case OP_KET:
1562      case OP_KETRMIN:      case OP_KETRMIN:
1563      case OP_KETRMAX:      case OP_KETRMAX:
1564      prev = ecode - GET(ecode, 1);      prev = ecode - GET(ecode, 1);
     saved_eptr = eptrb->epb_saved_eptr;  
1565    
1566      /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1567        infinite repeats of empty string matches, retrieve the subject start from
1568        the chain. Otherwise, set it NULL. */
1569    
1570        if (*prev >= OP_SBRA)
1571          {
1572          saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1573          eptrb = eptrb->epb_prev;              /* Backup to previous group */
1574          }
1575        else saved_eptr = NULL;
1576    
1577      eptrb = eptrb->epb_prev;      /* If we are at the end of an assertion group or an atomic group, stop
1578        matching and return MATCH_MATCH, but record the current high water mark for
1579        use by positive assertions. We also need to record the match start in case
1580        it was changed by \K. */
1581    
1582      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1583          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1099  for (;;) Line 1585  for (;;)
1585        {        {
1586        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1587        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1588        RRETURN(MATCH_MATCH);        md->start_match_ptr = mstart;
1589          MRRETURN(MATCH_MATCH);
1590        }        }
1591    
1592      /* In all other cases except a conditional group we have to check the      /* For capturing groups we have to check the group number back at the start
1593      group number back at the start and if necessary complete handling an      and if necessary complete handling an extraction by setting the offsets and
1594      extraction by setting the offsets and bumping the high water mark. */      bumping the high water mark. Note that whole-pattern recursion is coded as
1595        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1596        when the OP_END is reached. Other recursion is handled here. */
1597    
1598      if (*prev != OP_COND)      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1599        {        {
1600        number = *prev - OP_BRA;        number = GET2(prev, 1+LINK_SIZE);
   
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);  
1601        offset = number << 1;        offset = number << 1;
1602    
1603  #ifdef DEBUG  #ifdef PCRE_DEBUG
1604        printf("end bracket %d", number);        printf("end bracket %d", number);
1605        printf("\n");        printf("\n");
1606  #endif  #endif
1607    
1608        /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1609        of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
       into group 0, so it won't be picked up here. Instead, we catch it when  
       the OP_END is reached. */  
   
       if (number > 0)  
1610          {          {
1611          md->capture_last = number;          md->offset_vector[offset] =
1612          if (offset >= md->offset_max) md->offset_overflow = TRUE; else            md->offset_vector[md->offset_end - number];
1613            {          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1614            md->offset_vector[offset] =          if (offset_top <= offset) offset_top = offset + 2;
1615              md->offset_vector[md->offset_end - number];          }
1616            md->offset_vector[offset+1] = eptr - md->start_subject;  
1617            if (offset_top <= offset) offset_top = offset + 2;        /* Handle a recursively called group. Restore the offsets
1618            }        appropriately and continue from after the call. */
1619    
1620          /* Handle a recursively called group. Restore the offsets        if (md->recursive != NULL && md->recursive->group_num == number)
1621          appropriately and continue from after the call. */          {
1622            recursion_info *rec = md->recursive;
1623          if (md->recursive != NULL && md->recursive->group_num == number)          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1624            {          md->recursive = rec->prevrec;
1625            recursion_info *rec = md->recursive;          memcpy(md->offset_vector, rec->offset_save,
1626            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));            rec->saved_max * sizeof(int));
1627            md->recursive = rec->prevrec;          offset_top = rec->save_offset_top;
1628            md->start_match = rec->save_start;          ecode = rec->after_call;
1629            memcpy(md->offset_vector, rec->offset_save,          ims = original_ims;
1630              rec->saved_max * sizeof(int));          break;
           ecode = rec->after_call;  
           ims = original_ims;  
           break;  
           }  
1631          }          }
1632        }        }
1633    
1634      /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1635      the group. */      flags, in case they got changed during the group. */
1636    
1637      ims = original_ims;      ims = original_ims;
1638      DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
# Line 1175  for (;;) Line 1651  for (;;)
1651    
1652      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1653      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1654      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1655        unlimited repeat of a group that can match an empty string. */
1656    
1657        flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1658    
1659      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1660        {        {
1661        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1662        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1663          if (flags != 0)    /* Could match an empty string */
1664            {
1665            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1666            RRETURN(rrc);
1667            }
1668        ecode = prev;        ecode = prev;
       flags = match_isgroup;  
1669        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1670        }        }
1671      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1672        {        {
1673        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1674        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1675        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1676        flags = 0;        flags = 0;
# Line 1198  for (;;) Line 1681  for (;;)
1681      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1682    
1683      case OP_CIRC:      case OP_CIRC:
1684      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1685      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1686        {        {
1687        if (eptr != md->start_subject &&        if (eptr != md->start_subject &&
1688            (eptr == md->end_subject ||            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1689             eptr < md->start_subject + md->nllen ||          MRRETURN(MATCH_NOMATCH);
            !IS_NEWLINE(eptr - md->nllen)))  
         RRETURN(MATCH_NOMATCH);  
1690        ecode++;        ecode++;
1691        break;        break;
1692        }        }
# Line 1214  for (;;) Line 1695  for (;;)
1695      /* Start of subject assertion */      /* Start of subject assertion */
1696    
1697      case OP_SOD:      case OP_SOD:
1698      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1699      ecode++;      ecode++;
1700      break;      break;
1701    
1702      /* Start of match assertion */      /* Start of match assertion */
1703    
1704      case OP_SOM:      case OP_SOM:
1705      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1706        ecode++;
1707        break;
1708    
1709        /* Reset the start of match point */
1710    
1711        case OP_SET_SOM:
1712        mstart = eptr;
1713      ecode++;      ecode++;
1714      break;      break;
1715    
# Line 1232  for (;;) Line 1720  for (;;)
1720      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1721        {        {
1722        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1723          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1724        else        else
1725          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          {
1726            if (md->noteol) MRRETURN(MATCH_NOMATCH);
1727            SCHECK_PARTIAL();
1728            }
1729        ecode++;        ecode++;
1730        break;        break;
1731        }        }
1732      else      else  /* Not multiline */
1733        {        {
1734        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1735        if (!md->endonly)        if (!md->endonly) goto ASSERT_NL_OR_EOS;
         {  
         if (eptr != md->end_subject &&  
             (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))  
           RRETURN(MATCH_NOMATCH);  
         ecode++;  
         break;  
         }  
1736        }        }
1737    
1738      /* ... else fall through for endonly */      /* ... else fall through for endonly */
1739    
1740      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1741    
1742      case OP_EOD:      case OP_EOD:
1743      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1744        SCHECK_PARTIAL();
1745      ecode++;      ecode++;
1746      break;      break;
1747    
1748      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1749    
1750      case OP_EODN:      case OP_EODN:
1751      if (eptr != md->end_subject &&      ASSERT_NL_OR_EOS:
1752          (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))      if (eptr < md->end_subject &&
1753        RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1754          MRRETURN(MATCH_NOMATCH);
1755    
1756        /* Either at end of string or \n before end. */
1757    
1758        SCHECK_PARTIAL();
1759      ecode++;      ecode++;
1760      break;      break;
1761    
# Line 1276  for (;;) Line 1767  for (;;)
1767    
1768        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1769        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1770        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1771          partial matching. */
1772    
1773  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1774        if (utf8)        if (utf8)
1775          {          {
1776            /* Get status of previous character */
1777    
1778          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1779            {            {
1780            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1781            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1782              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1783            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1784    #ifdef SUPPORT_UCP
1785              if (md->use_ucp)
1786                {
1787                if (c == '_') prev_is_word = TRUE; else
1788                  {
1789                  int cat = UCD_CATEGORY(c);
1790                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1791                  }
1792                }
1793              else
1794    #endif
1795            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1796            }            }
1797          if (eptr >= md->end_subject) cur_is_word = FALSE; else  
1798            /* Get status of next character */
1799    
1800            if (eptr >= md->end_subject)
1801              {
1802              SCHECK_PARTIAL();
1803              cur_is_word = FALSE;
1804              }
1805            else
1806            {            {
1807            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1808    #ifdef SUPPORT_UCP
1809              if (md->use_ucp)
1810                {
1811                if (c == '_') cur_is_word = TRUE; else
1812                  {
1813                  int cat = UCD_CATEGORY(c);
1814                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1815                  }
1816                }
1817              else
1818    #endif
1819            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1820            }            }
1821          }          }
1822        else        else
1823  #endif  #endif
1824    
1825        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1826          consistency with the behaviour of \w we do use it in this case. */
1827    
1828          {          {
1829          prev_is_word = (eptr != md->start_subject) &&          /* Get status of previous character */
1830            ((md->ctypes[eptr[-1]] & ctype_word) != 0);  
1831          cur_is_word = (eptr < md->end_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1832            ((md->ctypes[*eptr] & ctype_word) != 0);            {
1833          }            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1834    #ifdef SUPPORT_UCP
1835        /* Now see if the situation is what we want */            if (md->use_ucp)
1836                {
1837        if ((*ecode++ == OP_WORD_BOUNDARY)?              c = eptr[-1];
1838             cur_is_word == prev_is_word : cur_is_word != prev_is_word)              if (c == '_') prev_is_word = TRUE; else
1839          RRETURN(MATCH_NOMATCH);                {
1840        }                int cat = UCD_CATEGORY(c);
1841      break;                prev_is_word = (cat == ucp_L || cat == ucp_N);
1842                  }
1843                }
1844              else
1845    #endif
1846              prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1847              }
1848    
1849            /* Get status of next character */
1850    
1851            if (eptr >= md->end_subject)
1852              {
1853              SCHECK_PARTIAL();
1854              cur_is_word = FALSE;
1855              }
1856            else
1857    #ifdef SUPPORT_UCP
1858            if (md->use_ucp)
1859              {
1860              c = *eptr;
1861              if (c == '_') cur_is_word = TRUE; else
1862                {
1863                int cat = UCD_CATEGORY(c);
1864                cur_is_word = (cat == ucp_L || cat == ucp_N);
1865                }
1866              }
1867            else
1868    #endif
1869            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1870            }
1871    
1872          /* Now see if the situation is what we want */
1873    
1874          if ((*ecode++ == OP_WORD_BOUNDARY)?
1875               cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1876            MRRETURN(MATCH_NOMATCH);
1877          }
1878        break;
1879    
1880      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1881    
1882      case OP_ANY:      case OP_ANY:
1883      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1884        /* Fall through */
1885    
1886        case OP_ALLANY:
1887        if (eptr++ >= md->end_subject)
1888        {        {
1889        if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))        SCHECK_PARTIAL();
1890          RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1891        }        }
1892      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1893      ecode++;      ecode++;
1894      break;      break;
1895    
# Line 1332  for (;;) Line 1897  for (;;)
1897      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1898    
1899      case OP_ANYBYTE:      case OP_ANYBYTE:
1900      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1901          {
1902          SCHECK_PARTIAL();
1903          MRRETURN(MATCH_NOMATCH);
1904          }
1905      ecode++;      ecode++;
1906      break;      break;
1907    
1908      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1909      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1910          {
1911          SCHECK_PARTIAL();
1912          MRRETURN(MATCH_NOMATCH);
1913          }
1914      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1915      if (      if (
1916  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1345  for (;;) Line 1918  for (;;)
1918  #endif  #endif
1919         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1920         )         )
1921        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1922      ecode++;      ecode++;
1923      break;      break;
1924    
1925      case OP_DIGIT:      case OP_DIGIT:
1926      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1927          {
1928          SCHECK_PARTIAL();
1929          MRRETURN(MATCH_NOMATCH);
1930          }
1931      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1932      if (      if (
1933  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1358  for (;;) Line 1935  for (;;)
1935  #endif  #endif
1936         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1937         )         )
1938        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1939      ecode++;      ecode++;
1940      break;      break;
1941    
1942      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1943      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1944          {
1945          SCHECK_PARTIAL();
1946          MRRETURN(MATCH_NOMATCH);
1947          }
1948      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1949      if (      if (
1950  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1371  for (;;) Line 1952  for (;;)
1952  #endif  #endif
1953         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1954         )         )
1955        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1956      ecode++;      ecode++;
1957      break;      break;
1958    
1959      case OP_WHITESPACE:      case OP_WHITESPACE:
1960      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1961          {
1962          SCHECK_PARTIAL();
1963          MRRETURN(MATCH_NOMATCH);
1964          }
1965      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1966      if (      if (
1967  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1384  for (;;) Line 1969  for (;;)
1969  #endif  #endif
1970         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1971         )         )
1972        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1973      ecode++;      ecode++;
1974      break;      break;
1975    
1976      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1977      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1978          {
1979          SCHECK_PARTIAL();
1980          MRRETURN(MATCH_NOMATCH);
1981          }
1982      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1983      if (      if (
1984  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1397  for (;;) Line 1986  for (;;)
1986  #endif  #endif
1987         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1988         )         )
1989        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1990      ecode++;      ecode++;
1991      break;      break;
1992    
1993      case OP_WORDCHAR:      case OP_WORDCHAR:
1994      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1995          {
1996          SCHECK_PARTIAL();
1997          MRRETURN(MATCH_NOMATCH);
1998          }
1999      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2000      if (      if (
2001  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1410  for (;;) Line 2003  for (;;)
2003  #endif  #endif
2004         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
2005         )         )
2006        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2007        ecode++;
2008        break;
2009    
2010        case OP_ANYNL:
2011        if (eptr >= md->end_subject)
2012          {
2013          SCHECK_PARTIAL();
2014          MRRETURN(MATCH_NOMATCH);
2015          }
2016        GETCHARINCTEST(c, eptr);
2017        switch(c)
2018          {
2019          default: MRRETURN(MATCH_NOMATCH);
2020    
2021          case 0x000d:
2022          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2023          break;
2024    
2025          case 0x000a:
2026          break;
2027    
2028          case 0x000b:
2029          case 0x000c:
2030          case 0x0085:
2031          case 0x2028:
2032          case 0x2029:
2033          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2034          break;
2035          }
2036        ecode++;
2037        break;
2038    
2039        case OP_NOT_HSPACE:
2040        if (eptr >= md->end_subject)
2041          {
2042          SCHECK_PARTIAL();
2043          MRRETURN(MATCH_NOMATCH);
2044          }
2045        GETCHARINCTEST(c, eptr);
2046        switch(c)
2047          {
2048          default: break;
2049          case 0x09:      /* HT */
2050          case 0x20:      /* SPACE */
2051          case 0xa0:      /* NBSP */
2052          case 0x1680:    /* OGHAM SPACE MARK */
2053          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2054          case 0x2000:    /* EN QUAD */
2055          case 0x2001:    /* EM QUAD */
2056          case 0x2002:    /* EN SPACE */
2057          case 0x2003:    /* EM SPACE */
2058          case 0x2004:    /* THREE-PER-EM SPACE */
2059          case 0x2005:    /* FOUR-PER-EM SPACE */
2060          case 0x2006:    /* SIX-PER-EM SPACE */
2061          case 0x2007:    /* FIGURE SPACE */
2062          case 0x2008:    /* PUNCTUATION SPACE */
2063          case 0x2009:    /* THIN SPACE */
2064          case 0x200A:    /* HAIR SPACE */
2065          case 0x202f:    /* NARROW NO-BREAK SPACE */
2066          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2067          case 0x3000:    /* IDEOGRAPHIC SPACE */
2068          MRRETURN(MATCH_NOMATCH);
2069          }
2070        ecode++;
2071        break;
2072    
2073        case OP_HSPACE:
2074        if (eptr >= md->end_subject)
2075          {
2076          SCHECK_PARTIAL();
2077          MRRETURN(MATCH_NOMATCH);
2078          }
2079        GETCHARINCTEST(c, eptr);
2080        switch(c)
2081          {
2082          default: MRRETURN(MATCH_NOMATCH);
2083          case 0x09:      /* HT */
2084          case 0x20:      /* SPACE */
2085          case 0xa0:      /* NBSP */
2086          case 0x1680:    /* OGHAM SPACE MARK */
2087          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2088          case 0x2000:    /* EN QUAD */
2089          case 0x2001:    /* EM QUAD */
2090          case 0x2002:    /* EN SPACE */
2091          case 0x2003:    /* EM SPACE */
2092          case 0x2004:    /* THREE-PER-EM SPACE */
2093          case 0x2005:    /* FOUR-PER-EM SPACE */
2094          case 0x2006:    /* SIX-PER-EM SPACE */
2095          case 0x2007:    /* FIGURE SPACE */
2096          case 0x2008:    /* PUNCTUATION SPACE */
2097          case 0x2009:    /* THIN SPACE */
2098          case 0x200A:    /* HAIR SPACE */
2099          case 0x202f:    /* NARROW NO-BREAK SPACE */
2100          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2101          case 0x3000:    /* IDEOGRAPHIC SPACE */
2102          break;
2103          }
2104        ecode++;
2105        break;
2106    
2107        case OP_NOT_VSPACE:
2108        if (eptr >= md->end_subject)
2109          {
2110          SCHECK_PARTIAL();
2111          MRRETURN(MATCH_NOMATCH);
2112          }
2113        GETCHARINCTEST(c, eptr);
2114        switch(c)
2115          {
2116          default: break;
2117          case 0x0a:      /* LF */
2118          case 0x0b:      /* VT */
2119          case 0x0c:      /* FF */
2120          case 0x0d:      /* CR */
2121          case 0x85:      /* NEL */
2122          case 0x2028:    /* LINE SEPARATOR */
2123          case 0x2029:    /* PARAGRAPH SEPARATOR */
2124          MRRETURN(MATCH_NOMATCH);
2125          }
2126        ecode++;
2127        break;
2128    
2129        case OP_VSPACE:
2130        if (eptr >= md->end_subject)
2131          {
2132          SCHECK_PARTIAL();
2133          MRRETURN(MATCH_NOMATCH);
2134          }
2135        GETCHARINCTEST(c, eptr);
2136        switch(c)
2137          {
2138          default: MRRETURN(MATCH_NOMATCH);
2139          case 0x0a:      /* LF */
2140          case 0x0b:      /* VT */
2141          case 0x0c:      /* FF */
2142          case 0x0d:      /* CR */
2143          case 0x85:      /* NEL */
2144          case 0x2028:    /* LINE SEPARATOR */
2145          case 0x2029:    /* PARAGRAPH SEPARATOR */
2146          break;
2147          }
2148      ecode++;      ecode++;
2149      break;      break;
2150    
# Line 1420  for (;;) Line 2154  for (;;)
2154    
2155      case OP_PROP:      case OP_PROP:
2156      case OP_NOTPROP:      case OP_NOTPROP:
2157      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2158          {
2159          SCHECK_PARTIAL();
2160          MRRETURN(MATCH_NOMATCH);
2161          }
2162      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2163        {        {
2164        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
2165    
2166        switch(ecode[1])        switch(ecode[1])
2167          {          {
2168          case PT_ANY:          case PT_ANY:
2169          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2170          break;          break;
2171    
2172          case PT_LAMP:          case PT_LAMP:
2173          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2174               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2175               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2176            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2177           break;          break;
2178    
2179          case PT_GC:          case PT_GC:
2180          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2181            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2182          break;          break;
2183    
2184          case PT_PC:          case PT_PC:
2185          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2186            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2187          break;          break;
2188    
2189          case PT_SC:          case PT_SC:
2190          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2191            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2192            break;
2193    
2194            /* These are specials */
2195    
2196            case PT_ALNUM:
2197            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2198                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2199              MRRETURN(MATCH_NOMATCH);
2200            break;
2201    
2202            case PT_SPACE:    /* Perl space */
2203            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2204                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2205                   == (op == OP_NOTPROP))
2206              MRRETURN(MATCH_NOMATCH);
2207            break;
2208    
2209            case PT_PXSPACE:  /* POSIX space */
2210            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2211                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2212                 c == CHAR_FF || c == CHAR_CR)
2213                   == (op == OP_NOTPROP))
2214              MRRETURN(MATCH_NOMATCH);
2215            break;
2216    
2217            case PT_WORD:
2218            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2219                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2220                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2221              MRRETURN(MATCH_NOMATCH);
2222          break;          break;
2223    
2224            /* This should never occur */
2225    
2226          default:          default:
2227          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
         break;  
2228          }          }
2229    
2230        ecode += 3;        ecode += 3;
# Line 1467  for (;;) Line 2235  for (;;)
2235      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2236    
2237      case OP_EXTUNI:      case OP_EXTUNI:
2238      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2239          {
2240          SCHECK_PARTIAL();
2241          MRRETURN(MATCH_NOMATCH);
2242          }
2243      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2244        {        {
2245        int chartype, script;        int category = UCD_CATEGORY(c);
2246        int category = _pcre_ucp_findprop(c, &chartype, &script);        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2247        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2248          {          {
2249          int len = 1;          int len = 1;
# Line 1480  for (;;) Line 2251  for (;;)
2251            {            {
2252            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2253            }            }
2254          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2255          if (category != ucp_M) break;          if (category != ucp_M) break;
2256          eptr += len;          eptr += len;
2257          }          }
# Line 1499  for (;;) Line 2270  for (;;)
2270      loops). */      loops). */
2271    
2272      case OP_REF:      case OP_REF:
2273        {      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2274        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      ecode += 3;
       ecode += 3;                                 /* Advance past item */  
2275    
2276        /* If the reference is unset, set the length to be longer than the amount      /* If the reference is unset, there are two possibilities:
       of subject left; this ensures that every attempt at a match fails. We  
       can't just fail here, because of the possibility of quantifiers with zero  
       minima. */  
   
       length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
         md->end_subject - eptr + 1 :  
         md->offset_vector[offset+1] - md->offset_vector[offset];  
2277    
2278        /* Set up for repetition, or handle the non-repeated case */      (a) In the default, Perl-compatible state, set the length negative;
2279        this ensures that every attempt at a match fails. We can't just fail
2280        here, because of the possibility of quantifiers with zero minima.
2281    
2282        switch (*ecode)      (b) If the JavaScript compatibility flag is set, set the length to zero
2283          {      so that the back reference matches an empty string.
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         c = *ecode++ - OP_CRSTAR;  
         minimize = (c & 1) != 0;  
         min = rep_min[c];                 /* Pick up values from tables; */  
         max = rep_max[c];                 /* zero for max => infinity */  
         if (max == 0) max = INT_MAX;  
         break;  
2284    
2285          case OP_CRRANGE:      Otherwise, set the length to the length of what was matched by the
2286          case OP_CRMINRANGE:      referenced subpattern. */
         minimize = (*ecode == OP_CRMINRANGE);  
         min = GET2(ecode, 1);  
         max = GET2(ecode, 3);  
         if (max == 0) max = INT_MAX;  
         ecode += 5;  
         break;  
2287    
2288          default:               /* No repeat follows */      if (offset >= offset_top || md->offset_vector[offset] < 0)
2289          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);        length = (md->jscript_compat)? 0 : -1;
2290          eptr += length;      else
2291          continue;              /* With the main loop */        length = md->offset_vector[offset+1] - md->offset_vector[offset];
2292    
2293        /* Set up for repetition, or handle the non-repeated case */
2294    
2295        switch (*ecode)
2296          {
2297          case OP_CRSTAR:
2298          case OP_CRMINSTAR:
2299          case OP_CRPLUS:
2300          case OP_CRMINPLUS:
2301          case OP_CRQUERY:
2302          case OP_CRMINQUERY:
2303          c = *ecode++ - OP_CRSTAR;
2304          minimize = (c & 1) != 0;
2305          min = rep_min[c];                 /* Pick up values from tables; */
2306          max = rep_max[c];                 /* zero for max => infinity */
2307          if (max == 0) max = INT_MAX;
2308          break;
2309    
2310          case OP_CRRANGE:
2311          case OP_CRMINRANGE:
2312          minimize = (*ecode == OP_CRMINRANGE);
2313          min = GET2(ecode, 1);
2314          max = GET2(ecode, 3);
2315          if (max == 0) max = INT_MAX;
2316          ecode += 5;
2317          break;
2318    
2319          default:               /* No repeat follows */
2320          if ((length = match_ref(offset, eptr, length, md, ims)) < 0)
2321            {
2322            CHECK_PARTIAL();
2323            MRRETURN(MATCH_NOMATCH);
2324          }          }
2325          eptr += length;
2326          continue;              /* With the main loop */
2327          }
2328    
2329        /* If the length of the reference is zero, just continue with the      /* Handle repeated back references. If the length of the reference is
2330        main loop. */      zero, just continue with the main loop. */
2331    
2332        if (length == 0) continue;      if (length == 0) continue;
2333    
2334        /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2335        the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
2336        address of eptr, so that eptr can be a register variable. */      address of eptr, so that eptr can be a register variable. */
2337    
2338        for (i = 1; i <= min; i++)      for (i = 1; i <= min; i++)
2339          {
2340          int slength;
2341          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2342          {          {
2343          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          CHECK_PARTIAL();
2344          eptr += length;          MRRETURN(MATCH_NOMATCH);
2345          }          }
2346          eptr += slength;
2347          }
2348    
2349        /* If min = max, continue at the same level without recursion.      /* If min = max, continue at the same level without recursion.
2350        They are not both allowed to be zero. */      They are not both allowed to be zero. */
2351    
2352        if (min == max) continue;      if (min == max) continue;
2353    
2354        /* If minimizing, keep trying and advancing the pointer */      /* If minimizing, keep trying and advancing the pointer */
2355    
2356        if (minimize)      if (minimize)
2357          {
2358          for (fi = min;; fi++)
2359          {          {
2360          for (fi = min;; fi++)          int slength;
2361            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2362            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2363            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2364            if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2365            {            {
2366            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            CHECK_PARTIAL();
2367            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            MRRETURN(MATCH_NOMATCH);
           if (fi >= max || !match_ref(offset, eptr, length, md, ims))  
             RRETURN(MATCH_NOMATCH);  
           eptr += length;  
2368            }            }
2369          /* Control never gets here */          eptr += slength;
2370          }          }
2371          /* Control never gets here */
2372          }
2373    
2374        /* If maximizing, find the longest string and work backwards */      /* If maximizing, find the longest string and work backwards */
2375    
2376        else      else
2377          {
2378          pp = eptr;
2379          for (i = min; i < max; i++)
2380          {          {
2381          pp = eptr;          int slength;
2382          for (i = min; i < max; i++)          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
           {  
           if (!match_ref(offset, eptr, length, md, ims)) break;  
           eptr += length;  
           }  
         while (eptr >= pp)  
2383            {            {
2384            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            CHECK_PARTIAL();
2385            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            break;
           eptr -= length;  
2386            }            }
2387          RRETURN(MATCH_NOMATCH);          eptr += slength;
2388            }
2389          while (eptr >= pp)
2390            {
2391            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2392            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2393            eptr -= length;
2394          }          }
2395          MRRETURN(MATCH_NOMATCH);
2396        }        }
2397      /* Control never gets here */      /* Control never gets here */
2398    
   
   
2399      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2400      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2401      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1656  for (;;) Line 2450  for (;;)
2450          {          {
2451          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2452            {            {
2453            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2454                {
2455                SCHECK_PARTIAL();
2456                MRRETURN(MATCH_NOMATCH);
2457                }
2458            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2459            if (c > 255)            if (c > 255)
2460              {              {
2461              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2462              }              }
2463            else            else
2464              {              {
2465              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2466              }              }
2467            }            }
2468          }          }
# Line 1674  for (;;) Line 2472  for (;;)
2472          {          {
2473          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2474            {            {
2475            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2476                {
2477                SCHECK_PARTIAL();
2478                MRRETURN(MATCH_NOMATCH);
2479                }
2480            c = *eptr++;            c = *eptr++;
2481            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2482            }            }
2483          }          }
2484    
# Line 1696  for (;;) Line 2498  for (;;)
2498            {            {
2499            for (fi = min;; fi++)            for (fi = min;; fi++)
2500              {              {
2501              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2502              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2503              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2504                if (eptr >= md->end_subject)
2505                  {
2506                  SCHECK_PARTIAL();
2507                  MRRETURN(MATCH_NOMATCH);
2508                  }
2509              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2510              if (c > 255)              if (c > 255)
2511                {                {
2512                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2513                }                }
2514              else              else
2515                {                {
2516                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2517                }                }
2518              }              }
2519            }            }
# Line 1716  for (;;) Line 2523  for (;;)
2523            {            {
2524            for (fi = min;; fi++)            for (fi = min;; fi++)
2525              {              {
2526              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2527              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2528              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2529                if (eptr >= md->end_subject)
2530                  {
2531                  SCHECK_PARTIAL();
2532                  MRRETURN(MATCH_NOMATCH);
2533                  }
2534              c = *eptr++;              c = *eptr++;
2535              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2536              }              }
2537            }            }
2538          /* Control never gets here */          /* Control never gets here */
# Line 1739  for (;;) Line 2551  for (;;)
2551            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2552              {              {
2553              int len = 1;              int len = 1;
2554              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2555                  {
2556                  SCHECK_PARTIAL();
2557                  break;
2558                  }
2559              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2560              if (c > 255)              if (c > 255)
2561                {                {
# Line 1753  for (;;) Line 2569  for (;;)
2569              }              }
2570            for (;;)            for (;;)
2571              {              {
2572              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2573              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2574              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2575              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1765  for (;;) Line 2581  for (;;)
2581            {            {
2582            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2583              {              {
2584              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2585                  {
2586                  SCHECK_PARTIAL();
2587                  break;
2588                  }
2589              c = *eptr;              c = *eptr;
2590              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2591              eptr++;              eptr++;
2592              }              }
2593            while (eptr >= pp)            while (eptr >= pp)
2594              {              {
2595              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2596              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2597              eptr--;              eptr--;
2598              }              }
2599            }            }
2600    
2601          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2602          }          }
2603        }        }
2604      /* Control never gets here */      /* Control never gets here */
2605    
2606    
2607      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2608      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2609        mode, because Unicode properties are supported in non-UTF-8 mode. */
2610    
2611  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2612      case OP_XCLASS:      case OP_XCLASS:
# Line 1826  for (;;) Line 2647  for (;;)
2647    
2648        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2649          {          {
2650          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2651          GETCHARINC(c, eptr);            {
2652          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2653              MRRETURN(MATCH_NOMATCH);
2654              }
2655            GETCHARINCTEST(c, eptr);
2656            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2657          }          }
2658    
2659        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1843  for (;;) Line 2668  for (;;)
2668          {          {
2669          for (fi = min;; fi++)          for (fi = min;; fi++)
2670            {            {
2671            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2672            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2673            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2674            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2675            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2676                SCHECK_PARTIAL();
2677                MRRETURN(MATCH_NOMATCH);
2678                }
2679              GETCHARINCTEST(c, eptr);
2680              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2681            }            }
2682          /* Control never gets here */          /* Control never gets here */
2683          }          }
# Line 1860  for (;;) Line 2690  for (;;)
2690          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2691            {            {
2692            int len = 1;            int len = 1;
2693            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2694            GETCHARLEN(c, eptr, len);              {
2695                SCHECK_PARTIAL();
2696                break;
2697                }
2698              GETCHARLENTEST(c, eptr, len);
2699            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2700            eptr += len;            eptr += len;
2701            }            }
2702          for(;;)          for(;;)
2703            {            {
2704            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2705            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2706            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2707            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2708            }            }
2709          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2710          }          }
2711    
2712        /* Control never gets here */        /* Control never gets here */
# Line 1888  for (;;) Line 2722  for (;;)
2722        length = 1;        length = 1;
2723        ecode++;        ecode++;
2724        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2725        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2726        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2727            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2728            MRRETURN(MATCH_NOMATCH);
2729            }
2730          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2731        }        }
2732      else      else
2733  #endif  #endif
2734    
2735      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2736        {        {
2737        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2738        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2739            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2740            MRRETURN(MATCH_NOMATCH);
2741            }
2742          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2743        ecode += 2;        ecode += 2;
2744        }        }
2745      break;      break;
# Line 1912  for (;;) Line 2754  for (;;)
2754        ecode++;        ecode++;
2755        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2756    
2757        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2758            {
2759            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2760            MRRETURN(MATCH_NOMATCH);
2761            }
2762    
2763        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2764        can use the fast lookup table. */        can use the fast lookup table. */
2765    
2766        if (fc < 128)        if (fc < 128)
2767          {          {
2768          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2769          }          }
2770    
2771        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
2772    
2773        else        else
2774          {          {
2775          int dc;          unsigned int dc;
2776          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2777          ecode += length;          ecode += length;
2778    
# Line 1936  for (;;) Line 2782  for (;;)
2782          if (fc != dc)          if (fc != dc)
2783            {            {
2784  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2785            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2786  #endif  #endif
2787              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2788            }            }
2789          }          }
2790        }        }
# Line 1947  for (;;) Line 2793  for (;;)
2793    
2794      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2795        {        {
2796        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2797        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2798            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2799            MRRETURN(MATCH_NOMATCH);
2800            }
2801          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2802        ecode += 2;        ecode += 2;
2803        }        }
2804      break;      break;
2805    
2806      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2807    
2808      case OP_EXACT:      case OP_EXACT:
2809      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2810      ecode += 3;      ecode += 3;
2811      goto REPEATCHAR;      goto REPEATCHAR;
2812    
2813        case OP_POSUPTO:
2814        possessive = TRUE;
2815        /* Fall through */
2816    
2817      case OP_UPTO:      case OP_UPTO:
2818      case OP_MINUPTO:      case OP_MINUPTO:
2819      min = 0;      min = 0;
# Line 1968  for (;;) Line 2822  for (;;)
2822      ecode += 3;      ecode += 3;
2823      goto REPEATCHAR;      goto REPEATCHAR;
2824    
2825        case OP_POSSTAR:
2826        possessive = TRUE;
2827        min = 0;
2828        max = INT_MAX;
2829        ecode++;
2830        goto REPEATCHAR;
2831    
2832        case OP_POSPLUS:
2833        possessive = TRUE;
2834        min = 1;
2835        max = INT_MAX;
2836        ecode++;
2837        goto REPEATCHAR;
2838    
2839        case OP_POSQUERY:
2840        possessive = TRUE;
2841        min = 0;
2842        max = 1;
2843        ecode++;
2844        goto REPEATCHAR;
2845    
2846      case OP_STAR:      case OP_STAR:
2847      case OP_MINSTAR:      case OP_MINSTAR:
2848      case OP_PLUS:      case OP_PLUS:
# Line 1976  for (;;) Line 2851  for (;;)
2851      case OP_MINQUERY:      case OP_MINQUERY:
2852      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2853      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2854    
2855      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2856      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2857      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2858    
2859      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2860    
2861      REPEATCHAR:      REPEATCHAR:
2862  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1991  for (;;) Line 2865  for (;;)
2865        length = 1;        length = 1;
2866        charptr = ecode;        charptr = ecode;
2867        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2868        ecode += length;        ecode += length;
2869    
2870        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 1999  for (;;) Line 2872  for (;;)
2872    
2873        if (length > 1)        if (length > 1)
2874          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2875  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2876          int othercase;          unsigned int othercase;
2877          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2878              (othercase = _pcre_ucp_othercase(fc)) >= 0 &&              (othercase = UCD_OTHERCASE(fc)) != fc)
              othercase >= 0)  
2879            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2880            else oclength = 0;
2881  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2882    
2883          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2884            {            {
2885            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2886            /* Need braces because of following else */              memcmp(eptr, charptr, length) == 0) eptr += length;
2887            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2888              else if (oclength > 0 &&
2889                       eptr <= md->end_subject - oclength &&
2890                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2891    #endif  /* SUPPORT_UCP */
2892            else            else
2893              {              {
2894              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2895              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2896              }              }
2897            }            }
2898    
# Line 2028  for (;;) Line 2902  for (;;)
2902            {            {
2903            for (fi = min;; fi++)            for (fi = min;; fi++)
2904              {              {
2905              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2906              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2907              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2908              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2909              /* Need braces because of following else */                memcmp(eptr, charptr, length) == 0) eptr += length;
2910              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2911                else if (oclength > 0 &&
2912                         eptr <= md->end_subject - oclength &&
2913                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2914    #endif  /* SUPPORT_UCP */
2915              else              else
2916                {                {
2917                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2918                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2919                }                }
2920              }              }
2921            /* Control never gets here */            /* Control never gets here */
2922            }            }
2923          else  
2924            else  /* Maximize */
2925            {            {
2926            pp = eptr;            pp = eptr;
2927            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2928              {              {
2929              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2930              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2931              else if (oclength == 0) break;  #ifdef SUPPORT_UCP
2932                else if (oclength > 0 &&
2933                         eptr <= md->end_subject - oclength &&
2934                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2935    #endif  /* SUPPORT_UCP */
2936              else              else
2937                {                {
2938                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2939                eptr += oclength;                break;
2940                }                }
2941              }              }
2942            while (eptr >= pp)  
2943             {            if (possessive) continue;
2944             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);  
2945             if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for(;;)
2946             eptr -= length;              {
2947             }              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2948            RRETURN(MATCH_NOMATCH);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2949                if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2950    #ifdef SUPPORT_UCP
2951                eptr--;
2952                BACKCHAR(eptr);
2953    #else   /* without SUPPORT_UCP */
2954                eptr -= length;
2955    #endif  /* SUPPORT_UCP */
2956                }
2957            }            }
2958          /* Control never gets here */          /* Control never gets here */
2959          }          }
# Line 2075  for (;;) Line 2966  for (;;)
2966  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2967    
2968      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2969        {  
2970        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2971    
2972      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2973      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2096  for (;;) Line 2985  for (;;)
2985        {        {
2986        fc = md->lcc[fc];        fc = md->lcc[fc];
2987        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2988          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2989            if (eptr >= md->end_subject)
2990              {
2991              SCHECK_PARTIAL();
2992              MRRETURN(MATCH_NOMATCH);
2993              }
2994            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2995            }
2996        if (min == max) continue;        if (min == max) continue;
2997        if (minimize)        if (minimize)
2998          {          {
2999          for (fi = min;; fi++)          for (fi = min;; fi++)
3000            {            {
3001            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
3002            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3003            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3004                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
3005              RRETURN(MATCH_NOMATCH);              {
3006                SCHECK_PARTIAL();
3007                MRRETURN(MATCH_NOMATCH);
3008                }
3009              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3010            }            }
3011          /* Control never gets here */          /* Control never gets here */
3012          }          }
3013        else        else  /* Maximize */
3014          {          {
3015          pp = eptr;          pp = eptr;
3016          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3017            {            {
3018            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
3019                {
3020                SCHECK_PARTIAL();
3021                break;
3022                }
3023              if (fc != md->lcc[*eptr]) break;
3024            eptr++;            eptr++;
3025            }            }
3026    
3027            if (possessive) continue;
3028    
3029          while (eptr >= pp)          while (eptr >= pp)
3030            {            {
3031            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3032            eptr--;            eptr--;
3033            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3034            }            }
3035          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3036          }          }
3037        /* Control never gets here */        /* Control never gets here */
3038        }        }
# Line 2133  for (;;) Line 3041  for (;;)
3041    
3042      else      else
3043        {        {
3044        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
3045            {
3046            if (eptr >= md->end_subject)
3047              {
3048              SCHECK_PARTIAL();
3049              MRRETURN(MATCH_NOMATCH);
3050              }
3051            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3052            }
3053    
3054        if (min == max) continue;        if (min == max) continue;
3055    
3056        if (minimize)        if (minimize)
3057          {          {
3058          for (fi = min;; fi++)          for (fi = min;; fi++)
3059            {            {
3060            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3061            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3062            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3063              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3064                {
3065                SCHECK_PARTIAL();
3066                MRRETURN(MATCH_NOMATCH);
3067                }
3068              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3069            }            }
3070          /* Control never gets here */          /* Control never gets here */
3071          }          }
3072        else        else  /* Maximize */
3073          {          {
3074          pp = eptr;          pp = eptr;
3075          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3076            {            {
3077            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
3078                {
3079                SCHECK_PARTIAL();
3080                break;
3081                }
3082              if (fc != *eptr) break;
3083            eptr++;            eptr++;
3084            }            }
3085            if (possessive) continue;
3086    
3087          while (eptr >= pp)          while (eptr >= pp)
3088            {            {
3089            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3090            eptr--;            eptr--;
3091            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3092            }            }
3093          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3094          }          }
3095        }        }
3096      /* Control never gets here */      /* Control never gets here */
# Line 2169  for (;;) Line 3099  for (;;)
3099      checking can be multibyte. */      checking can be multibyte. */
3100    
3101      case OP_NOT:      case OP_NOT:
3102      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
3103          {
3104          SCHECK_PARTIAL();
3105          MRRETURN(MATCH_NOMATCH);
3106          }
3107      ecode++;      ecode++;
3108      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3109      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2178  for (;;) Line 3112  for (;;)
3112        if (c < 256)        if (c < 256)
3113  #endif  #endif
3114        c = md->lcc[c];        c = md->lcc[c];
3115        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3116        }        }
3117      else      else
3118        {        {
3119        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3120        }        }
3121      break;      break;
3122    
# Line 2206  for (;;) Line 3140  for (;;)
3140      ecode += 3;      ecode += 3;
3141      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3142    
3143        case OP_NOTPOSSTAR:
3144        possessive = TRUE;
3145        min = 0;
3146        max = INT_MAX;
3147        ecode++;
3148        goto REPEATNOTCHAR;
3149    
3150        case OP_NOTPOSPLUS:
3151        possessive = TRUE;
3152        min = 1;
3153        max = INT_MAX;
3154        ecode++;
3155        goto REPEATNOTCHAR;
3156    
3157        case OP_NOTPOSQUERY:
3158        possessive = TRUE;
3159        min = 0;
3160        max = 1;
3161        ecode++;
3162        goto REPEATNOTCHAR;
3163    
3164        case OP_NOTPOSUPTO:
3165        possessive = TRUE;
3166        min = 0;
3167        max = GET2(ecode, 1);
3168        ecode += 3;
3169        goto REPEATNOTCHAR;
3170    
3171      case OP_NOTSTAR:      case OP_NOTSTAR:
3172      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
3173      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2218  for (;;) Line 3180  for (;;)
3180      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3181      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3182    
3183      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3184    
3185      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3186      fc = *ecode++;      fc = *ecode++;
3187    
3188      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2245  for (;;) Line 3204  for (;;)
3204        /* UTF-8 mode */        /* UTF-8 mode */
3205        if (utf8)        if (utf8)
3206          {          {
3207          register int d;          register unsigned int d;
3208          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3209            {            {
3210              if (eptr >= md->end_subject)
3211                {
3212                SCHECK_PARTIAL();
3213                MRRETURN(MATCH_NOMATCH);
3214                }
3215            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3216            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3217            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3218            }            }
3219          }          }
3220        else        else
# Line 2259  for (;;) Line 3223  for (;;)
3223        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3224          {          {
3225          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3226            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3227              if (eptr >= md->end_subject)
3228                {
3229                SCHECK_PARTIAL();
3230                MRRETURN(MATCH_NOMATCH);
3231                }
3232              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3233              }
3234          }          }
3235    
3236        if (min == max) continue;        if (min == max) continue;
# Line 2270  for (;;) Line 3241  for (;;)
3241          /* UTF-8 mode */          /* UTF-8 mode */
3242          if (utf8)          if (utf8)
3243            {            {
3244            register int d;            register unsigned int d;
3245            for (fi = min;; fi++)            for (fi = min;; fi++)
3246              {              {
3247              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3248              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3249                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3250                if (eptr >= md->end_subject)
3251                  {
3252                  SCHECK_PARTIAL();
3253                  MRRETURN(MATCH_NOMATCH);
3254                  }
3255              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3256              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3257              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3258              }              }
3259            }            }
3260          else          else
# Line 2287  for (;;) Line 3263  for (;;)
3263            {            {
3264            for (fi = min;; fi++)            for (fi = min;; fi++)
3265              {              {
3266              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3267              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3268              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3269                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3270                  {
3271                  SCHECK_PARTIAL();
3272                  MRRETURN(MATCH_NOMATCH);
3273                  }
3274                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3275              }              }
3276            }            }
3277          /* Control never gets here */          /* Control never gets here */
# Line 2306  for (;;) Line 3287  for (;;)
3287          /* UTF-8 mode */          /* UTF-8 mode */
3288          if (utf8)          if (utf8)
3289            {            {
3290            register int d;            register unsigned int d;
3291            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3292              {              {
3293              int len = 1;              int len = 1;
3294              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3295                  {
3296                  SCHECK_PARTIAL();
3297                  break;
3298                  }
3299              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3300              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3301              if (fc == d) break;              if (fc == d) break;
3302              eptr += len;              eptr += len;
3303              }              }
3304            for(;;)          if (possessive) continue;
3305            for(;;)
3306              {              {
3307              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3308              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3309              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3310              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2330  for (;;) Line 3316  for (;;)
3316            {            {
3317            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3318              {              {
3319              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3320                  {
3321                  SCHECK_PARTIAL();
3322                  break;
3323                  }
3324                if (fc == md->lcc[*eptr]) break;
3325              eptr++;              eptr++;
3326              }              }
3327              if (possessive) continue;
3328            while (eptr >= pp)            while (eptr >= pp)
3329              {              {
3330              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3331              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3332              eptr--;              eptr--;
3333              }              }
3334            }            }
3335    
3336          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3337          }          }
3338        /* Control never gets here */        /* Control never gets here */
3339        }        }
# Line 2354  for (;;) Line 3346  for (;;)
3346        /* UTF-8 mode */        /* UTF-8 mode */
3347        if (utf8)        if (utf8)
3348          {          {
3349          register int d;          register unsigned int d;
3350          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3351            {            {
3352              if (eptr >= md->end_subject)
3353                {
3354                SCHECK_PARTIAL();
3355                MRRETURN(MATCH_NOMATCH);
3356                }
3357            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3358            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3359            }            }
3360          }          }
3361        else        else
# Line 2366  for (;;) Line 3363  for (;;)
3363        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3364          {          {
3365          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3366            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3367              if (eptr >= md->end_subject)
3368                {
3369                SCHECK_PARTIAL();
3370                MRRETURN(MATCH_NOMATCH);
3371                }
3372              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3373              }
3374          }          }
3375    
3376        if (min == max) continue;        if (min == max) continue;
# Line 2377  for (;;) Line 3381  for (;;)
3381          /* UTF-8 mode */          /* UTF-8 mode */
3382          if (utf8)          if (utf8)
3383            {            {
3384            register int d;            register unsigned int d;
3385            for (fi = min;; fi++)            for (fi = min;; fi++)
3386              {              {
3387              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3388              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3389                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3390                if (eptr >= md->end_subject)
3391                  {
3392                  SCHECK_PARTIAL();
3393                  MRRETURN(MATCH_NOMATCH);
3394                  }
3395              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3396              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3397              }              }
3398            }            }
3399          else          else
# Line 2393  for (;;) Line 3402  for (;;)
3402            {            {
3403            for (fi = min;; fi++)            for (fi = min;; fi++)
3404              {              {
3405              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3406              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3407              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3408                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3409                  {
3410                  SCHECK_PARTIAL();
3411                  MRRETURN(MATCH_NOMATCH);
3412                  }
3413                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3414              }              }
3415            }            }
3416          /* Control never gets here */          /* Control never gets here */
# Line 2412  for (;;) Line 3426  for (;;)
3426          /* UTF-8 mode */          /* UTF-8 mode */
3427          if (utf8)          if (utf8)
3428            {            {
3429            register int d;            register unsigned int d;
3430            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3431              {              {
3432              int len = 1;              int len = 1;
3433              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3434                  {
3435                  SCHECK_PARTIAL();
3436                  break;
3437                  }
3438              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3439              if (fc == d) break;              if (fc == d) break;
3440              eptr += len;              eptr += len;
3441              }              }
3442              if (possessive) continue;
3443            for(;;)            for(;;)
3444              {              {
3445              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3446              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3447              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3448              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2435  for (;;) Line 3454  for (;;)
3454            {            {
3455            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3456              {              {
3457              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3458                  {
3459                  SCHECK_PARTIAL();
3460                  break;
3461                  }
3462                if (fc == *eptr) break;
3463              eptr++;              eptr++;
3464              }              }
3465              if (possessive) continue;
3466            while (eptr >= pp)            while (eptr >= pp)
3467              {              {
3468              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3469              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3470              eptr--;              eptr--;
3471              }              }
3472            }            }
3473    
3474          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3475          }          }
3476        }        }
3477      /* Control never gets here */      /* Control never gets here */
# Line 2469  for (;;) Line 3494  for (;;)
3494      ecode += 3;      ecode += 3;
3495      goto REPEATTYPE;      goto REPEATTYPE;
3496    
3497        case OP_TYPEPOSSTAR:
3498        possessive = TRUE;
3499        min = 0;
3500        max = INT_MAX;
3501        ecode++;
3502        goto REPEATTYPE;
3503    
3504        case OP_TYPEPOSPLUS:
3505        possessive = TRUE;
3506        min = 1;
3507        max = INT_MAX;
3508        ecode++;
3509        goto REPEATTYPE;
3510    
3511        case OP_TYPEPOSQUERY:
3512        possessive = TRUE;
3513        min = 0;
3514        max = 1;
3515        ecode++;
3516        goto REPEATTYPE;
3517    
3518        case OP_TYPEPOSUPTO:
3519        possessive = TRUE;
3520        min = 0;
3521        max = GET2(ecode, 1);
3522        ecode += 3;
3523        goto REPEATTYPE;
3524    
3525      case OP_TYPESTAR:      case OP_TYPESTAR:
3526      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
3527      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2500  for (;;) Line 3553  for (;;)
3553    
3554      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3555      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3556      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3557      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3558      and single-bytes. */      and single-bytes. */
3559    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3560      if (min > 0)      if (min > 0)
3561        {        {
3562  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2515  for (;;) Line 3565  for (;;)
3565          switch(prop_type)          switch(prop_type)
3566            {            {
3567            case PT_ANY:            case PT_ANY:
3568            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3569            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3570              {              {
3571              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3572              GETCHARINC(c, eptr);                {
3573                  SCHECK_PARTIAL();
3574                  MRRETURN(MATCH_NOMATCH);
3575                  }
3576                GETCHARINCTEST(c, eptr);
3577              }              }
3578            break;            break;
3579    
3580            case PT_LAMP:            case PT_LAMP:
3581            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3582              {              {
3583              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3584              GETCHARINC(c, eptr);                {
3585              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3586                  MRRETURN(MATCH_NOMATCH);
3587                  }
3588                GETCHARINCTEST(c, eptr);
3589                prop_chartype = UCD_CHARTYPE(c);
3590              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3591                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3592                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3593                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3594              }              }
3595            break;            break;
3596    
3597            case PT_GC:            case PT_GC:
3598            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3599              {              {
3600              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3601              GETCHARINC(c, eptr);                {
3602              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3603                  MRRETURN(MATCH_NOMATCH);
3604                  }
3605                GETCHARINCTEST(c, eptr);
3606                prop_category = UCD_CATEGORY(c);
3607              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3608                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3609              }              }
3610            break;            break;
3611    
3612            case PT_PC:            case PT_PC:
3613            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3614              {              {
3615              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3616              GETCHARINC(c, eptr);                {
3617              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3618                  MRRETURN(MATCH_NOMATCH);
3619                  }
3620                GETCHARINCTEST(c, eptr);
3621                prop_chartype = UCD_CHARTYPE(c);
3622              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3623                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3624              }              }
3625            break;            break;
3626    
3627            case PT_SC:            case PT_SC:
3628            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3629              {              {
3630              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3631              GETCHARINC(c, eptr);                {
3632              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3633                  MRRETURN(MATCH_NOMATCH);
3634                  }
3635                GETCHARINCTEST(c, eptr);
3636                prop_script = UCD_SCRIPT(c);
3637              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3638                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3639                }
3640              break;
3641    
3642              case PT_ALNUM:
3643              for (i = 1; i <= min; i++)
3644                {
3645                if (eptr >= md->end_subject)
3646                  {
3647                  SCHECK_PARTIAL();
3648                  MRRETURN(MATCH_NOMATCH);
3649                  }
3650                GETCHARINCTEST(c, eptr);
3651                prop_category = UCD_CATEGORY(c);
3652                if ((prop_category == ucp_L || prop_category == ucp_N)
3653                       == prop_fail_result)
3654                  MRRETURN(MATCH_NOMATCH);
3655                }
3656              break;
3657    
3658              case PT_SPACE:    /* Perl space */
3659              for (i = 1; i <= min; i++)
3660                {
3661                if (eptr >= md->end_subject)
3662                  {
3663                  SCHECK_PARTIAL();
3664                  MRRETURN(MATCH_NOMATCH);
3665                  }
3666                GETCHARINCTEST(c, eptr);
3667                prop_category = UCD_CATEGORY(c);
3668                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3669                     c == CHAR_FF || c == CHAR_CR)
3670                       == prop_fail_result)
3671                  MRRETURN(MATCH_NOMATCH);
3672                }
3673              break;
3674    
3675              case PT_PXSPACE:  /* POSIX space */
3676              for (i = 1; i <= min; i++)
3677                {
3678                if (eptr >= md->end_subject)
3679                  {
3680                  SCHECK_PARTIAL();
3681                  MRRETURN(MATCH_NOMATCH);
3682                  }
3683                GETCHARINCTEST(c, eptr);
3684                prop_category = UCD_CATEGORY(c);
3685                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3686                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3687                       == prop_fail_result)
3688                  MRRETURN(MATCH_NOMATCH);
3689                }
3690              break;
3691    
3692              case PT_WORD:
3693              for (i = 1; i <= min; i++)
3694                {
3695                if (eptr >= md->end_subject)
3696                  {
3697                  SCHECK_PARTIAL();
3698                  MRRETURN(MATCH_NOMATCH);
3699                  }
3700                GETCHARINCTEST(c, eptr);
3701                prop_category = UCD_CATEGORY(c);
3702                if ((prop_category == ucp_L || prop_category == ucp_N ||
3703                     c == CHAR_UNDERSCORE)
3704                       == prop_fail_result)
3705                  MRRETURN(MATCH_NOMATCH);
3706              }              }
3707            break;            break;
3708    
3709              /* This should not occur */
3710    
3711            default:            default:
3712            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
3713            }            }
3714          }          }
3715    
# Line 2582  for (;;) Line 3720  for (;;)
3720          {          {
3721          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3722            {            {
3723              if (eptr >= md->end_subject)
3724                {
3725                SCHECK_PARTIAL();
3726                MRRETURN(MATCH_NOMATCH);
3727                }
3728            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3729            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3730            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3731            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3732              {              {
3733              int len = 1;              int len = 1;
3734              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3735                {                else { GETCHARLEN(c, eptr, len); }
3736                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3737              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3738              eptr += len;              eptr += len;
3739              }              }
# Line 2610  for (;;) Line 3751  for (;;)
3751          case OP_ANY:          case OP_ANY:
3752          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3753            {            {
3754            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3755                 ((ims & PCRE_DOTALL) == 0 &&              {
<