/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 595 by ph10, Mon May 2 10:33:29 2011 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49    #define NLBLOCK md             /* Block containing newline information */
50    #define PSSTART start_subject  /* Field containing processed string start */
51    #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    /* Undefine some potentially clashing cpp symbols */
56    
57  /* Structure for building a chain of data that actually lives on the  #undef min
58  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   USPTR epb_saved_eptr;  
 } eptrblock;  
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_ACCEPT       (-999)
75    #define MATCH_COMMIT       (-998)
76    #define MATCH_PRUNE        (-997)
77    #define MATCH_SKIP         (-996)
78    #define MATCH_SKIP_ARG     (-995)
79    #define MATCH_THEN         (-994)
80    
81    /* This is a convenience macro for code that occurs many times. */
82    
83    #define MRRETURN(ra) \
84      { \
85      md->mark = markptr; \
86      RRETURN(ra); \
87      }
88    
89  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
90  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 81  static const char rep_max[] = { 0, 0, 0, Line 99  static const char rep_max[] = { 0, 0, 0,
99    
100    
101    
102  #ifdef DEBUG  #ifdef PCRE_DEBUG
103  /*************************************************  /*************************************************
104  *        Debugging function to print chars       *  *        Debugging function to print chars       *
105  *************************************************/  *************************************************/
# Line 101  Returns:     nothing Line 119  Returns:     nothing
119  static void  static void
120  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121  {  {
122  int c;  unsigned int c;
123  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124  while (length-- > 0)  while (length-- > 0)
125    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 114  while (length-- > 0) Line 132  while (length-- > 0)
132  *          Match a back-reference                *  *          Match a back-reference                *
133  *************************************************/  *************************************************/
134    
135  /* If a back reference hasn't been set, the length that is passed is greater  /* Normally, if a back reference hasn't been set, the length that is passed is
136  than the number of characters left in the string, so the match fails.  negative, so the match always fails. However, in JavaScript compatibility mode,
137    the length passed is zero. Note that in caseless UTF-8 mode, the number of
138    subject bytes matched may be different to the number of reference bytes.
139    
140  Arguments:  Arguments:
141    offset      index into the offset vector    offset      index into the offset vector
142    eptr        points into the subject    eptr        pointer into the subject
143    length      length to be matched    length      length of reference to be matched (number of bytes)
144    md          points to match data block    md          points to match data block
145    ims         the ims flags    ims         the ims flags
146    
147  Returns:      TRUE if matched  Returns:      < 0 if not matched, otherwise the number of subject bytes matched
148  */  */
149    
150  static BOOL  static int
151  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
152    unsigned long int ims)    unsigned long int ims)
153  {  {
154  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR eptr_start = eptr;
155    register USPTR p = md->start_subject + md->offset_vector[offset];
156    
157  #ifdef DEBUG  #ifdef PCRE_DEBUG
158  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
159    printf("matching subject <null>");    printf("matching subject <null>");
160  else  else
# Line 146  pchars(p, length, FALSE, md); Line 167  pchars(p, length, FALSE, md);
167  printf("\n");  printf("\n");
168  #endif  #endif
169    
170  /* Always fail if not enough characters left */  /* Always fail if reference not set (and not JavaScript compatible). */
171    
172  if (length > md->end_subject - eptr) return FALSE;  if (length < 0) return -1;
173    
174  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175    properly if Unicode properties are supported. Otherwise, we can check only
176    ASCII characters. */
177    
178  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
179    {    {
180    #ifdef SUPPORT_UTF8
181    #ifdef SUPPORT_UCP
182      if (md->utf8)
183        {
184        /* Match characters up to the end of the reference. NOTE: the number of
185        bytes matched may differ, because there are some characters whose upper and
186        lower case versions code as different numbers of bytes. For example, U+023A
187        (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188        a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189        the latter. It is important, therefore, to check the length along the
190        reference, not along the subject (earlier code did this wrong). */
191    
192        USPTR endptr = p + length;
193        while (p < endptr)
194          {
195          int c, d;
196          GETCHARINC(c, eptr);
197          GETCHARINC(d, p);
198          if (c != d && c != UCD_OTHERCASE(d)) return -1;
199          }
200        }
201      else
202    #endif
203    #endif
204    
205      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
206      is no UCP support. */
207    
208    while (length-- > 0)    while (length-- > 0)
209      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
210    }    }
211    
212    /* In the caseful case, we can just compare the bytes, whether or not we
213    are in UTF-8 mode. */
214    
215  else  else
216    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return -1; }
217    
218  return TRUE;  return eptr - eptr_start;
219  }  }
220    
221    
# Line 186  calls by keeping local variables that ne Line 241  calls by keeping local variables that ne
241  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
242  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
243  always used to.  always used to.
244    
245    The original heap-recursive code used longjmp(). However, it seems that this
246    can be very slow on some operating systems. Following a suggestion from Stan
247    Switzer, the use of longjmp() has been abolished, at the cost of having to
248    provide a unique number for each call to RMATCH. There is no way of generating
249    a sequence of numbers at compile time in C. I have given them names, to make
250    them stand out more clearly.
251    
252    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
253    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
254    tests. Furthermore, not using longjmp() means that local dynamic variables
255    don't have indeterminate values; this has meant that the frame size can be
256    reduced because the result can be "passed back" by straight setting of the
257    variable instead of being passed in the frame.
258  ****************************************************************************  ****************************************************************************
259  ***************************************************************************/  ***************************************************************************/
260    
261    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
262    below must be updated in sync.  */
263    
264    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
265           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
266           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
267           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
268           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
269           RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
270           RM61,  RM62 };
271    
272  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
273  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
274    actually used in this definition. */
275    
276  #ifndef NO_RECURSE  #ifndef NO_RECURSE
277  #define REGISTER register  #define REGISTER register
278  #ifdef DEBUG  
279  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #ifdef PCRE_DEBUG
280    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
281    { \    { \
282    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
283    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
284    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
285    }    }
286  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 208  versions and production versions. */ Line 289  versions and production versions. */
289    return ra; \    return ra; \
290    }    }
291  #else  #else
292  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
293    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
294  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
295  #endif  #endif
296    
297  #else  #else
298    
299    
300  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
301  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
302  match(), which never changes. */  argument of match(), which never changes. */
303    
304  #define REGISTER  #define REGISTER
305    
306  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
307    {\    {\
308    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
309    if (setjmp(frame->Xwhere) == 0)\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
310      {\    frame->Xwhere = rw; \
311      newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
312      newframe->Xecode = rb;\    newframe->Xecode = rb;\
313      newframe->Xoffset_top = rc;\    newframe->Xmstart = mstart;\
314      newframe->Xims = re;\    newframe->Xmarkptr = markptr;\
315      newframe->Xeptrb = rf;\    newframe->Xoffset_top = rc;\
316      newframe->Xflags = rg;\    newframe->Xims = re;\
317      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xeptrb = rf;\
318      newframe->Xprevframe = frame;\    newframe->Xflags = rg;\
319      frame = newframe;\    newframe->Xrdepth = frame->Xrdepth + 1;\
320      DPRINTF(("restarting from line %d\n", __LINE__));\    newframe->Xprevframe = frame;\
321      goto HEAP_RECURSE;\    frame = newframe;\
322      }\    DPRINTF(("restarting from line %d\n", __LINE__));\
323    else\    goto HEAP_RECURSE;\
324      {\    L_##rw:\
325      DPRINTF(("longjumped back to line %d\n", __LINE__));\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
326    }    }
327    
328  #define RRETURN(ra)\  #define RRETURN(ra)\
329    {\    {\
330    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
331    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
332    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
333    if (frame != NULL)\    if (frame != NULL)\
334      {\      {\
335      frame->Xresult = ra;\      rrc = ra;\
336      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
337      }\      }\
338    return ra;\    return ra;\
339    }    }
# Line 269  typedef struct heapframe { Line 346  typedef struct heapframe {
346    
347    /* Function arguments that may change */    /* Function arguments that may change */
348    
349    const uschar *Xeptr;    USPTR Xeptr;
350    const uschar *Xecode;    const uschar *Xecode;
351      USPTR Xmstart;
352      USPTR Xmarkptr;
353    int Xoffset_top;    int Xoffset_top;
354    long int Xims;    long int Xims;
355    eptrblock *Xeptrb;    eptrblock *Xeptrb;
356    int Xflags;    int Xflags;
357    int Xrdepth;    unsigned int Xrdepth;
358    
359    /* Function local variables */    /* Function local variables */
360    
361    const uschar *Xcallpat;    USPTR Xcallpat;
362    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
363    const uschar *Xdata;    USPTR Xcharptr;
364    const uschar *Xnext;  #endif
365    const uschar *Xpp;    USPTR Xdata;
366    const uschar *Xprev;    USPTR Xnext;
367    const uschar *Xsaved_eptr;    USPTR Xpp;
368      USPTR Xprev;
369      USPTR Xsaved_eptr;
370    
371    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
372    
373    BOOL Xcur_is_word;    BOOL Xcur_is_word;
374    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
375    BOOL Xprev_is_word;    BOOL Xprev_is_word;
376    
377    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
# Line 303  typedef struct heapframe { Line 383  typedef struct heapframe {
383    int Xprop_category;    int Xprop_category;
384    int Xprop_chartype;    int Xprop_chartype;
385    int Xprop_script;    int Xprop_script;
386    int *Xprop_test_variable;    int Xoclength;
387      uschar Xocchars[8];
388  #endif  #endif
389    
390      int Xcodelink;
391    int Xctype;    int Xctype;
392    int Xfc;    unsigned int Xfc;
393    int Xfi;    int Xfi;
394    int Xlength;    int Xlength;
395    int Xmax;    int Xmax;
# Line 321  typedef struct heapframe { Line 403  typedef struct heapframe {
403    
404    eptrblock Xnewptrb;    eptrblock Xnewptrb;
405    
406    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
407    
408    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
409    
410  } heapframe;  } heapframe;
411    
# Line 340  typedef struct heapframe { Line 421  typedef struct heapframe {
421  *         Match from current position            *  *         Match from current position            *
422  *************************************************/  *************************************************/
423    
424  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
425  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
426  same response.  same response. */
427    
428  Performance note: It might be tempting to extract commonly used fields from the  /* These macros pack up tests that are used for partial matching, and which
429  md structure (e.g. utf8, end_subject) into individual variables to improve  appears several times in the code. We set the "hit end" flag if the pointer is
430    at the end of the subject and also past the start of the subject (i.e.
431    something has been matched). For hard partial matching, we then return
432    immediately. The second one is used when we already know we are past the end of
433    the subject. */
434    
435    #define CHECK_PARTIAL()\
436      if (md->partial != 0 && eptr >= md->end_subject && \
437          eptr > md->start_used_ptr) \
438        { \
439        md->hitend = TRUE; \
440        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
441        }
442    
443    #define SCHECK_PARTIAL()\
444      if (md->partial != 0 && eptr > md->start_used_ptr) \
445        { \
446        md->hitend = TRUE; \
447        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
448        }
449    
450    
451    /* Performance note: It might be tempting to extract commonly used fields from
452    the md structure (e.g. utf8, end_subject) into individual variables to improve
453  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
454  made performance worse.  made performance worse.
455    
456  Arguments:  Arguments:
457     eptr        pointer in subject     eptr        pointer to current character in subject
458     ecode       position in code     ecode       pointer to current position in compiled code
459       mstart      pointer to the current match start position (can be modified
460                     by encountering \K)
461       markptr     pointer to the most recent MARK name, or NULL
462     offset_top  current top pointer     offset_top  current top pointer
463     md          pointer to "static" info for the match     md          pointer to "static" info for the match
464     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 362  Arguments: Line 466  Arguments:
466                   brackets - for testing for empty matches                   brackets - for testing for empty matches
467     flags       can contain     flags       can contain
468                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
469                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
470                       group that can match an empty string
471     rdepth      the recursion depth     rdepth      the recursion depth
472    
473  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
474                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
475                   a negative MATCH_xxx value for PRUNE, SKIP, etc
476                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
477                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
478  */  */
479    
480  static int  static int
481  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
482    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
483    int flags, int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
484  {  {
485  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
486  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
487  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
488    
489    register int  rrc;         /* Returns from recursive calls */
490    register int  i;           /* Used for loops not involving calls to RMATCH() */
491    register unsigned int c;   /* Character values not kept over RMATCH() calls */
492    register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
493    
494  register int  rrc;    /* Returns from recursive calls */  BOOL minimize, possessive; /* Quantifier options */
495  register int  i;      /* Used for loops not involving calls to RMATCH() */  int condcode;
 register int  c;      /* Character values not kept over RMATCH() calls */  
 register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  
496    
497  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
498  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 391  heap storage. Set up the top-level frame Line 500  heap storage. Set up the top-level frame
500  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  heap whenever RMATCH() does a "recursion". See the macro definitions above. */
501    
502  #ifdef NO_RECURSE  #ifdef NO_RECURSE
503  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
504    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
505  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
506    
507  /* Copy in the original argument variables */  /* Copy in the original argument variables */
508    
509  frame->Xeptr = eptr;  frame->Xeptr = eptr;
510  frame->Xecode = ecode;  frame->Xecode = ecode;
511    frame->Xmstart = mstart;
512    frame->Xmarkptr = markptr;
513  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
514  frame->Xims = ims;  frame->Xims = ims;
515  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 412  HEAP_RECURSE: Line 524  HEAP_RECURSE:
524    
525  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
526  #define ecode              frame->Xecode  #define ecode              frame->Xecode
527    #define mstart             frame->Xmstart
528    #define markptr            frame->Xmarkptr
529  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
530  #define ims                frame->Xims  #define ims                frame->Xims
531  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 424  HEAP_RECURSE: Line 538  HEAP_RECURSE:
538  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
539  #endif  #endif
540  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
541    #define codelink           frame->Xcodelink
542  #define data               frame->Xdata  #define data               frame->Xdata
543  #define next               frame->Xnext  #define next               frame->Xnext
544  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 434  HEAP_RECURSE: Line 549  HEAP_RECURSE:
549    
550  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
551  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
552  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
553    
554  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
# Line 446  HEAP_RECURSE: Line 560  HEAP_RECURSE:
560  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
561  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
562  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
563  #define prop_test_variable frame->Xprop_test_variable  #define oclength           frame->Xoclength
564    #define occhars            frame->Xocchars
565  #endif  #endif
566    
567  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 470  HEAP_RECURSE: Line 585  HEAP_RECURSE:
585  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
586  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
587    
588  #else  #else         /* NO_RECURSE not defined */
589  #define fi i  #define fi i
590  #define fc c  #define fc c
591    
# Line 489  recursion_info new_recursive;      /* wi Line 604  recursion_info new_recursive;      /* wi
604                                     /* that do not have to be preserved over  */                                     /* that do not have to be preserved over  */
605  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
606  BOOL condition;  BOOL condition;
 BOOL minimize;  
607  BOOL prev_is_word;  BOOL prev_is_word;
608    
609  unsigned long int original_ims;  unsigned long int original_ims;
# Line 501  int prop_fail_result; Line 615  int prop_fail_result;
615  int prop_category;  int prop_category;
616  int prop_chartype;  int prop_chartype;
617  int prop_script;  int prop_script;
618  int *prop_test_variable;  int oclength;
619    uschar occhars[8];
620  #endif  #endif
621    
622    int codelink;
623  int ctype;  int ctype;
624  int length;  int length;
625  int max;  int max;
# Line 516  int save_offset1, save_offset2, save_off Line 632  int save_offset1, save_offset2, save_off
632  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
633    
634  eptrblock newptrb;  eptrblock newptrb;
635  #endif  #endif     /* NO_RECURSE */
636    
637  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
638  variables. */  variables. */
# Line 524  variables. */ Line 640  variables. */
640  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
641  prop_value = 0;  prop_value = 0;
642  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_variable = NULL;  
643  #endif  #endif
644    
645    
646    /* This label is used for tail recursion, which is used in a few cases even
647    when NO_RECURSE is not defined, in order to reduce the amount of stack that is
648    used. Thanks to Ian Taylor for noticing this possibility and sending the
649    original patch. */
650    
651    TAIL_RECURSE:
652    
653  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
654  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
655  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
656  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
657  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
658  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
659  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
660    
661    #ifdef SUPPORT_UTF8
662    utf8 = md->utf8;       /* Local copy of the flag */
663    #else
664    utf8 = FALSE;
665    #endif
666    
667  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
668  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
669    
# Line 542  if (md->match_call_count++ >= md->match_ Line 671  if (md->match_call_count++ >= md->match_
671  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
672    
673  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
 utf8 = md->utf8;       /* Local copy of the flag */  
674    
675  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
676  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
677  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
678  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
679    When match() is called in other circumstances, don't add to the chain. The
680    match_cbegroup flag must NOT be used with tail recursion, because the memory
681    block that is used is on the stack, so a new one may be required for each
682    match(). */
683    
684  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
685    {    {
   newptrb.epb_prev = eptrb;  
686    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
687      newptrb.epb_prev = eptrb;
688    eptrb = &newptrb;    eptrb = &newptrb;
689    }    }
690    
691  /* Now start processing the operations. */  /* Now start processing the opcodes. */
692    
693  for (;;)  for (;;)
694    {    {
695      minimize = possessive = FALSE;
696    op = *ecode;    op = *ecode;
   minimize = FALSE;  
697    
698    /* For partial matching, remember if we ever hit the end of the subject after    switch(op)
699    matching at least one subject character. */      {
700        case OP_MARK:
701        markptr = ecode + 2;
702        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
703          ims, eptrb, flags, RM55);
704    
705        /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
706        argument, and we must check whether that argument matches this MARK's
707        argument. It is passed back in md->start_match_ptr (an overloading of that
708        variable). If it does match, we reset that variable to the current subject
709        position and return MATCH_SKIP. Otherwise, pass back the return code
710        unaltered. */
711    
712        if (rrc == MATCH_SKIP_ARG &&
713            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
714          {
715          md->start_match_ptr = eptr;
716          RRETURN(MATCH_SKIP);
717          }
718    
719        if (md->mark == NULL) md->mark = markptr;
720        RRETURN(rrc);
721    
722        case OP_FAIL:
723        MRRETURN(MATCH_NOMATCH);
724    
725        /* COMMIT overrides PRUNE, SKIP, and THEN */
726    
727        case OP_COMMIT:
728        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
729          ims, eptrb, flags, RM52);
730        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
731            rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
732            rrc != MATCH_THEN)
733          RRETURN(rrc);
734        MRRETURN(MATCH_COMMIT);
735    
736    if (md->partial &&      /* PRUNE overrides THEN */
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
   /* Opening capturing bracket. If there is space in the offset vector, save  
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
737    
738    if (op > OP_BRA)      case OP_PRUNE:
739      {      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
740      number = op - OP_BRA;        ims, eptrb, flags, RM51);
741        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
742        MRRETURN(MATCH_PRUNE);
743    
744        case OP_PRUNE_ARG:
745        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
746          ims, eptrb, flags, RM56);
747        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
748        md->mark = ecode + 2;
749        RRETURN(MATCH_PRUNE);
750    
751        /* SKIP overrides PRUNE and THEN */
752    
753        case OP_SKIP:
754        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
755          ims, eptrb, flags, RM53);
756        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
757          RRETURN(rrc);
758        md->start_match_ptr = eptr;   /* Pass back current position */
759        MRRETURN(MATCH_SKIP);
760    
761      /* For extended extraction brackets (large number), we have to fish out the      case OP_SKIP_ARG:
762      number from a dummy opcode at the start. */      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
763          ims, eptrb, flags, RM57);
764        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
765          RRETURN(rrc);
766    
767      if (number > EXTRACT_BASIC_MAX)      /* Pass back the current skip name by overloading md->start_match_ptr and
768        number = GET2(ecode, 2+LINK_SIZE);      returning the special MATCH_SKIP_ARG return code. This will either be
769        caught by a matching MARK, or get to the top, where it is treated the same
770        as PRUNE. */
771    
772        md->start_match_ptr = ecode + 2;
773        RRETURN(MATCH_SKIP_ARG);
774    
775        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
776        the alt that is at the start of the current branch. This makes it possible
777        to skip back past alternatives that precede the THEN within the current
778        branch. */
779    
780        case OP_THEN:
781        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
782          ims, eptrb, flags, RM54);
783        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
784        md->start_match_ptr = ecode - GET(ecode, 1);
785        MRRETURN(MATCH_THEN);
786    
787        case OP_THEN_ARG:
788        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
789          offset_top, md, ims, eptrb, flags, RM58);
790        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
791        md->start_match_ptr = ecode - GET(ecode, 1);
792        md->mark = ecode + LINK_SIZE + 2;
793        RRETURN(MATCH_THEN);
794    
795        /* Handle a capturing bracket. If there is space in the offset vector, save
796        the current subject position in the working slot at the top of the vector.
797        We mustn't change the current values of the data slot, because they may be
798        set from a previous iteration of this group, and be referred to by a
799        reference inside the group.
800    
801        If the bracket fails to match, we need to restore this value and also the
802        values of the final offsets, in case they were set by a previous iteration
803        of the same bracket.
804    
805        If there isn't enough space in the offset vector, treat this as if it were
806        a non-capturing bracket. Don't worry about setting the flag for the error
807        case here; that is handled in the code for KET. */
808    
809        case OP_CBRA:
810        case OP_SCBRA:
811        number = GET2(ecode, 1+LINK_SIZE);
812      offset = number << 1;      offset = number << 1;
813    
814  #ifdef DEBUG  #ifdef PCRE_DEBUG
815      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
816        printf("subject=");
817      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
818      printf("\n");      printf("\n");
819  #endif  #endif
# Line 610  for (;;) Line 826  for (;;)
826        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
827    
828        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
829        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
830            (int)(eptr - md->start_subject);
831    
832          flags = (op == OP_SCBRA)? match_cbegroup : 0;
833        do        do
834          {          {
835          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
836            match_isgroup);            ims, eptrb, flags, RM1);
837          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
838                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
839              RRETURN(rrc);
840          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
841          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
842          }          }
# Line 628  for (;;) Line 848  for (;;)
848        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
849        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
850    
851          if (rrc != MATCH_THEN) md->mark = markptr;
852        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
853        }        }
854    
855      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
856        as a non-capturing bracket. */
857    
858      else op = OP_BRA;      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
859      }      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
860    
861    /* Other types of node can be handled by a switch */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
862    
863    switch(op)      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
864      {      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
865      case OP_BRA:     /* Non-capturing bracket: optimized */  
866      DPRINTF(("start bracket 0\n"));      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
867      do      final alternative within the brackets, we would return the result of a
868        recursive call to match() whatever happened. We can reduce stack usage by
869        turning this into a tail recursion, except in the case when match_cbegroup
870      is set. */
871    
872        case OP_BRA:
873        case OP_SBRA:
874        DPRINTF(("start non-capturing bracket\n"));
875        flags = (op >= OP_SBRA)? match_cbegroup : 0;
876        for (;;)
877        {        {
878        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
879          match_isgroup);          {
880        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (flags == 0)    /* Not a possibly empty group */
881              {
882              ecode += _pcre_OP_lengths[*ecode];
883              DPRINTF(("bracket 0 tail recursion\n"));
884              goto TAIL_RECURSE;
885              }
886    
887            /* Possibly empty group; can't use tail recursion. */
888    
889            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
890              eptrb, flags, RM48);
891            if (rrc == MATCH_NOMATCH) md->mark = markptr;
892            RRETURN(rrc);
893            }
894    
895          /* For non-final alternatives, continue the loop for a NOMATCH result;
896          otherwise return. */
897    
898          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
899            eptrb, flags, RM2);
900          if (rrc != MATCH_NOMATCH &&
901              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
902            RRETURN(rrc);
903        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
904        }        }
905      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
906    
907      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
908      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
909      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
910      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
911        obeyed, we can use tail recursion to avoid using another stack frame. */
912    
913      case OP_COND:      case OP_COND:
914      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
915        codelink= GET(ecode, 1);
916    
917        /* Because of the way auto-callout works during compile, a callout item is
918        inserted between OP_COND and an assertion condition. */
919    
920        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
921          {
922          if (pcre_callout != NULL)
923            {
924            pcre_callout_block cb;
925            cb.version          = 1;   /* Version 1 of the callout block */
926            cb.callout_number   = ecode[LINK_SIZE+2];
927            cb.offset_vector    = md->offset_vector;
928            cb.subject          = (PCRE_SPTR)md->start_subject;
929            cb.subject_length   = (int)(md->end_subject - md->start_subject);
930            cb.start_match      = (int)(mstart - md->start_subject);
931            cb.current_position = (int)(eptr - md->start_subject);
932            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
933            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
934            cb.capture_top      = offset_top/2;
935            cb.capture_last     = md->capture_last;
936            cb.callout_data     = md->callout_data;
937            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
938            if (rrc < 0) RRETURN(rrc);
939            }
940          ecode += _pcre_OP_lengths[OP_CALLOUT];
941          }
942    
943        condcode = ecode[LINK_SIZE+1];
944    
945        /* Now see what the actual condition is */
946    
947        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
948          {
949          if (md->recursive == NULL)                /* Not recursing => FALSE */
950            {
951            condition = FALSE;
952            ecode += GET(ecode, 1);
953            }
954          else
955            {
956            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
957            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
958    
959            /* If the test is for recursion into a specific subpattern, and it is
960            false, but the test was set up by name, scan the table to see if the
961            name refers to any other numbers, and test them. The condition is true
962            if any one is set. */
963    
964            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
965              {
966              uschar *slotA = md->name_table;
967              for (i = 0; i < md->name_count; i++)
968                {
969                if (GET2(slotA, 0) == recno) break;
970                slotA += md->name_entry_size;
971                }
972    
973              /* Found a name for the number - there can be only one; duplicate
974              names for different numbers are allowed, but not vice versa. First
975              scan down for duplicates. */
976    
977              if (i < md->name_count)
978                {
979                uschar *slotB = slotA;
980                while (slotB > md->name_table)
981                  {
982                  slotB -= md->name_entry_size;
983                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
984                    {
985                    condition = GET2(slotB, 0) == md->recursive->group_num;
986                    if (condition) break;
987                    }
988                  else break;
989                  }
990    
991                /* Scan up for duplicates */
992    
993                if (!condition)
994                  {
995                  slotB = slotA;
996                  for (i++; i < md->name_count; i++)
997                    {
998                    slotB += md->name_entry_size;
999                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1000                      {
1001                      condition = GET2(slotB, 0) == md->recursive->group_num;
1002                      if (condition) break;
1003                      }
1004                    else break;
1005                    }
1006                  }
1007                }
1008              }
1009    
1010            /* Choose branch according to the condition */
1011    
1012            ecode += condition? 3 : GET(ecode, 1);
1013            }
1014          }
1015    
1016        else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1017        {        {
1018        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
1019        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
1020          (md->recursive != NULL) :  
1021          (offset < offset_top && md->offset_vector[offset] >= 0);        /* If the numbered capture is unset, but the reference was by name,
1022        RMATCH(rrc, eptr, ecode + (condition?        scan the table to see if the name refers to any other numbers, and test
1023          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),        them. The condition is true if any one is set. This is tediously similar
1024          offset_top, md, ims, eptrb, match_isgroup);        to the code above, but not close enough to try to amalgamate. */
1025        RRETURN(rrc);  
1026          if (!condition && condcode == OP_NCREF)
1027            {
1028            int refno = offset >> 1;
1029            uschar *slotA = md->name_table;
1030    
1031            for (i = 0; i < md->name_count; i++)
1032              {
1033              if (GET2(slotA, 0) == refno) break;
1034              slotA += md->name_entry_size;
1035              }
1036    
1037            /* Found a name for the number - there can be only one; duplicate names
1038            for different numbers are allowed, but not vice versa. First scan down
1039            for duplicates. */
1040    
1041            if (i < md->name_count)
1042              {
1043              uschar *slotB = slotA;
1044              while (slotB > md->name_table)
1045                {
1046                slotB -= md->name_entry_size;
1047                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1048                  {
1049                  offset = GET2(slotB, 0) << 1;
1050                  condition = offset < offset_top &&
1051                    md->offset_vector[offset] >= 0;
1052                  if (condition) break;
1053                  }
1054                else break;
1055                }
1056    
1057              /* Scan up for duplicates */
1058    
1059              if (!condition)
1060                {
1061                slotB = slotA;
1062                for (i++; i < md->name_count; i++)
1063                  {
1064                  slotB += md->name_entry_size;
1065                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1066                    {
1067                    offset = GET2(slotB, 0) << 1;
1068                    condition = offset < offset_top &&
1069                      md->offset_vector[offset] >= 0;
1070                    if (condition) break;
1071                    }
1072                  else break;
1073                  }
1074                }
1075              }
1076            }
1077    
1078          /* Choose branch according to the condition */
1079    
1080          ecode += condition? 3 : GET(ecode, 1);
1081          }
1082    
1083        else if (condcode == OP_DEF)     /* DEFINE - always false */
1084          {
1085          condition = FALSE;
1086          ecode += GET(ecode, 1);
1087        }        }
1088    
1089      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
1090      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
1091        assertion. */
1092    
1093      else      else
1094        {        {
1095        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1096            match_condassert | match_isgroup);            match_condassert, RM3);
1097        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1098          {          {
1099          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
1100            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1101          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1102          }          }
1103        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH &&
1104                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1105          {          {
1106          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1107          }          }
1108        else ecode += GET(ecode, 1);        else
1109        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
1110          match_isgroup);          condition = FALSE;
1111        RRETURN(rrc);          ecode += codelink;
1112            }
1113        }        }
     /* Control never reaches here */  
1114    
1115      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
1116      encountered. */      we can use tail recursion to avoid using another stack frame, except when
1117        match_cbegroup is required for an unlimited repeat of a possibly empty
1118        group. If the second alternative doesn't exist, we can just plough on. */
1119    
1120        if (condition || *ecode == OP_ALT)
1121          {
1122          ecode += 1 + LINK_SIZE;
1123          if (op == OP_SCOND)        /* Possibly empty group */
1124            {
1125            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1126            RRETURN(rrc);
1127            }
1128          else                       /* Group must match something */
1129            {
1130            flags = 0;
1131            goto TAIL_RECURSE;
1132            }
1133          }
1134        else                         /* Condition false & no alternative */
1135          {
1136          ecode += 1 + LINK_SIZE;
1137          }
1138        break;
1139    
1140    
1141        /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1142        to close any currently open capturing brackets. */
1143    
1144        case OP_CLOSE:
1145        number = GET2(ecode, 1);
1146        offset = number << 1;
1147    
1148    #ifdef PCRE_DEBUG
1149          printf("end bracket %d at *ACCEPT", number);
1150          printf("\n");
1151    #endif
1152    
1153      case OP_CREF:      md->capture_last = number;
1154      case OP_BRANUMBER:      if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1155          {
1156          md->offset_vector[offset] =
1157            md->offset_vector[md->offset_end - number];
1158          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1159          if (offset_top <= offset) offset_top = offset + 2;
1160          }
1161      ecode += 3;      ecode += 3;
1162      break;      break;
1163    
     /* End of the pattern. If we are in a recursion, we should restore the  
     offsets appropriately and continue from after the call. */  
1164    
1165        /* End of the pattern, either real or forced. If we are in a top-level
1166        recursion, we should restore the offsets appropriately and continue from
1167        after the call. */
1168    
1169        case OP_ACCEPT:
1170      case OP_END:      case OP_END:
1171      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1172        {        {
# Line 713  for (;;) Line 1175  for (;;)
1175        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1176        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1177          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1178        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1179        ims = original_ims;        ims = original_ims;
1180        ecode = rec->after_call;        ecode = rec->after_call;
1181        break;        break;
1182        }        }
1183    
1184      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1185      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1186        the subject. In both cases, backtracking will then try other alternatives,
1187        if any. */
1188    
1189        if (eptr == mstart &&
1190            (md->notempty ||
1191              (md->notempty_atstart &&
1192                mstart == md->start_subject + md->start_offset)))
1193          MRRETURN(MATCH_NOMATCH);
1194    
1195        /* Otherwise, we have a match. */
1196    
1197        md->end_match_ptr = eptr;           /* Record where we ended */
1198        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1199        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1200    
1201      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      /* For some reason, the macros don't work properly if an expression is
1202      md->end_match_ptr = eptr;          /* Record where we ended */      given as the argument to MRRETURN when the heap is in use. */
1203      md->end_offset_top = offset_top;   /* and how many extracts were taken */  
1204      RRETURN(MATCH_MATCH);      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1205        MRRETURN(rrc);
1206    
1207      /* Change option settings */      /* Change option settings */
1208    
# Line 745  for (;;) Line 1222  for (;;)
1222      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1223      do      do
1224        {        {
1225        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1226          match_isgroup);          RM4);
1227        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1228        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          {
1229            mstart = md->start_match_ptr;   /* In case \K reset it */
1230            break;
1231            }
1232          if (rrc != MATCH_NOMATCH &&
1233              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1234            RRETURN(rrc);
1235        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1236        }        }
1237      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1238      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1239    
1240      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1241    
# Line 766  for (;;) Line 1249  for (;;)
1249      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1250      continue;      continue;
1251    
1252      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1253        PRUNE, or COMMIT means we must assume failure without checking subsequent
1254        branches. */
1255    
1256      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1257      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1258      do      do
1259        {        {
1260        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1261          match_isgroup);          RM5);
1262        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1263        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1264            {
1265            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1266            break;
1267            }
1268          if (rrc != MATCH_NOMATCH &&
1269              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1270            RRETURN(rrc);
1271        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1272        }        }
1273      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 794  for (;;) Line 1286  for (;;)
1286  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1287      if (utf8)      if (utf8)
1288        {        {
1289        c = GET(ecode,1);        i = GET(ecode, 1);
1290        for (i = 0; i < c; i++)        while (i-- > 0)
1291          {          {
1292          eptr--;          eptr--;
1293          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1294          BACKCHAR(eptr)          BACKCHAR(eptr);
1295          }          }
1296        }        }
1297      else      else
# Line 808  for (;;) Line 1300  for (;;)
1300      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1301    
1302        {        {
1303        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
1304        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1305        }        }
1306    
1307      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1308    
1309        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1310      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1311      break;      break;
1312    
# Line 829  for (;;) Line 1322  for (;;)
1322        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1323        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1324        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1325        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1326        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1327        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1328        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1329        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1330        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1331        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1332        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1333        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1334        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1335        }        }
1336      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 865  for (;;) Line 1358  for (;;)
1358      case OP_RECURSE:      case OP_RECURSE:
1359        {        {
1360        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1361        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
1362            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
1363    
1364        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1365    
# Line 897  for (;;) Line 1385  for (;;)
1385    
1386        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1387              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1388        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1389    
1390        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1391        restore the offset and recursion data. */        restore the offset and recursion data. */
1392    
1393        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1394          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1395        do        do
1396          {          {
1397          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1398              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
1399          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1400            {            {
1401            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1402            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1403            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1404              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1405            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1406            }            }
1407          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH &&
1408                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1409            {            {
1410            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1411              if (new_recursive.offset_save != stacksave)
1412                (pcre_free)(new_recursive.offset_save);
1413            RRETURN(rrc);            RRETURN(rrc);
1414            }            }
1415    
# Line 933  for (;;) Line 1424  for (;;)
1424        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1425        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1426          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1427        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1428        }        }
1429      /* Control never reaches here */      /* Control never reaches here */
1430    
# Line 942  for (;;) Line 1433  for (;;)
1433      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1434      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1435      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1436      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1437        the start-of-match value in case it was changed by \K. */
1438    
1439      case OP_ONCE:      case OP_ONCE:
1440        {      prev = ecode;
1441        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1442    
1443        do      do
1444          {
1445          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1446          if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1447          {          {
1448          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,          mstart = md->start_match_ptr;
1449            eptrb, match_isgroup);          break;
         if (rrc == MATCH_MATCH) break;  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         ecode += GET(ecode,1);  
1450          }          }
1451        while (*ecode == OP_ALT);        if (rrc != MATCH_NOMATCH &&
1452              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1453            RRETURN(rrc);
1454          ecode += GET(ecode,1);
1455          }
1456        while (*ecode == OP_ALT);
1457    
1458        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1459    
1460        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1461    
1462        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1463        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1464    
1465        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1466    
1467        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1468        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1469    
1470        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1471        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1472        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1473        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1474        course of events. */      course of events. */
1475    
1476        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1477          {        {
1478          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1479          break;        break;
1480          }        }
1481    
1482        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1483        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1484        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1485        opcode. */      any options that changed within the bracket before re-running it, so
1486        check the next opcode. */
1487    
1488        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1489          {        {
1490          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1491          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1492          }        }
1493    
1494        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1495          {        {
1496          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1497          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1498          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1499          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = 0;
1500          }        goto TAIL_RECURSE;
1501        else  /* OP_KETRMAX */        }
1502          {      else  /* OP_KETRMAX */
1503          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        {
1504          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1505          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1506          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += 1 + LINK_SIZE;
1507          }        flags = 0;
1508          goto TAIL_RECURSE;
1509        }        }
1510      RRETURN(MATCH_NOMATCH);      /* Control never gets here */
1511    
1512      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1513      bracketed group and go to there. */      bracketed group and go to there. */
# Line 1018  for (;;) Line 1516  for (;;)
1516      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1517      break;      break;
1518    
1519      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1520      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1521      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1522      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1523      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1524    
1525      case OP_BRAZERO:      case OP_BRAZERO:
1526        {        {
1527        next = ecode+1;        next = ecode+1;
1528        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1529        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1530        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1531        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1532        }        }
1533      break;      break;
1534    
1535      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1536        {        {
1537        next = ecode+1;        next = ecode+1;
1538        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1539        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1540        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1541        ecode++;        ecode++;
1542        }        }
1543      break;      break;
1544    
1545      /* End of a group, repeated or non-repeating. If we are at the end of      case OP_SKIPZERO:
1546      an assertion "group", stop matching and return MATCH_MATCH, but record the        {
1547      current high water mark for use by positive assertions. Do this also        next = ecode+1;
1548      for the "once" (not-backup up) groups. */        do next += GET(next,1); while (*next == OP_ALT);
1549          ecode = next + 1 + LINK_SIZE;
1550          }
1551        break;
1552    
1553        /* End of a group, repeated or non-repeating. */
1554    
1555      case OP_KET:      case OP_KET:
1556      case OP_KETRMIN:      case OP_KETRMIN:
1557      case OP_KETRMAX:      case OP_KETRMAX:
1558        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
1559    
1560        /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1561        infinite repeats of empty string matches, retrieve the subject start from
1562        the chain. Otherwise, set it NULL. */
1563    
1564        eptrb = eptrb->epb_prev;      if (*prev >= OP_SBRA)
1565          {
1566        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1567            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1568            *prev == OP_ONCE)        }
1569          {      else saved_eptr = NULL;
         md->end_match_ptr = eptr;      /* For ONCE */  
         md->end_offset_top = offset_top;  
         RRETURN(MATCH_MATCH);  
         }  
1570    
1571        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group or an atomic group, stop
1572        group number back at the start and if necessary complete handling an      matching and return MATCH_MATCH, but record the current high water mark for
1573        extraction by setting the offsets and bumping the high water mark. */      use by positive assertions. We also need to record the match start in case
1574        it was changed by \K. */
1575    
1576        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1577          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1578          number = *prev - OP_BRA;          *prev == OP_ONCE)
1579          {
1580          md->end_match_ptr = eptr;      /* For ONCE */
1581          md->end_offset_top = offset_top;
1582          md->start_match_ptr = mstart;
1583          MRRETURN(MATCH_MATCH);
1584          }
1585    
1586          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1587          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1588        bumping the high water mark. Note that whole-pattern recursion is coded as
1589        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1590        when the OP_END is reached. Other recursion is handled here. */
1591    
1592          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1593          offset = number << 1;        {
1594          number = GET2(prev, 1+LINK_SIZE);
1595          offset = number << 1;
1596    
1597  #ifdef DEBUG  #ifdef PCRE_DEBUG
1598          printf("end bracket %d", number);        printf("end bracket %d", number);
1599          printf("\n");        printf("\n");
1600  #endif  #endif
1601    
1602          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1603          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1604          into group 0, so it won't be picked up here. Instead, we catch it when          {
1605          the OP_END is reached. */          md->offset_vector[offset] =
1606              md->offset_vector[md->offset_end - number];
1607          if (number > 0)          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1608            {          if (offset_top <= offset) offset_top = offset + 2;
           md->capture_last = number;  
           if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
             {  
             md->offset_vector[offset] =  
               md->offset_vector[md->offset_end - number];  
             md->offset_vector[offset+1] = eptr - md->start_subject;  
             if (offset_top <= offset) offset_top = offset + 2;  
             }  
   
           /* Handle a recursively called group. Restore the offsets  
           appropriately and continue from after the call. */  
   
           if (md->recursive != NULL && md->recursive->group_num == number)  
             {  
             recursion_info *rec = md->recursive;  
             DPRINTF(("Recursion (%d) succeeded - continuing\n", number));  
             md->recursive = rec->prevrec;  
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
1609          }          }
1610    
1611        /* Reset the value of the ims flags, in case they got changed during        /* Handle a recursively called group. Restore the offsets
1612        the group. */        appropriately and continue from after the call. */
1613    
1614        ims = original_ims;        if (md->recursive != NULL && md->recursive->group_num == number)
       DPRINTF(("ims reset to %02lx\n", ims));  
   
       /* For a non-repeating ket, just continue at this level. This also  
       happens for a repeating ket if no characters were matched in the group.  
       This is the forcible breaking of infinite loops as implemented in Perl  
       5.005. If there is an options reset, it will get obeyed in the normal  
       course of events. */  
   
       if (*ecode == OP_KET || eptr == saved_eptr)  
1615          {          {
1616          ecode += 1 + LINK_SIZE;          recursion_info *rec = md->recursive;
1617            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1618            md->recursive = rec->prevrec;
1619            memcpy(md->offset_vector, rec->offset_save,
1620              rec->saved_max * sizeof(int));
1621            offset_top = rec->save_offset_top;
1622            ecode = rec->after_call;
1623            ims = original_ims;
1624          break;          break;
1625          }          }
1626          }
1627    
1628        /* The repeating kets try the rest of the pattern or restart from the      /* For both capturing and non-capturing groups, reset the value of the ims
1629        preceding bracket, in the appropriate order. */      flags, in case they got changed during the group. */
1630    
1631        if (*ecode == OP_KETRMIN)      ims = original_ims;
1632          {      DPRINTF(("ims reset to %02lx\n", ims));
1633          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
1634          if (rrc != MATCH_NOMATCH) RRETURN(rrc);      /* For a non-repeating ket, just continue at this level. This also
1635          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);      happens for a repeating ket if no characters were matched in the group.
1636          if (rrc != MATCH_NOMATCH) RRETURN(rrc);      This is the forcible breaking of infinite loops as implemented in Perl
1637          }      5.005. If there is an options reset, it will get obeyed in the normal
1638        else  /* OP_KETRMAX */      course of events. */
1639    
1640        if (*ecode == OP_KET || eptr == saved_eptr)
1641          {
1642          ecode += 1 + LINK_SIZE;
1643          break;
1644          }
1645    
1646        /* The repeating kets try the rest of the pattern or restart from the
1647        preceding bracket, in the appropriate order. In the second case, we can use
1648        tail recursion to avoid using another stack frame, unless we have an
1649        unlimited repeat of a group that can match an empty string. */
1650    
1651        flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1652    
1653        if (*ecode == OP_KETRMIN)
1654          {
1655          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1656          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1657          if (flags != 0)    /* Could match an empty string */
1658          {          {
1659          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1660          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          RRETURN(rrc);
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
1661          }          }
1662          ecode = prev;
1663          goto TAIL_RECURSE;
1664        }        }
1665        else  /* OP_KETRMAX */
1666      RRETURN(MATCH_NOMATCH);        {
1667          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1668          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1669          ecode += 1 + LINK_SIZE;
1670          flags = 0;
1671          goto TAIL_RECURSE;
1672          }
1673        /* Control never gets here */
1674    
1675      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1676    
1677      case OP_CIRC:      case OP_CIRC:
1678      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1679      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1680        {        {
1681        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1682          RRETURN(MATCH_NOMATCH);            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1683            MRRETURN(MATCH_NOMATCH);
1684        ecode++;        ecode++;
1685        break;        break;
1686        }        }
# Line 1178  for (;;) Line 1689  for (;;)
1689      /* Start of subject assertion */      /* Start of subject assertion */
1690    
1691      case OP_SOD:      case OP_SOD:
1692      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1693      ecode++;      ecode++;
1694      break;      break;
1695    
1696      /* Start of match assertion */      /* Start of match assertion */
1697    
1698      case OP_SOM:      case OP_SOM:
1699      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1700        ecode++;
1701        break;
1702    
1703        /* Reset the start of match point */
1704    
1705        case OP_SET_SOM:
1706        mstart = eptr;
1707      ecode++;      ecode++;
1708      break;      break;
1709    
# Line 1196  for (;;) Line 1714  for (;;)
1714      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1715        {        {
1716        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1717          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1718        else        else
1719          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          {
1720            if (md->noteol) MRRETURN(MATCH_NOMATCH);
1721            SCHECK_PARTIAL();
1722            }
1723        ecode++;        ecode++;
1724        break;        break;
1725        }        }
1726      else      else  /* Not multiline */
1727        {        {
1728        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1729        if (!md->endonly)        if (!md->endonly) goto ASSERT_NL_OR_EOS;
         {  
         if (eptr < md->end_subject - 1 ||  
            (eptr == md->end_subject - 1 && *eptr != NEWLINE))  
           RRETURN(MATCH_NOMATCH);  
         ecode++;  
         break;  
         }  
1730        }        }
1731      /* ... else fall through */  
1732        /* ... else fall through for endonly */
1733    
1734      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1735    
1736      case OP_EOD:      case OP_EOD:
1737      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1738        SCHECK_PARTIAL();
1739      ecode++;      ecode++;
1740      break;      break;
1741    
1742      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1743    
1744      case OP_EODN:      case OP_EODN:
1745      if (eptr < md->end_subject - 1 ||      ASSERT_NL_OR_EOS:
1746         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject &&
1747            (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1748          MRRETURN(MATCH_NOMATCH);
1749    
1750        /* Either at end of string or \n before end. */
1751    
1752        SCHECK_PARTIAL();
1753      ecode++;      ecode++;
1754      break;      break;
1755    
# Line 1239  for (;;) Line 1761  for (;;)
1761    
1762        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1763        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1764        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1765          partial matching. */
1766    
1767  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1768        if (utf8)        if (utf8)
1769          {          {
1770            /* Get status of previous character */
1771    
1772          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1773            {            {
1774            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1775            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1776              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1777            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1778    #ifdef SUPPORT_UCP
1779              if (md->use_ucp)
1780                {
1781                if (c == '_') prev_is_word = TRUE; else
1782                  {
1783                  int cat = UCD_CATEGORY(c);
1784                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1785                  }
1786                }
1787              else
1788    #endif
1789            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1790            }            }
1791          if (eptr >= md->end_subject) cur_is_word = FALSE; else  
1792            /* Get status of next character */
1793    
1794            if (eptr >= md->end_subject)
1795              {
1796              SCHECK_PARTIAL();
1797              cur_is_word = FALSE;
1798              }
1799            else
1800            {            {
1801            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1802    #ifdef SUPPORT_UCP
1803              if (md->use_ucp)
1804                {
1805                if (c == '_') cur_is_word = TRUE; else
1806                  {
1807                  int cat = UCD_CATEGORY(c);
1808                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1809                  }
1810                }
1811              else
1812    #endif
1813            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1814            }            }
1815          }          }
1816        else        else
1817  #endif  #endif
1818    
1819        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1820          consistency with the behaviour of \w we do use it in this case. */
1821    
1822          {          {
1823          prev_is_word = (eptr != md->start_subject) &&          /* Get status of previous character */
1824            ((md->ctypes[eptr[-1]] & ctype_word) != 0);  
1825          cur_is_word = (eptr < md->end_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1826            ((md->ctypes[*eptr] & ctype_word) != 0);            {
1827              if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1828    #ifdef SUPPORT_UCP
1829              if (md->use_ucp)
1830                {
1831                c = eptr[-1];
1832                if (c == '_') prev_is_word = TRUE; else
1833                  {
1834                  int cat = UCD_CATEGORY(c);
1835                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1836                  }
1837                }
1838              else
1839    #endif
1840              prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1841              }
1842    
1843            /* Get status of next character */
1844    
1845            if (eptr >= md->end_subject)
1846              {
1847              SCHECK_PARTIAL();
1848              cur_is_word = FALSE;
1849              }
1850            else
1851    #ifdef SUPPORT_UCP
1852            if (md->use_ucp)
1853              {
1854              c = *eptr;
1855              if (c == '_') cur_is_word = TRUE; else
1856                {
1857                int cat = UCD_CATEGORY(c);
1858                cur_is_word = (cat == ucp_L || cat == ucp_N);
1859                }
1860              }
1861            else
1862    #endif
1863            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1864          }          }
1865    
1866        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
1867    
1868        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
1869             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1870          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1871        }        }
1872      break;      break;
1873    
1874      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1875    
1876      case OP_ANY:      case OP_ANY:
1877      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1878        RRETURN(MATCH_NOMATCH);      /* Fall through */
1879      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);  
1880  #ifdef SUPPORT_UTF8      case OP_ALLANY:
1881      if (utf8)      if (eptr++ >= md->end_subject)
1882        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        {
1883  #endif        SCHECK_PARTIAL();
1884          MRRETURN(MATCH_NOMATCH);
1885          }
1886        if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1887      ecode++;      ecode++;
1888      break;      break;
1889    
# Line 1294  for (;;) Line 1891  for (;;)
1891      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1892    
1893      case OP_ANYBYTE:      case OP_ANYBYTE:
1894      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1895          {
1896          SCHECK_PARTIAL();
1897          MRRETURN(MATCH_NOMATCH);
1898          }
1899      ecode++;      ecode++;
1900      break;      break;
1901    
1902      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1903      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1904          {
1905          SCHECK_PARTIAL();
1906          MRRETURN(MATCH_NOMATCH);
1907          }
1908      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1909      if (      if (
1910  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1307  for (;;) Line 1912  for (;;)
1912  #endif  #endif
1913         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1914         )         )
1915        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1916      ecode++;      ecode++;
1917      break;      break;
1918    
1919      case OP_DIGIT:      case OP_DIGIT:
1920      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1921          {
1922          SCHECK_PARTIAL();
1923          MRRETURN(MATCH_NOMATCH);
1924          }
1925      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1926      if (      if (
1927  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1320  for (;;) Line 1929  for (;;)
1929  #endif  #endif
1930         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1931         )         )
1932        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1933      ecode++;      ecode++;
1934      break;      break;
1935    
1936      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1937      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1938          {
1939          SCHECK_PARTIAL();
1940          MRRETURN(MATCH_NOMATCH);
1941          }
1942      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1943      if (      if (
1944  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1333  for (;;) Line 1946  for (;;)
1946  #endif  #endif
1947         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1948         )         )
1949        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1950      ecode++;      ecode++;
1951      break;      break;
1952    
1953      case OP_WHITESPACE:      case OP_WHITESPACE:
1954      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1955          {
1956          SCHECK_PARTIAL();
1957          MRRETURN(MATCH_NOMATCH);
1958          }
1959      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1960      if (      if (
1961  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1346  for (;;) Line 1963  for (;;)
1963  #endif  #endif
1964         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1965         )         )
1966        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1967      ecode++;      ecode++;
1968      break;      break;
1969    
1970      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1971      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1972          {
1973          SCHECK_PARTIAL();
1974          MRRETURN(MATCH_NOMATCH);
1975          }
1976      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1977      if (      if (
1978  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1359  for (;;) Line 1980  for (;;)
1980  #endif  #endif
1981         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1982         )         )
1983        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1984      ecode++;      ecode++;
1985      break;      break;
1986    
1987      case OP_WORDCHAR:      case OP_WORDCHAR:
1988      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1989          {
1990          SCHECK_PARTIAL();
1991          MRRETURN(MATCH_NOMATCH);
1992          }
1993      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1994      if (      if (
1995  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1372  for (;;) Line 1997  for (;;)
1997  #endif  #endif
1998         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1999         )         )
2000        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2001        ecode++;
2002        break;
2003    
2004        case OP_ANYNL:
2005        if (eptr >= md->end_subject)
2006          {
2007          SCHECK_PARTIAL();
2008          MRRETURN(MATCH_NOMATCH);
2009          }
2010        GETCHARINCTEST(c, eptr);
2011        switch(c)
2012          {
2013          default: MRRETURN(MATCH_NOMATCH);
2014          case 0x000d:
2015          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2016          break;
2017    
2018          case 0x000a:
2019          break;
2020    
2021          case 0x000b:
2022          case 0x000c:
2023          case 0x0085:
2024          case 0x2028:
2025          case 0x2029:
2026          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2027          break;
2028          }
2029        ecode++;
2030        break;
2031    
2032        case OP_NOT_HSPACE:
2033        if (eptr >= md->end_subject)
2034          {
2035          SCHECK_PARTIAL();
2036          MRRETURN(MATCH_NOMATCH);
2037          }
2038        GETCHARINCTEST(c, eptr);
2039        switch(c)
2040          {
2041          default: break;
2042          case 0x09:      /* HT */
2043          case 0x20:      /* SPACE */
2044          case 0xa0:      /* NBSP */
2045          case 0x1680:    /* OGHAM SPACE MARK */
2046          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2047          case 0x2000:    /* EN QUAD */
2048          case 0x2001:    /* EM QUAD */
2049          case 0x2002:    /* EN SPACE */
2050          case 0x2003:    /* EM SPACE */
2051          case 0x2004:    /* THREE-PER-EM SPACE */
2052          case 0x2005:    /* FOUR-PER-EM SPACE */
2053          case 0x2006:    /* SIX-PER-EM SPACE */
2054          case 0x2007:    /* FIGURE SPACE */
2055          case 0x2008:    /* PUNCTUATION SPACE */
2056          case 0x2009:    /* THIN SPACE */
2057          case 0x200A:    /* HAIR SPACE */
2058          case 0x202f:    /* NARROW NO-BREAK SPACE */
2059          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2060          case 0x3000:    /* IDEOGRAPHIC SPACE */
2061          MRRETURN(MATCH_NOMATCH);
2062          }
2063        ecode++;
2064        break;
2065    
2066        case OP_HSPACE:
2067        if (eptr >= md->end_subject)
2068          {
2069          SCHECK_PARTIAL();
2070          MRRETURN(MATCH_NOMATCH);
2071          }
2072        GETCHARINCTEST(c, eptr);
2073        switch(c)
2074          {
2075          default: MRRETURN(MATCH_NOMATCH);
2076          case 0x09:      /* HT */
2077          case 0x20:      /* SPACE */
2078          case 0xa0:      /* NBSP */
2079          case 0x1680:    /* OGHAM SPACE MARK */
2080          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2081          case 0x2000:    /* EN QUAD */
2082          case 0x2001:    /* EM QUAD */
2083          case 0x2002:    /* EN SPACE */
2084          case 0x2003:    /* EM SPACE */
2085          case 0x2004:    /* THREE-PER-EM SPACE */
2086          case 0x2005:    /* FOUR-PER-EM SPACE */
2087          case 0x2006:    /* SIX-PER-EM SPACE */
2088          case 0x2007:    /* FIGURE SPACE */
2089          case 0x2008:    /* PUNCTUATION SPACE */
2090          case 0x2009:    /* THIN SPACE */
2091          case 0x200A:    /* HAIR SPACE */
2092          case 0x202f:    /* NARROW NO-BREAK SPACE */
2093          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2094          case 0x3000:    /* IDEOGRAPHIC SPACE */
2095          break;
2096          }
2097        ecode++;
2098        break;
2099    
2100        case OP_NOT_VSPACE:
2101        if (eptr >= md->end_subject)
2102          {
2103          SCHECK_PARTIAL();
2104          MRRETURN(MATCH_NOMATCH);
2105          }
2106        GETCHARINCTEST(c, eptr);
2107        switch(c)
2108          {
2109          default: break;
2110          case 0x0a:      /* LF */
2111          case 0x0b:      /* VT */
2112          case 0x0c:      /* FF */
2113          case 0x0d:      /* CR */
2114          case 0x85:      /* NEL */
2115          case 0x2028:    /* LINE SEPARATOR */
2116          case 0x2029:    /* PARAGRAPH SEPARATOR */
2117          MRRETURN(MATCH_NOMATCH);
2118          }
2119        ecode++;
2120        break;
2121    
2122        case OP_VSPACE:
2123        if (eptr >= md->end_subject)
2124          {
2125          SCHECK_PARTIAL();
2126          MRRETURN(MATCH_NOMATCH);
2127          }
2128        GETCHARINCTEST(c, eptr);
2129        switch(c)
2130          {
2131          default: MRRETURN(MATCH_NOMATCH);
2132          case 0x0a:      /* LF */
2133          case 0x0b:      /* VT */
2134          case 0x0c:      /* FF */
2135          case 0x0d:      /* CR */
2136          case 0x85:      /* NEL */
2137          case 0x2028:    /* LINE SEPARATOR */
2138          case 0x2029:    /* PARAGRAPH SEPARATOR */
2139          break;
2140          }
2141      ecode++;      ecode++;
2142      break;      break;
2143    
# Line 1382  for (;;) Line 2147  for (;;)
2147    
2148      case OP_PROP:      case OP_PROP:
2149      case OP_NOTPROP:      case OP_NOTPROP:
2150      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2151          {
2152          SCHECK_PARTIAL();
2153          MRRETURN(MATCH_NOMATCH);
2154          }
2155      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2156        {        {
2157        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
2158    
2159        switch(ecode[1])        switch(ecode[1])
2160          {          {
2161          case PT_ANY:          case PT_ANY:
2162          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2163          break;          break;
2164    
2165          case PT_LAMP:          case PT_LAMP:
2166          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2167               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2168               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2169            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2170           break;          break;
2171    
2172          case PT_GC:          case PT_GC:
2173          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2174            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2175          break;          break;
2176    
2177          case PT_PC:          case PT_PC:
2178          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2179            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2180          break;          break;
2181    
2182          case PT_SC:          case PT_SC:
2183          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2184            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2185            break;
2186    
2187            /* These are specials */
2188    
2189            case PT_ALNUM:
2190            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2191                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2192              MRRETURN(MATCH_NOMATCH);
2193            break;
2194    
2195            case PT_SPACE:    /* Perl space */
2196            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2197                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2198                   == (op == OP_NOTPROP))
2199              MRRETURN(MATCH_NOMATCH);
2200            break;
2201    
2202            case PT_PXSPACE:  /* POSIX space */
2203            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2204                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2205                 c == CHAR_FF || c == CHAR_CR)
2206                   == (op == OP_NOTPROP))
2207              MRRETURN(MATCH_NOMATCH);
2208          break;          break;
2209    
2210            case PT_WORD:
2211            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2212                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2213                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2214              MRRETURN(MATCH_NOMATCH);
2215            break;
2216    
2217            /* This should never occur */
2218    
2219          default:          default:
2220          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
         break;  
2221          }          }
2222    
2223        ecode += 3;        ecode += 3;
# Line 1429  for (;;) Line 2228  for (;;)
2228      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2229    
2230      case OP_EXTUNI:      case OP_EXTUNI:
2231      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2232          {
2233          SCHECK_PARTIAL();
2234          MRRETURN(MATCH_NOMATCH);
2235          }
2236      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2237        {        {
2238        int chartype, script;        int category = UCD_CATEGORY(c);
2239        int category = _pcre_ucp_findprop(c, &chartype, &script);        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2240        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2241          {          {
2242          int len = 1;          int len = 1;
# Line 1442  for (;;) Line 2244  for (;;)
2244            {            {
2245            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2246            }            }
2247          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2248          if (category != ucp_M) break;          if (category != ucp_M) break;
2249          eptr += len;          eptr += len;
2250          }          }
# Line 1461  for (;;) Line 2263  for (;;)
2263      loops). */      loops). */
2264    
2265      case OP_REF:      case OP_REF:
2266        {      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2267        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      ecode += 3;
       ecode += 3;                                 /* Advance past item */  
2268    
2269        /* If the reference is unset, set the length to be longer than the amount      /* If the reference is unset, there are two possibilities:
       of subject left; this ensures that every attempt at a match fails. We  
       can't just fail here, because of the possibility of quantifiers with zero  
       minima. */  
   
       length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
         md->end_subject - eptr + 1 :  
         md->offset_vector[offset+1] - md->offset_vector[offset];  
2270    
2271        /* Set up for repetition, or handle the non-repeated case */      (a) In the default, Perl-compatible state, set the length negative;
2272        this ensures that every attempt at a match fails. We can't just fail
2273        here, because of the possibility of quantifiers with zero minima.
2274    
2275        switch (*ecode)      (b) If the JavaScript compatibility flag is set, set the length to zero
2276          {      so that the back reference matches an empty string.
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         c = *ecode++ - OP_CRSTAR;  
         minimize = (c & 1) != 0;  
         min = rep_min[c];                 /* Pick up values from tables; */  
         max = rep_max[c];                 /* zero for max => infinity */  
         if (max == 0) max = INT_MAX;  
         break;  
2277    
2278          case OP_CRRANGE:      Otherwise, set the length to the length of what was matched by the
2279          case OP_CRMINRANGE:      referenced subpattern. */
         minimize = (*ecode == OP_CRMINRANGE);  
         min = GET2(ecode, 1);  
         max = GET2(ecode, 3);  
         if (max == 0) max = INT_MAX;  
         ecode += 5;  
         break;  
2280    
2281          default:               /* No repeat follows */      if (offset >= offset_top || md->offset_vector[offset] < 0)
2282          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);        length = (md->jscript_compat)? 0 : -1;
2283          eptr += length;      else
2284          continue;              /* With the main loop */        length = md->offset_vector[offset+1] - md->offset_vector[offset];
2285    
2286        /* Set up for repetition, or handle the non-repeated case */
2287    
2288        switch (*ecode)
2289          {
2290          case OP_CRSTAR:
2291          case OP_CRMINSTAR:
2292          case OP_CRPLUS:
2293          case OP_CRMINPLUS:
2294          case OP_CRQUERY:
2295          case OP_CRMINQUERY:
2296          c = *ecode++ - OP_CRSTAR;
2297          minimize = (c & 1) != 0;
2298          min = rep_min[c];                 /* Pick up values from tables; */
2299          max = rep_max[c];                 /* zero for max => infinity */
2300          if (max == 0) max = INT_MAX;
2301          break;
2302    
2303          case OP_CRRANGE:
2304          case OP_CRMINRANGE:
2305          minimize = (*ecode == OP_CRMINRANGE);
2306          min = GET2(ecode, 1);
2307          max = GET2(ecode, 3);
2308          if (max == 0) max = INT_MAX;
2309          ecode += 5;
2310          break;
2311    
2312          default:               /* No repeat follows */
2313          if ((length = match_ref(offset, eptr, length, md, ims)) < 0)
2314            {
2315            CHECK_PARTIAL();
2316            MRRETURN(MATCH_NOMATCH);
2317          }          }
2318          eptr += length;
2319          continue;              /* With the main loop */
2320          }
2321    
2322        /* If the length of the reference is zero, just continue with the      /* Handle repeated back references. If the length of the reference is
2323        main loop. */      zero, just continue with the main loop. */
2324    
2325        if (length == 0) continue;      if (length == 0) continue;
2326    
2327        /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2328        the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
2329        address of eptr, so that eptr can be a register variable. */      address of eptr, so that eptr can be a register variable. */
2330    
2331        for (i = 1; i <= min; i++)      for (i = 1; i <= min; i++)
2332          {
2333          int slength;
2334          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2335          {          {
2336          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          CHECK_PARTIAL();
2337          eptr += length;          MRRETURN(MATCH_NOMATCH);
2338          }          }
2339          eptr += slength;
2340          }
2341    
2342        /* If min = max, continue at the same level without recursion.      /* If min = max, continue at the same level without recursion.
2343        They are not both allowed to be zero. */      They are not both allowed to be zero. */
2344    
2345        if (min == max) continue;      if (min == max) continue;
2346    
2347        /* If minimizing, keep trying and advancing the pointer */      /* If minimizing, keep trying and advancing the pointer */
2348    
2349        if (minimize)      if (minimize)
2350          {
2351          for (fi = min;; fi++)
2352          {          {
2353          for (fi = min;; fi++)          int slength;
2354            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2355            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2356            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2357            if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2358            {            {
2359            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            CHECK_PARTIAL();
2360            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            MRRETURN(MATCH_NOMATCH);
           if (fi >= max || !match_ref(offset, eptr, length, md, ims))  
             RRETURN(MATCH_NOMATCH);  
           eptr += length;  
2361            }            }
2362          /* Control never gets here */          eptr += slength;
2363          }          }
2364          /* Control never gets here */
2365          }
2366    
2367        /* If maximizing, find the longest string and work backwards */      /* If maximizing, find the longest string and work backwards */
2368    
2369        else      else
2370          {
2371          pp = eptr;
2372          for (i = min; i < max; i++)
2373          {          {
2374          pp = eptr;          int slength;
2375          for (i = min; i < max; i++)          if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2376            {            {
2377            if (!match_ref(offset, eptr, length, md, ims)) break;            CHECK_PARTIAL();
2378            eptr += length;            break;
           }  
         while (eptr >= pp)  
           {  
           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);  
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
           eptr -= length;  
2379            }            }
2380          RRETURN(MATCH_NOMATCH);          eptr += slength;
2381            }
2382          while (eptr >= pp)
2383            {
2384            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2385            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2386            eptr -= length;
2387          }          }
2388          MRRETURN(MATCH_NOMATCH);
2389        }        }
2390      /* Control never gets here */      /* Control never gets here */
2391    
   
   
2392      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2393      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2394      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1618  for (;;) Line 2443  for (;;)
2443          {          {
2444          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2445            {            {
2446            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2447                {
2448                SCHECK_PARTIAL();
2449                MRRETURN(MATCH_NOMATCH);
2450                }
2451            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2452            if (c > 255)            if (c > 255)
2453              {              {
2454              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2455              }              }
2456            else            else
2457              {              {
2458              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2459              }              }
2460            }            }
2461          }          }
# Line 1636  for (;;) Line 2465  for (;;)
2465          {          {
2466          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2467            {            {
2468            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2469                {
2470                SCHECK_PARTIAL();
2471                MRRETURN(MATCH_NOMATCH);
2472                }
2473            c = *eptr++;            c = *eptr++;
2474            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2475            }            }
2476          }          }
2477    
# Line 1658  for (;;) Line 2491  for (;;)
2491            {            {
2492            for (fi = min;; fi++)            for (fi = min;; fi++)
2493              {              {
2494              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2495              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2496              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2497                if (eptr >= md->end_subject)
2498                  {
2499                  SCHECK_PARTIAL();
2500                  MRRETURN(MATCH_NOMATCH);
2501                  }
2502              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2503              if (c > 255)              if (c > 255)
2504                {                {
2505                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2506                }                }
2507              else              else
2508                {                {
2509                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2510                }                }
2511              }              }
2512            }            }
# Line 1678  for (;;) Line 2516  for (;;)
2516            {            {
2517            for (fi = min;; fi++)            for (fi = min;; fi++)
2518              {              {
2519              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2520              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2521              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2522                if (eptr >= md->end_subject)
2523                  {
2524                  SCHECK_PARTIAL();
2525                  MRRETURN(MATCH_NOMATCH);
2526                  }
2527              c = *eptr++;              c = *eptr++;
2528              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2529              }              }
2530            }            }
2531          /* Control never gets here */          /* Control never gets here */
# Line 1701  for (;;) Line 2544  for (;;)
2544            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2545              {              {
2546              int len = 1;              int len = 1;
2547              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2548                  {
2549                  SCHECK_PARTIAL();
2550                  break;
2551                  }
2552              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2553              if (c > 255)              if (c > 255)
2554                {                {
# Line 1715  for (;;) Line 2562  for (;;)
2562              }              }
2563            for (;;)            for (;;)
2564              {              {
2565              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2566              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2567              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2568              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1727  for (;;) Line 2574  for (;;)
2574            {            {
2575            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2576              {              {
2577              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2578                  {
2579                  SCHECK_PARTIAL();
2580                  break;
2581                  }
2582              c = *eptr;              c = *eptr;
2583              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2584              eptr++;              eptr++;
2585              }              }
2586            while (eptr >= pp)            while (eptr >= pp)
2587              {              {
2588              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2589              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2590              eptr--;              eptr--;
2591              }              }
2592            }            }
2593    
2594          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2595          }          }
2596        }        }
2597      /* Control never gets here */      /* Control never gets here */
2598    
2599    
2600      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2601      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2602        mode, because Unicode properties are supported in non-UTF-8 mode. */
2603    
2604  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2605      case OP_XCLASS:      case OP_XCLASS:
# Line 1788  for (;;) Line 2640  for (;;)
2640    
2641        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2642          {          {
2643          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2644          GETCHARINC(c, eptr);            {
2645          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2646              MRRETURN(MATCH_NOMATCH);
2647              }
2648            GETCHARINCTEST(c, eptr);
2649            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2650          }          }
2651    
2652        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1805  for (;;) Line 2661  for (;;)
2661          {          {
2662          for (fi = min;; fi++)          for (fi = min;; fi++)
2663            {            {
2664            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2665            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2666            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2667            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2668            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2669                SCHECK_PARTIAL();
2670                MRRETURN(MATCH_NOMATCH);
2671                }
2672              GETCHARINCTEST(c, eptr);
2673              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2674            }            }
2675          /* Control never gets here */          /* Control never gets here */
2676          }          }
# Line 1822  for (;;) Line 2683  for (;;)
2683          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2684            {            {
2685            int len = 1;            int len = 1;
2686            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2687            GETCHARLEN(c, eptr, len);              {
2688                SCHECK_PARTIAL();
2689                break;
2690                }
2691              GETCHARLENTEST(c, eptr, len);
2692            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2693            eptr += len;            eptr += len;
2694            }            }
2695          for(;;)          for(;;)
2696            {            {
2697            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2698            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2699            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2700            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2701            }            }
2702          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2703          }          }
2704    
2705        /* Control never gets here */        /* Control never gets here */
# Line 1850  for (;;) Line 2715  for (;;)
2715        length = 1;        length = 1;
2716        ecode++;        ecode++;
2717        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2718        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2719        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2720            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2721            MRRETURN(MATCH_NOMATCH);
2722            }
2723          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2724        }        }
2725      else      else
2726  #endif  #endif
2727    
2728      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2729        {        {
2730        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2731        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2732            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2733            MRRETURN(MATCH_NOMATCH);
2734            }
2735          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2736        ecode += 2;        ecode += 2;
2737        }        }
2738      break;      break;
# Line 1874  for (;;) Line 2747  for (;;)
2747        ecode++;        ecode++;
2748        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2749    
2750        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2751            {
2752            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2753            MRRETURN(MATCH_NOMATCH);
2754            }
2755    
2756        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2757        can use the fast lookup table. */        can use the fast lookup table. */
2758    
2759        if (fc < 128)        if (fc < 128)
2760          {          {
2761          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2762          }          }
2763    
2764        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
2765    
2766        else        else
2767          {          {
2768          int dc;          unsigned int dc;
2769          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2770          ecode += length;          ecode += length;
2771    
# Line 1898  for (;;) Line 2775  for (;;)
2775          if (fc != dc)          if (fc != dc)
2776            {            {
2777  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2778            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2779  #endif  #endif
2780              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2781            }            }
2782          }          }
2783        }        }
# Line 1909  for (;;) Line 2786  for (;;)
2786    
2787      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2788        {        {
2789        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2790        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2791            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2792            MRRETURN(MATCH_NOMATCH);
2793            }
2794          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2795        ecode += 2;        ecode += 2;
2796        }        }
2797      break;      break;
2798    
2799      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2800    
2801      case OP_EXACT:      case OP_EXACT:
2802      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2803      ecode += 3;      ecode += 3;
2804      goto REPEATCHAR;      goto REPEATCHAR;
2805    
2806        case OP_POSUPTO:
2807        possessive = TRUE;
2808        /* Fall through */
2809    
2810      case OP_UPTO:      case OP_UPTO:
2811      case OP_MINUPTO:      case OP_MINUPTO:
2812      min = 0;      min = 0;
# Line 1930  for (;;) Line 2815  for (;;)
2815      ecode += 3;      ecode += 3;
2816      goto REPEATCHAR;      goto REPEATCHAR;
2817    
2818        case OP_POSSTAR:
2819        possessive = TRUE;
2820        min = 0;
2821        max = INT_MAX;
2822        ecode++;
2823        goto REPEATCHAR;
2824    
2825        case OP_POSPLUS:
2826        possessive = TRUE;
2827        min = 1;
2828        max = INT_MAX;
2829        ecode++;
2830        goto REPEATCHAR;
2831    
2832        case OP_POSQUERY:
2833        possessive = TRUE;
2834        min = 0;
2835        max = 1;
2836        ecode++;
2837        goto REPEATCHAR;
2838    
2839      case OP_STAR:      case OP_STAR:
2840      case OP_MINSTAR:      case OP_MINSTAR:
2841      case OP_PLUS:      case OP_PLUS:
# Line 1938  for (;;) Line 2844  for (;;)
2844      case OP_MINQUERY:      case OP_MINQUERY:
2845      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2846      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2847    
2848      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2849      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2850      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2851    
2852      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2853    
2854      REPEATCHAR:      REPEATCHAR:
2855  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1953  for (;;) Line 2858  for (;;)
2858        length = 1;        length = 1;
2859        charptr = ecode;        charptr = ecode;
2860        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2861        ecode += length;        ecode += length;
2862    
2863        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 1961  for (;;) Line 2865  for (;;)
2865    
2866        if (length > 1)        if (length > 1)
2867          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2868  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2869          int othercase;          unsigned int othercase;
2870          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2871              (othercase = _pcre_ucp_othercase(fc)) >= 0 &&              (othercase = UCD_OTHERCASE(fc)) != fc)
              othercase >= 0)  
2872            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2873            else oclength = 0;
2874  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2875    
2876          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2877            {            {
2878            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2879            /* Need braces because of following else */              memcmp(eptr, charptr, length) == 0) eptr += length;
2880            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2881              else if (oclength > 0 &&
2882                       eptr <= md->end_subject - oclength &&
2883                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2884    #endif  /* SUPPORT_UCP */
2885            else            else
2886              {              {
2887              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2888              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2889              }              }
2890            }            }
2891    
# Line 1990  for (;;) Line 2895  for (;;)
2895            {            {
2896            for (fi = min;; fi++)            for (fi = min;; fi++)
2897              {              {
2898              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2899              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2900              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2901              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2902              /* Need braces because of following else */                memcmp(eptr, charptr, length) == 0) eptr += length;
2903              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2904                else if (oclength > 0 &&
2905                         eptr <= md->end_subject - oclength &&
2906                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2907    #endif  /* SUPPORT_UCP */
2908              else              else
2909                {                {
2910                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2911                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2912                }                }
2913              }              }
2914            /* Control never gets here */            /* Control never gets here */
2915            }            }
2916          else  
2917            else  /* Maximize */
2918            {            {
2919            pp = eptr;            pp = eptr;
2920            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2921              {              {
2922              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2923              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2924              else if (oclength == 0) break;  #ifdef SUPPORT_UCP
2925                else if (oclength > 0 &&
2926                         eptr <= md->end_subject - oclength &&
2927                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2928    #endif  /* SUPPORT_UCP */
2929              else              else
2930                {                {
2931                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2932                eptr += oclength;                break;
2933                }                }
2934              }              }
2935            while (eptr >= pp)  
2936             {            if (possessive) continue;
2937             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);  
2938             if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for(;;)
2939             eptr -= length;              {
2940             }              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2941            RRETURN(MATCH_NOMATCH);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2942                if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2943    #ifdef SUPPORT_UCP
2944                eptr--;
2945                BACKCHAR(eptr);
2946    #else   /* without SUPPORT_UCP */
2947                eptr -= length;
2948    #endif  /* SUPPORT_UCP */
2949                }
2950            }            }
2951          /* Control never gets here */          /* Control never gets here */
2952          }          }
# Line 2037  for (;;) Line 2959  for (;;)
2959  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2960    
2961      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2962        {  
2963        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2964    
2965      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2966      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2058  for (;;) Line 2978  for (;;)
2978        {        {
2979        fc = md->lcc[fc];        fc = md->lcc[fc];
2980        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2981          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2982            if (eptr >= md->end_subject)
2983              {
2984              SCHECK_PARTIAL();
2985              MRRETURN(MATCH_NOMATCH);
2986              }
2987            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2988            }
2989        if (min == max) continue;        if (min == max) continue;
2990        if (minimize)        if (minimize)
2991          {          {
2992          for (fi = min;; fi++)          for (fi = min;; fi++)
2993            {            {
2994            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2995            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2996            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2997                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2998              RRETURN(MATCH_NOMATCH);              {
2999                SCHECK_PARTIAL();
3000                MRRETURN(MATCH_NOMATCH);
3001                }
3002              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3003            }            }
3004          /* Control never gets here */          /* Control never gets here */
3005          }          }
3006        else        else  /* Maximize */
3007          {          {
3008          pp = eptr;          pp = eptr;
3009          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3010            {            {
3011            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
3012                {
3013                SCHECK_PARTIAL();
3014                break;
3015                }
3016              if (fc != md->lcc[*eptr]) break;
3017            eptr++;            eptr++;
3018            }            }
3019    
3020            if (possessive) continue;
3021    
3022          while (eptr >= pp)          while (eptr >= pp)
3023            {            {
3024            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3025            eptr--;            eptr--;
3026            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3027            }            }
3028          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3029          }          }
3030        /* Control never gets here */        /* Control never gets here */
3031        }        }
# Line 2095  for (;;) Line 3034  for (;;)
3034    
3035      else      else
3036        {        {
3037        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
3038            {
3039            if (eptr >= md->end_subject)
3040              {
3041              SCHECK_PARTIAL();
3042              MRRETURN(MATCH_NOMATCH);
3043              }
3044            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3045            }
3046    
3047        if (min == max) continue;        if (min == max) continue;
3048    
3049        if (minimize)        if (minimize)
3050          {          {
3051          for (fi = min;; fi++)          for (fi = min;; fi++)
3052            {            {
3053            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3054            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3055            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3056              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3057                {
3058                SCHECK_PARTIAL();
3059                MRRETURN(MATCH_NOMATCH);
3060                }
3061              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3062            }            }
3063          /* Control never gets here */          /* Control never gets here */
3064          }          }
3065        else        else  /* Maximize */
3066          {          {
3067          pp = eptr;          pp = eptr;
3068          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3069            {            {
3070            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
3071                {
3072                SCHECK_PARTIAL();
3073                break;
3074                }
3075              if (fc != *eptr) break;
3076            eptr++;            eptr++;
3077            }            }
3078            if (possessive) continue;
3079    
3080          while (eptr >= pp)          while (eptr >= pp)
3081            {            {
3082            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3083            eptr--;            eptr--;
3084            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3085            }            }
3086          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3087          }          }
3088        }        }
3089      /* Control never gets here */      /* Control never gets here */
# Line 2131  for (;;) Line 3092  for (;;)
3092      checking can be multibyte. */      checking can be multibyte. */
3093    
3094      case OP_NOT:      case OP_NOT:
3095      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
3096          {
3097          SCHECK_PARTIAL();
3098          MRRETURN(MATCH_NOMATCH);
3099          }
3100      ecode++;      ecode++;
3101      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3102      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2140  for (;;) Line 3105  for (;;)
3105        if (c < 256)        if (c < 256)
3106  #endif  #endif
3107        c = md->lcc[c];        c = md->lcc[c];
3108        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3109        }        }
3110      else      else
3111        {        {
3112        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3113        }        }
3114      break;      break;
3115    
# Line 2168  for (;;) Line 3133  for (;;)
3133      ecode += 3;      ecode += 3;
3134      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3135    
3136        case OP_NOTPOSSTAR:
3137        possessive = TRUE;
3138        min = 0;
3139        max = INT_MAX;
3140        ecode++;
3141        goto REPEATNOTCHAR;
3142    
3143        case OP_NOTPOSPLUS:
3144        possessive = TRUE;
3145        min = 1;
3146        max = INT_MAX;
3147        ecode++;
3148        goto REPEATNOTCHAR;
3149    
3150        case OP_NOTPOSQUERY:
3151        possessive = TRUE;
3152        min = 0;
3153        max = 1;
3154        ecode++;
3155        goto REPEATNOTCHAR;
3156    
3157        case OP_NOTPOSUPTO:
3158        possessive = TRUE;
3159        min = 0;
3160        max = GET2(ecode, 1);
3161        ecode += 3;
3162        goto REPEATNOTCHAR;
3163    
3164      case OP_NOTSTAR:      case OP_NOTSTAR:
3165      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
3166      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2180  for (;;) Line 3173  for (;;)
3173      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3174      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3175    
3176      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3177    
3178      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3179      fc = *ecode++;      fc = *ecode++;
3180    
3181      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2207  for (;;) Line 3197  for (;;)
3197        /* UTF-8 mode */        /* UTF-8 mode */
3198        if (utf8)        if (utf8)
3199          {          {
3200          register int d;          register unsigned int d;
3201          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3202            {            {
3203              if (eptr >= md->end_subject)
3204                {
3205                SCHECK_PARTIAL();
3206                MRRETURN(MATCH_NOMATCH);
3207                }
3208            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3209            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3210            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3211            }            }
3212          }          }
3213        else        else
# Line 2221  for (;;) Line 3216  for (;;)
3216        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3217          {          {
3218          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3219            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3220              if (eptr >= md->end_subject)
3221                {
3222                SCHECK_PARTIAL();
3223                MRRETURN(MATCH_NOMATCH);
3224                }
3225              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3226              }
3227          }          }
3228    
3229        if (min == max) continue;        if (min == max) continue;
# Line 2232  for (;;) Line 3234  for (;;)
3234          /* UTF-8 mode */          /* UTF-8 mode */
3235          if (utf8)          if (utf8)
3236            {            {
3237            register int d;            register unsigned int d;
3238            for (fi = min;; fi++)            for (fi = min;; fi++)
3239              {              {
3240              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3241              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3242                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3243                if (eptr >= md->end_subject)
3244                  {
3245                  SCHECK_PARTIAL();
3246                  MRRETURN(MATCH_NOMATCH);
3247                  }
3248              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3249              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3250              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3251              }              }
3252            }            }
3253          else          else
# Line 2249  for (;;) Line 3256  for (;;)
3256            {            {
3257            for (fi = min;; fi++)            for (fi = min;; fi++)
3258              {              {
3259              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3260              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3261              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3262                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3263                  {
3264                  SCHECK_PARTIAL();
3265                  MRRETURN(MATCH_NOMATCH);
3266                  }
3267                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3268              }              }
3269            }            }
3270          /* Control never gets here */          /* Control never gets here */
# Line 2268  for (;;) Line 3280  for (;;)
3280          /* UTF-8 mode */          /* UTF-8 mode */
3281          if (utf8)          if (utf8)
3282            {            {
3283            register int d;            register unsigned int d;
3284            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3285              {              {
3286              int len = 1;              int len = 1;
3287              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3288                  {
3289                  SCHECK_PARTIAL();
3290                  break;
3291                  }
3292              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3293              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3294              if (fc == d) break;              if (fc == d) break;
3295              eptr += len;              eptr += len;
3296              }              }
3297            for(;;)          if (possessive) continue;
3298            for(;;)
3299              {              {
3300              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3301              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3302              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3303              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2292  for (;;) Line 3309  for (;;)
3309            {            {
3310            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3311              {              {
3312              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3313                  {
3314                  SCHECK_PARTIAL();
3315                  break;
3316                  }
3317                if (fc == md->lcc[*eptr]) break;
3318              eptr++;              eptr++;
3319              }              }
3320              if (possessive) continue;
3321            while (eptr >= pp)            while (eptr >= pp)
3322              {              {
3323              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3324              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3325              eptr--;              eptr--;
3326              }              }
3327            }            }
3328    
3329          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3330          }          }
3331        /* Control never gets here */        /* Control never gets here */
3332        }        }
# Line 2316  for (;;) Line 3339  for (;;)
3339        /* UTF-8 mode */        /* UTF-8 mode */
3340        if (utf8)        if (utf8)
3341          {          {
3342          register int d;          register unsigned int d;
3343          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3344            {            {
3345              if (eptr >= md->end_subject)
3346                {
3347                SCHECK_PARTIAL();
3348                MRRETURN(MATCH_NOMATCH);
3349                }
3350            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3351            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3352            }            }
3353          }          }
3354        else        else
# Line 2328  for (;;) Line 3356  for (;;)
3356        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3357          {          {
3358          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3359            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3360              if (eptr >= md->end_subject)
3361                {
3362                SCHECK_PARTIAL();
3363                MRRETURN(MATCH_NOMATCH);
3364                }
3365              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3366              }
3367          }          }
3368    
3369        if (min == max) continue;        if (min == max) continue;
# Line 2339  for (;;) Line 3374  for (;;)
3374          /* UTF-8 mode */          /* UTF-8 mode */
3375          if (utf8)          if (utf8)
3376            {            {
3377            register int d;            register unsigned int d;
3378            for (fi = min;; fi++)            for (fi = min;; fi++)
3379              {              {
3380              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3381              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3382                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3383                if (eptr >= md->end_subject)
3384                  {
3385                  SCHECK_PARTIAL();
3386                  MRRETURN(MATCH_NOMATCH);
3387                  }
3388              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3389              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3390              }              }
3391            }            }
3392          else          else
# Line 2355  for (;;) Line 3395  for (;;)
3395            {            {
3396            for (fi = min;; fi++)            for (fi = min;; fi++)
3397              {              {
3398              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3399              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3400              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3401                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3402                  {
3403                  SCHECK_PARTIAL();
3404                  MRRETURN(MATCH_NOMATCH);
3405                  }
3406                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3407              }              }
3408            }            }
3409          /* Control never gets here */          /* Control never gets here */
# Line 2374  for (;;) Line 3419  for (;;)
3419          /* UTF-8 mode */          /* UTF-8 mode */
3420          if (utf8)          if (utf8)
3421            {            {
3422            register int d;            register unsigned int d;
3423            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3424              {              {
3425              int len = 1;              int len = 1;
3426              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3427                  {
3428                  SCHECK_PARTIAL();
3429                  break;
3430                  }
3431              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3432              if (fc == d) break;              if (fc == d) break;
3433              eptr += len;              eptr += len;
3434              }              }
3435              if (possessive) continue;
3436            for(;;)            for(;;)
3437              {              {
3438              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3439              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3440              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3441              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2397  for (;;) Line 3447  for (;;)
3447            {            {
3448            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3449              {              {
3450              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3451                  {
3452                  SCHECK_PARTIAL();
3453                  break;
3454                  }
3455                if (fc == *eptr) break;
3456              eptr++;              eptr++;
3457              }              }
3458              if (possessive) continue;
3459            while (eptr >= pp)            while (eptr >= pp)
3460              {              {
3461              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3462              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3463              eptr--;              eptr--;
3464              }              }
3465            }            }
3466    
3467          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3468          }          }
3469        }        }
3470      /* Control never gets here */      /* Control never gets here */
# Line 2431  for (;;) Line 3487  for (;;)
3487      ecode += 3;      ecode += 3;
3488      goto REPEATTYPE;      goto REPEATTYPE;
3489    
3490        case OP_TYPEPOSSTAR:
3491        possessive = TRUE;
3492        min = 0;
3493        max = INT_MAX;
3494        ecode++;
3495        goto REPEATTYPE;
3496    
3497        case OP_TYPEPOSPLUS:
3498        possessive = TRUE;
3499        min = 1;
3500        max = INT_MAX;
3501        ecode++;
3502        goto REPEATTYPE;
3503    
3504        case OP_TYPEPOSQUERY:
3505        possessive = TRUE;
3506        min = 0;
3507        max = 1;
3508        ecode++;
3509        goto REPEATTYPE;
3510    
3511        case OP_TYPEPOSUPTO:
3512        possessive = TRUE;
3513        min = 0;
3514        max = GET2(ecode, 1);
3515        ecode += 3;
3516        goto REPEATTYPE;
3517    
3518      case OP_TYPESTAR:      case OP_TYPESTAR:
3519      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
3520      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2462  for (;;) Line 3546  for (;;)
3546    
3547      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3548      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3549      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3550      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3551      and single-bytes. */      and single-bytes. */
3552    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3553      if (min > 0)      if (min > 0)
3554        {        {
3555  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2477  for (;;) Line 3558  for (;;)
3558          switch(prop_type)          switch(prop_type)
3559            {            {
3560            case PT_ANY:            case PT_ANY:
3561            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3562            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3563              {              {
3564              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3565              GETCHARINC(c, eptr);                {
3566                  SCHECK_PARTIAL();
3567                  MRRETURN(MATCH_NOMATCH);
3568                  }
3569                GETCHARINCTEST(c, eptr);
3570              }              }
3571            break;            break;
3572    
3573            case PT_LAMP:            case PT_LAMP:
3574            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3575              {              {
3576              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3577              GETCHARINC(c, eptr);                {
3578              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3579                  MRRETURN(MATCH_NOMATCH);
3580                  }
3581                GETCHARINCTEST(c, eptr);
3582                prop_chartype = UCD_CHARTYPE(c);
3583              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3584                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3585                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3586                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3587              }              }
3588            break;            break;
3589    
3590            case PT_GC:            case PT_GC:
3591            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3592              {              {
3593              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3594              GETCHARINC(c, eptr);                {
3595              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3596                  MRRETURN(MATCH_NOMATCH);
3597                  }
3598                GETCHARINCTEST(c, eptr);
3599                prop_category = UCD_CATEGORY(c);
3600              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3601                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3602              }              }
3603            break;            break;
3604    
3605            case PT_PC:            case PT_PC:
3606            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3607              {              {
3608              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3609              GETCHARINC(c, eptr);                {
3610              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3611                  MRRETURN(MATCH_NOMATCH);
3612                  }
3613                GETCHARINCTEST(c, eptr);
3614                prop_chartype = UCD_CHARTYPE(c);
3615              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3616                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3617              }              }
3618            break;            break;
3619    
3620            case PT_SC:            case PT_SC:
3621            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3622              {              {
3623              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3624              GETCHARINC(c, eptr);                {
3625              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3626                  MRRETURN(MATCH_NOMATCH);
3627                  }
3628                GETCHARINCTEST(c, eptr);
3629                prop_script = UCD_SCRIPT(c);
3630              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3631                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3632                }
3633              break;
3634    
3635              case PT_ALNUM:
3636              for (i = 1; i <= min; i++)
3637                {
3638                if (eptr >= md->end_subject)
3639                  {
3640                  SCHECK_PARTIAL();
3641                  MRRETURN(MATCH_NOMATCH);
3642                  }
3643                GETCHARINCTEST(c, eptr);
3644                prop_category = UCD_CATEGORY(c);
3645                if ((prop_category == ucp_L || prop_category == ucp_N)
3646                       == prop_fail_result)
3647                  MRRETURN(MATCH_NOMATCH);
3648              }              }
3649            break;            break;
3650    
3651              case PT_SPACE:    /* Perl space */
3652              for (i = 1; i <= min; i++)
3653                {
3654                if (eptr >= md->end_subject)
3655                  {
3656                  SCHECK_PARTIAL();
3657                  MRRETURN(MATCH_NOMATCH);
3658                  }
3659                GETCHARINCTEST(c, eptr);
3660                prop_category = UCD_CATEGORY(c);
3661                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3662                     c == CHAR_FF || c == CHAR_CR)
3663                       == prop_fail_result)
3664                  MRRETURN(MATCH_NOMATCH);
3665                }
3666              break;
3667    
3668              case PT_PXSPACE:  /* POSIX space */
3669              for (i = 1; i <= min; i++)
3670                {
3671                if (eptr >= md->end_subject)
3672                  {
3673                  SCHECK_PARTIAL();
3674                  MRRETURN(MATCH_NOMATCH);
3675                  }
3676                GETCHARINCTEST(c, eptr);
3677                prop_category = UCD_CATEGORY(c);
3678                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3679                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3680                       == prop_fail_result)
3681                  MRRETURN(MATCH_NOMATCH);
3682                }
3683              break;
3684    
3685              case PT_WORD:
3686              for (i = 1; i <= min; i++)
3687                {
3688                if (eptr >= md->end_subject)
3689                  {
3690                  SCHECK_PARTIAL();
3691                  MRRETURN(MATCH_NOMATCH);
3692                  }
3693                GETCHARINCTEST(c, eptr);
3694                prop_category = UCD_CATEGORY(c);
3695               &