/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 529 by ph10, Mon May 31 17:28:08 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45  #define NLBLOCK md           /* The block containing newline information */  #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49    #define NLBLOCK md             /* Block containing newline information */
50    #define PSSTART start_subject  /* Field containing processed string start */
51    #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    /* Undefine some potentially clashing cpp symbols */
56    
57  /* Structure for building a chain of data that actually lives on the  #undef min
58  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   USPTR epb_saved_eptr;  
 } eptrblock;  
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_ACCEPT       (-999)
75    #define MATCH_COMMIT       (-998)
76    #define MATCH_PRUNE        (-997)
77    #define MATCH_SKIP         (-996)
78    #define MATCH_SKIP_ARG     (-995)
79    #define MATCH_THEN         (-994)
80    
81    /* This is a convenience macro for code that occurs many times. */
82    
83    #define MRRETURN(ra) \
84      { \
85      md->mark = markptr; \
86      RRETURN(ra); \
87      }
88    
89  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
90  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 81  static const char rep_max[] = { 0, 0, 0, Line 99  static const char rep_max[] = { 0, 0, 0,
99    
100    
101    
102  #ifdef DEBUG  #ifdef PCRE_DEBUG
103  /*************************************************  /*************************************************
104  *        Debugging function to print chars       *  *        Debugging function to print chars       *
105  *************************************************/  *************************************************/
# Line 101  Returns:     nothing Line 119  Returns:     nothing
119  static void  static void
120  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121  {  {
122  int c;  unsigned int c;
123  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124  while (length-- > 0)  while (length-- > 0)
125    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 133  match_ref(int offset, register USPTR ept Line 151  match_ref(int offset, register USPTR ept
151  {  {
152  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
153    
154  #ifdef DEBUG  #ifdef PCRE_DEBUG
155  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
156    printf("matching subject <null>");    printf("matching subject <null>");
157  else  else
# Line 150  printf("\n"); Line 168  printf("\n");
168    
169  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
170    
171  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172    properly if Unicode properties are supported. Otherwise, we can check only
173    ASCII characters. */
174    
175  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
176    {    {
177    #ifdef SUPPORT_UTF8
178    #ifdef SUPPORT_UCP
179      if (md->utf8)
180        {
181        USPTR endptr = eptr + length;
182        while (eptr < endptr)
183          {
184          int c, d;
185          GETCHARINC(c, eptr);
186          GETCHARINC(d, p);
187          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188          }
189        }
190      else
191    #endif
192    #endif
193    
194      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195      is no UCP support. */
196    
197    while (length-- > 0)    while (length-- > 0)
198      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199    }    }
200    
201    /* In the caseful case, we can just compare the bytes, whether or not we
202    are in UTF-8 mode. */
203    
204  else  else
205    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
# Line 186  calls by keeping local variables that ne Line 230  calls by keeping local variables that ne
230  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
231  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
232  always used to.  always used to.
233    
234    The original heap-recursive code used longjmp(). However, it seems that this
235    can be very slow on some operating systems. Following a suggestion from Stan
236    Switzer, the use of longjmp() has been abolished, at the cost of having to
237    provide a unique number for each call to RMATCH. There is no way of generating
238    a sequence of numbers at compile time in C. I have given them names, to make
239    them stand out more clearly.
240    
241    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243    tests. Furthermore, not using longjmp() means that local dynamic variables
244    don't have indeterminate values; this has meant that the frame size can be
245    reduced because the result can be "passed back" by straight setting of the
246    variable instead of being passed in the frame.
247  ****************************************************************************  ****************************************************************************
248  ***************************************************************************/  ***************************************************************************/
249    
250    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251    below must be updated in sync.  */
252    
253    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
254           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
255           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258           RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
259           RM61,  RM62 };
260    
261  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
262  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
263    actually used in this definition. */
264    
265  #ifndef NO_RECURSE  #ifndef NO_RECURSE
266  #define REGISTER register  #define REGISTER register
267  #ifdef DEBUG  
268  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #ifdef PCRE_DEBUG
269    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
270    { \    { \
271    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
272    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
273    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
274    }    }
275  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 208  versions and production versions. */ Line 278  versions and production versions. */
278    return ra; \    return ra; \
279    }    }
280  #else  #else
281  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
282    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
283  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
284  #endif  #endif
285    
286  #else  #else
287    
288    
289  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
290  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
291  match(), which never changes. */  argument of match(), which never changes. */
292    
293  #define REGISTER  #define REGISTER
294    
295  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
296    {\    {\
297    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
298    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
299      {\    newframe->Xeptr = ra;\
300      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
301      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
302      newframe->Xoffset_top = rc;\    newframe->Xmarkptr = markptr;\
303      newframe->Xims = re;\    newframe->Xoffset_top = rc;\
304      newframe->Xeptrb = rf;\    newframe->Xims = re;\
305      newframe->Xflags = rg;\    newframe->Xeptrb = rf;\
306      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xflags = rg;\
307      newframe->Xprevframe = frame;\    newframe->Xrdepth = frame->Xrdepth + 1;\
308      frame = newframe;\    newframe->Xprevframe = frame;\
309      DPRINTF(("restarting from line %d\n", __LINE__));\    frame = newframe;\
310      goto HEAP_RECURSE;\    DPRINTF(("restarting from line %d\n", __LINE__));\
311      }\    goto HEAP_RECURSE;\
312    else\    L_##rw:\
313      {\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
314    }    }
315    
316  #define RRETURN(ra)\  #define RRETURN(ra)\
317    {\    {\
318    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
319    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
320    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
321    if (frame != NULL)\    if (frame != NULL)\
322      {\      {\
323      frame->Xresult = ra;\      rrc = ra;\
324      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
325      }\      }\
326    return ra;\    return ra;\
327    }    }
# Line 269  typedef struct heapframe { Line 334  typedef struct heapframe {
334    
335    /* Function arguments that may change */    /* Function arguments that may change */
336    
337    const uschar *Xeptr;    USPTR Xeptr;
338    const uschar *Xecode;    const uschar *Xecode;
339      USPTR Xmstart;
340      USPTR Xmarkptr;
341    int Xoffset_top;    int Xoffset_top;
342    long int Xims;    long int Xims;
343    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 279  typedef struct heapframe { Line 346  typedef struct heapframe {
346    
347    /* Function local variables */    /* Function local variables */
348    
349    const uschar *Xcallpat;    USPTR Xcallpat;
350    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
351    const uschar *Xdata;    USPTR Xcharptr;
352    const uschar *Xnext;  #endif
353    const uschar *Xpp;    USPTR Xdata;
354    const uschar *Xprev;    USPTR Xnext;
355    const uschar *Xsaved_eptr;    USPTR Xpp;
356      USPTR Xprev;
357      USPTR Xsaved_eptr;
358    
359    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
360    
361    BOOL Xcur_is_word;    BOOL Xcur_is_word;
362    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
363    BOOL Xprev_is_word;    BOOL Xprev_is_word;
364    
365    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
# Line 303  typedef struct heapframe { Line 371  typedef struct heapframe {
371    int Xprop_category;    int Xprop_category;
372    int Xprop_chartype;    int Xprop_chartype;
373    int Xprop_script;    int Xprop_script;
374    int *Xprop_test_variable;    int Xoclength;
375      uschar Xocchars[8];
376  #endif  #endif
377    
378      int Xcodelink;
379    int Xctype;    int Xctype;
380    int Xfc;    unsigned int Xfc;
381    int Xfi;    int Xfi;
382    int Xlength;    int Xlength;
383    int Xmax;    int Xmax;
# Line 321  typedef struct heapframe { Line 391  typedef struct heapframe {
391    
392    eptrblock Xnewptrb;    eptrblock Xnewptrb;
393    
394    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
395    
396    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
397    
398  } heapframe;  } heapframe;
399    
# Line 340  typedef struct heapframe { Line 409  typedef struct heapframe {
409  *         Match from current position            *  *         Match from current position            *
410  *************************************************/  *************************************************/
411    
412  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
413  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
414  same response.  same response. */
415    
416    /* These macros pack up tests that are used for partial matching, and which
417    appear several times in the code. We set the "hit end" flag if the pointer is
418    at the end of the subject and also past the start of the subject (i.e.
419    something has been matched). For hard partial matching, we then return
420    immediately. The second one is used when we already know we are past the end of
421    the subject. */
422    
423    #define CHECK_PARTIAL()\
424      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
425        {\
426        md->hitend = TRUE;\
427        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
428        }
429    
430    #define SCHECK_PARTIAL()\
431      if (md->partial != 0 && eptr > mstart)\
432        {\
433        md->hitend = TRUE;\
434        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
435        }
436    
437  Performance note: It might be tempting to extract commonly used fields from the  
438  md structure (e.g. utf8, end_subject) into individual variables to improve  /* Performance note: It might be tempting to extract commonly used fields from
439    the md structure (e.g. utf8, end_subject) into individual variables to improve
440  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
441  made performance worse.  made performance worse.
442    
443  Arguments:  Arguments:
444     eptr        pointer in subject     eptr        pointer to current character in subject
445     ecode       position in code     ecode       pointer to current position in compiled code
446       mstart      pointer to the current match start position (can be modified
447                     by encountering \K)
448       markptr     pointer to the most recent MARK name, or NULL
449     offset_top  current top pointer     offset_top  current top pointer
450     md          pointer to "static" info for the match     md          pointer to "static" info for the match
451     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 362  Arguments: Line 453  Arguments:
453                   brackets - for testing for empty matches                   brackets - for testing for empty matches
454     flags       can contain     flags       can contain
455                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
456                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
457                       group that can match an empty string
458     rdepth      the recursion depth     rdepth      the recursion depth
459    
460  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
461                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
462                   a negative MATCH_xxx value for PRUNE, SKIP, etc
463                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
464                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
465  */  */
466    
467  static int  static int
468  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
469    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
470    int flags, unsigned int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
471  {  {
472  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
473  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
474  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
475    
476  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
477  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
478  register unsigned int  c;  /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
479  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
480    
481    BOOL minimize, possessive; /* Quantifier options */
482    int condcode;
483    
484  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
485  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
486  heap storage. Set up the top-level frame here; others are obtained from the  heap storage. Set up the top-level frame here; others are obtained from the
# Line 398  frame->Xprevframe = NULL;            /* Line 494  frame->Xprevframe = NULL;            /*
494    
495  frame->Xeptr = eptr;  frame->Xeptr = eptr;
496  frame->Xecode = ecode;  frame->Xecode = ecode;
497    frame->Xmstart = mstart;
498    frame->Xmarkptr = markptr;
499  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
500  frame->Xims = ims;  frame->Xims = ims;
501  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 412  HEAP_RECURSE: Line 510  HEAP_RECURSE:
510    
511  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
512  #define ecode              frame->Xecode  #define ecode              frame->Xecode
513    #define mstart             frame->Xmstart
514    #define markptr            frame->Xmarkptr
515  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
516  #define ims                frame->Xims  #define ims                frame->Xims
517  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 424  HEAP_RECURSE: Line 524  HEAP_RECURSE:
524  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
525  #endif  #endif
526  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
527    #define codelink           frame->Xcodelink
528  #define data               frame->Xdata  #define data               frame->Xdata
529  #define next               frame->Xnext  #define next               frame->Xnext
530  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 434  HEAP_RECURSE: Line 535  HEAP_RECURSE:
535    
536  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
537  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
538  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
539    
540  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
# Line 446  HEAP_RECURSE: Line 546  HEAP_RECURSE:
546  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
547  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
548  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
549  #define prop_test_variable frame->Xprop_test_variable  #define oclength           frame->Xoclength
550    #define occhars            frame->Xocchars
551  #endif  #endif
552    
553  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 470  HEAP_RECURSE: Line 571  HEAP_RECURSE:
571  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
572  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
573    
574  #else  #else         /* NO_RECURSE not defined */
575  #define fi i  #define fi i
576  #define fc c  #define fc c
577    
# Line 489  recursion_info new_recursive;      /* wi Line 590  recursion_info new_recursive;      /* wi
590                                     /* that do not have to be preserved over  */                                     /* that do not have to be preserved over  */
591  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
592  BOOL condition;  BOOL condition;
 BOOL minimize;  
593  BOOL prev_is_word;  BOOL prev_is_word;
594    
595  unsigned long int original_ims;  unsigned long int original_ims;
# Line 501  int prop_fail_result; Line 601  int prop_fail_result;
601  int prop_category;  int prop_category;
602  int prop_chartype;  int prop_chartype;
603  int prop_script;  int prop_script;
604  int *prop_test_variable;  int oclength;
605    uschar occhars[8];
606  #endif  #endif
607    
608    int codelink;
609  int ctype;  int ctype;
610  int length;  int length;
611  int max;  int max;
# Line 516  int save_offset1, save_offset2, save_off Line 618  int save_offset1, save_offset2, save_off
618  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
619    
620  eptrblock newptrb;  eptrblock newptrb;
621  #endif  #endif     /* NO_RECURSE */
622    
623  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
624  variables. */  variables. */
# Line 524  variables. */ Line 626  variables. */
626  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
627  prop_value = 0;  prop_value = 0;
628  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_variable = NULL;  
629  #endif  #endif
630    
631    
632  /* This label is used for tail recursion, which is used in a few cases even  /* This label is used for tail recursion, which is used in a few cases even
633  when NO_RECURSE is not defined, in order to reduce the amount of stack that is  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
634  used. Thanks to Ian Taylor for noticing this possibility and sending the  used. Thanks to Ian Taylor for noticing this possibility and sending the
# Line 537  TAIL_RECURSE: Line 639  TAIL_RECURSE:
639  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
640  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
641  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
642  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
643  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
644  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
645  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
646    
647    #ifdef SUPPORT_UTF8
648    utf8 = md->utf8;       /* Local copy of the flag */
649    #else
650    utf8 = FALSE;
651    #endif
652    
653  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
654  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
655    
# Line 550  if (rdepth >= md->match_limit_recursion) Line 658  if (rdepth >= md->match_limit_recursion)
658    
659  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
660    
661  #ifdef SUPPORT_UTF8  /* At the start of a group with an unlimited repeat that may match an empty
662  utf8 = md->utf8;       /* Local copy of the flag */  string, the match_cbegroup flag is set. When this is the case, add the current
663  #else  subject pointer to the chain of such remembered pointers, to be checked when we
664  utf8 = FALSE;  hit the closing ket, in order to break infinite loops that match no characters.
665  #endif  When match() is called in other circumstances, don't add to the chain. The
666    match_cbegroup flag must NOT be used with tail recursion, because the memory
667  /* At the start of a bracketed group, add the current subject pointer to the  block that is used is on the stack, so a new one may be required for each
668  stack of such pointers, to be re-instated at the end of the group when we hit  match(). */
 the closing ket. When match() is called in other circumstances, we don't add to  
 this stack. */  
669    
670  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
671    {    {
   newptrb.epb_prev = eptrb;  
672    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
673      newptrb.epb_prev = eptrb;
674    eptrb = &newptrb;    eptrb = &newptrb;
675    }    }
676    
677  /* Now start processing the operations. */  /* Now start processing the opcodes. */
678    
679  for (;;)  for (;;)
680    {    {
681      minimize = possessive = FALSE;
682    op = *ecode;    op = *ecode;
   minimize = FALSE;  
   
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
683    
684    if (md->partial &&    switch(op)
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
   /* Opening capturing bracket. If there is space in the offset vector, save  
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
685      {      {
686      number = op - OP_BRA;      case OP_MARK:
687        markptr = ecode + 2;
688      /* For extended extraction brackets (large number), we have to fish out the      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
689      number from a dummy opcode at the start. */        ims, eptrb, flags, RM55);
690    
691      if (number > EXTRACT_BASIC_MAX)      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
692        number = GET2(ecode, 2+LINK_SIZE);      argument, and we must check whether that argument matches this MARK's
693        argument. It is passed back in md->start_match_ptr (an overloading of that
694        variable). If it does match, we reset that variable to the current subject
695        position and return MATCH_SKIP. Otherwise, pass back the return code
696        unaltered. */
697    
698        if (rrc == MATCH_SKIP_ARG &&
699            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
700          {
701          md->start_match_ptr = eptr;
702          RRETURN(MATCH_SKIP);
703          }
704    
705        if (md->mark == NULL) md->mark = markptr;
706        RRETURN(rrc);
707    
708        case OP_FAIL:
709        MRRETURN(MATCH_NOMATCH);
710    
711        case OP_COMMIT:
712        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
713          ims, eptrb, flags, RM52);
714        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
715        MRRETURN(MATCH_COMMIT);
716    
717        case OP_PRUNE:
718        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
719          ims, eptrb, flags, RM51);
720        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
721        MRRETURN(MATCH_PRUNE);
722    
723        case OP_PRUNE_ARG:
724        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
725          ims, eptrb, flags, RM56);
726        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
727        md->mark = ecode + 2;
728        RRETURN(MATCH_PRUNE);
729    
730        case OP_SKIP:
731        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
732          ims, eptrb, flags, RM53);
733        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
734        md->start_match_ptr = eptr;   /* Pass back current position */
735        MRRETURN(MATCH_SKIP);
736    
737        case OP_SKIP_ARG:
738        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
739          ims, eptrb, flags, RM57);
740        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
741    
742        /* Pass back the current skip name by overloading md->start_match_ptr and
743        returning the special MATCH_SKIP_ARG return code. This will either be
744        caught by a matching MARK, or get to the top, where it is treated the same
745        as PRUNE. */
746    
747        md->start_match_ptr = ecode + 2;
748        RRETURN(MATCH_SKIP_ARG);
749    
750        case OP_THEN:
751        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
752          ims, eptrb, flags, RM54);
753        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
754        MRRETURN(MATCH_THEN);
755    
756        case OP_THEN_ARG:
757        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
758          ims, eptrb, flags, RM58);
759        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
760        md->mark = ecode + 2;
761        RRETURN(MATCH_THEN);
762    
763        /* Handle a capturing bracket. If there is space in the offset vector, save
764        the current subject position in the working slot at the top of the vector.
765        We mustn't change the current values of the data slot, because they may be
766        set from a previous iteration of this group, and be referred to by a
767        reference inside the group.
768    
769        If the bracket fails to match, we need to restore this value and also the
770        values of the final offsets, in case they were set by a previous iteration
771        of the same bracket.
772    
773        If there isn't enough space in the offset vector, treat this as if it were
774        a non-capturing bracket. Don't worry about setting the flag for the error
775        case here; that is handled in the code for KET. */
776    
777        case OP_CBRA:
778        case OP_SCBRA:
779        number = GET2(ecode, 1+LINK_SIZE);
780      offset = number << 1;      offset = number << 1;
781    
782  #ifdef DEBUG  #ifdef PCRE_DEBUG
783      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
784        printf("subject=");
785      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
786      printf("\n");      printf("\n");
787  #endif  #endif
# Line 624  for (;;) Line 796  for (;;)
796        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
797        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
798    
799          flags = (op == OP_SCBRA)? match_cbegroup : 0;
800        do        do
801          {          {
802          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
803            match_isgroup);            ims, eptrb, flags, RM1);
804          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
805          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
806          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
807          }          }
# Line 640  for (;;) Line 813  for (;;)
813        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
814        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
815    
816          if (rrc != MATCH_THEN) md->mark = markptr;
817        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
818        }        }
819    
820      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
821        as a non-capturing bracket. */
822    
823      else op = OP_BRA;      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
824      }      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   
   /* Other types of node can be handled by a switch */  
   
   switch(op)  
     {  
     case OP_BRA:     /* Non-capturing bracket: optimized */  
     DPRINTF(("start bracket 0\n"));  
825    
826      /* Loop for all the alternatives */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
827    
828        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
829        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
830    
831        /* Non-capturing bracket. Loop for all the alternatives. When we get to the
832        final alternative within the brackets, we would return the result of a
833        recursive call to match() whatever happened. We can reduce stack usage by
834        turning this into a tail recursion, except in the case when match_cbegroup
835        is set.*/
836    
837        case OP_BRA:
838        case OP_SBRA:
839        DPRINTF(("start non-capturing bracket\n"));
840        flags = (op >= OP_SBRA)? match_cbegroup : 0;
841      for (;;)      for (;;)
842        {        {
843        /* When we get to the final alternative within the brackets, we would        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
844        return the result of a recursive call to match() whatever happened. We          {
845        can reduce stack usage by turning this into a tail recursion. */          if (flags == 0)    /* Not a possibly empty group */
846              {
847        if (ecode[GET(ecode, 1)] != OP_ALT)            ecode += _pcre_OP_lengths[*ecode];
848         {            DPRINTF(("bracket 0 tail recursion\n"));
849         ecode += 1 + LINK_SIZE;            goto TAIL_RECURSE;
850         flags = match_isgroup;            }
851         DPRINTF(("bracket 0 tail recursion\n"));  
852         goto TAIL_RECURSE;          /* Possibly empty group; can't use tail recursion. */
853         }  
854            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
855              eptrb, flags, RM48);
856            if (rrc == MATCH_NOMATCH) md->mark = markptr;
857            RRETURN(rrc);
858            }
859    
860        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
861        otherwise return. */        otherwise return. */
862    
863        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
864          match_isgroup);          eptrb, flags, RM2);
865        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
866        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
867        }        }
868      /* Control never reaches here. */      /* Control never reaches here. */
# Line 688  for (;;) Line 874  for (;;)
874      obeyed, we can use tail recursion to avoid using another stack frame. */      obeyed, we can use tail recursion to avoid using another stack frame. */
875    
876      case OP_COND:      case OP_COND:
877      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
878        codelink= GET(ecode, 1);
879    
880        /* Because of the way auto-callout works during compile, a callout item is
881        inserted between OP_COND and an assertion condition. */
882    
883        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
884          {
885          if (pcre_callout != NULL)
886            {
887            pcre_callout_block cb;
888            cb.version          = 1;   /* Version 1 of the callout block */
889            cb.callout_number   = ecode[LINK_SIZE+2];
890            cb.offset_vector    = md->offset_vector;
891            cb.subject          = (PCRE_SPTR)md->start_subject;
892            cb.subject_length   = md->end_subject - md->start_subject;
893            cb.start_match      = mstart - md->start_subject;
894            cb.current_position = eptr - md->start_subject;
895            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
896            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
897            cb.capture_top      = offset_top/2;
898            cb.capture_last     = md->capture_last;
899            cb.callout_data     = md->callout_data;
900            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
901            if (rrc < 0) RRETURN(rrc);
902            }
903          ecode += _pcre_OP_lengths[OP_CALLOUT];
904          }
905    
906        condcode = ecode[LINK_SIZE+1];
907    
908        /* Now see what the actual condition is */
909    
910        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
911          {
912          if (md->recursive == NULL)                /* Not recursing => FALSE */
913            {
914            condition = FALSE;
915            ecode += GET(ecode, 1);
916            }
917          else
918            {
919            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
920            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
921    
922            /* If the test is for recursion into a specific subpattern, and it is
923            false, but the test was set up by name, scan the table to see if the
924            name refers to any other numbers, and test them. The condition is true
925            if any one is set. */
926    
927            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
928              {
929              uschar *slotA = md->name_table;
930              for (i = 0; i < md->name_count; i++)
931                {
932                if (GET2(slotA, 0) == recno) break;
933                slotA += md->name_entry_size;
934                }
935    
936              /* Found a name for the number - there can be only one; duplicate
937              names for different numbers are allowed, but not vice versa. First
938              scan down for duplicates. */
939    
940              if (i < md->name_count)
941                {
942                uschar *slotB = slotA;
943                while (slotB > md->name_table)
944                  {
945                  slotB -= md->name_entry_size;
946                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
947                    {
948                    condition = GET2(slotB, 0) == md->recursive->group_num;
949                    if (condition) break;
950                    }
951                  else break;
952                  }
953    
954                /* Scan up for duplicates */
955    
956                if (!condition)
957                  {
958                  slotB = slotA;
959                  for (i++; i < md->name_count; i++)
960                    {
961                    slotB += md->name_entry_size;
962                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
963                      {
964                      condition = GET2(slotB, 0) == md->recursive->group_num;
965                      if (condition) break;
966                      }
967                    else break;
968                    }
969                  }
970                }
971              }
972    
973            /* Choose branch according to the condition */
974    
975            ecode += condition? 3 : GET(ecode, 1);
976            }
977          }
978    
979        else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
980        {        {
981        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
982        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
983          (md->recursive != NULL) :  
984          (offset < offset_top && md->offset_vector[offset] >= 0);        /* If the numbered capture is unset, but the reference was by name,
985        ecode += condition? (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1));        scan the table to see if the name refers to any other numbers, and test
986        flags = match_isgroup;        them. The condition is true if any one is set. This is tediously similar
987        goto TAIL_RECURSE;        to the code above, but not close enough to try to amalgamate. */
988    
989          if (!condition && condcode == OP_NCREF)
990            {
991            int refno = offset >> 1;
992            uschar *slotA = md->name_table;
993    
994            for (i = 0; i < md->name_count; i++)
995              {
996              if (GET2(slotA, 0) == refno) break;
997              slotA += md->name_entry_size;
998              }
999    
1000            /* Found a name for the number - there can be only one; duplicate names
1001            for different numbers are allowed, but not vice versa. First scan down
1002            for duplicates. */
1003    
1004            if (i < md->name_count)
1005              {
1006              uschar *slotB = slotA;
1007              while (slotB > md->name_table)
1008                {
1009                slotB -= md->name_entry_size;
1010                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1011                  {
1012                  offset = GET2(slotB, 0) << 1;
1013                  condition = offset < offset_top &&
1014                    md->offset_vector[offset] >= 0;
1015                  if (condition) break;
1016                  }
1017                else break;
1018                }
1019    
1020              /* Scan up for duplicates */
1021    
1022              if (!condition)
1023                {
1024                slotB = slotA;
1025                for (i++; i < md->name_count; i++)
1026                  {
1027                  slotB += md->name_entry_size;
1028                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1029                    {
1030                    offset = GET2(slotB, 0) << 1;
1031                    condition = offset < offset_top &&
1032                      md->offset_vector[offset] >= 0;
1033                    if (condition) break;
1034                    }
1035                  else break;
1036                  }
1037                }
1038              }
1039            }
1040    
1041          /* Choose branch according to the condition */
1042    
1043          ecode += condition? 3 : GET(ecode, 1);
1044          }
1045    
1046        else if (condcode == OP_DEF)     /* DEFINE - always false */
1047          {
1048          condition = FALSE;
1049          ecode += GET(ecode, 1);
1050        }        }
1051    
1052      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
1053      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
1054        assertion. */
1055    
1056      else      else
1057        {        {
1058        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1059            match_condassert | match_isgroup);            match_condassert, RM3);
1060        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1061          {          {
1062          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
1063            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1064          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1065          }          }
1066        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1067          {          {
1068          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1069          }          }
1070        else ecode += GET(ecode, 1);        else
1071            {
1072            condition = FALSE;
1073            ecode += codelink;
1074            }
1075          }
1076    
1077        /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1078        we can use tail recursion to avoid using another stack frame. */      we can use tail recursion to avoid using another stack frame, except when
1079        match_cbegroup is required for an unlimited repeat of a possibly empty
1080        group. If the second alternative doesn't exist, we can just plough on. */
1081    
1082        if (condition || *ecode == OP_ALT)
1083          {
1084        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1085        flags = match_isgroup;        if (op == OP_SCOND)        /* Possibly empty group */
1086        goto TAIL_RECURSE;          {
1087            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1088            RRETURN(rrc);
1089            }
1090          else                       /* Group must match something */
1091            {
1092            flags = 0;
1093            goto TAIL_RECURSE;
1094            }
1095        }        }
1096      /* Control never reaches here */      else                         /* Condition false & no alternative */
1097          {
1098          ecode += 1 + LINK_SIZE;
1099          }
1100        break;
1101    
1102    
1103        /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1104        to close any currently open capturing brackets. */
1105    
1106        case OP_CLOSE:
1107        number = GET2(ecode, 1);
1108        offset = number << 1;
1109    
1110      /* Skip over conditional reference or large extraction number data if  #ifdef PCRE_DEBUG
1111      encountered. */        printf("end bracket %d at *ACCEPT", number);
1112          printf("\n");
1113    #endif
1114    
1115      case OP_CREF:      md->capture_last = number;
1116      case OP_BRANUMBER:      if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1117          {
1118          md->offset_vector[offset] =
1119            md->offset_vector[md->offset_end - number];
1120          md->offset_vector[offset+1] = eptr - md->start_subject;
1121          if (offset_top <= offset) offset_top = offset + 2;
1122          }
1123      ecode += 3;      ecode += 3;
1124      break;      break;
1125    
     /* End of the pattern. If we are in a recursion, we should restore the  
     offsets appropriately and continue from after the call. */  
1126    
1127        /* End of the pattern, either real or forced. If we are in a top-level
1128        recursion, we should restore the offsets appropriately and continue from
1129        after the call. */
1130    
1131        case OP_ACCEPT:
1132      case OP_END:      case OP_END:
1133      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1134        {        {
# Line 745  for (;;) Line 1137  for (;;)
1137        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1138        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1139          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1140        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1141        ims = original_ims;        ims = original_ims;
1142        ecode = rec->after_call;        ecode = rec->after_call;
1143        break;        break;
1144        }        }
1145    
1146      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1147      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1148        the subject. In both cases, backtracking will then try other alternatives,
1149        if any. */
1150    
1151        if (eptr == mstart &&
1152            (md->notempty ||
1153              (md->notempty_atstart &&
1154                mstart == md->start_subject + md->start_offset)))
1155          MRRETURN(MATCH_NOMATCH);
1156    
1157        /* Otherwise, we have a match. */
1158    
1159        md->end_match_ptr = eptr;           /* Record where we ended */
1160        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1161        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1162    
1163      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      /* For some reason, the macros don't work properly if an expression is
1164      md->end_match_ptr = eptr;          /* Record where we ended */      given as the argument to MRRETURN when the heap is in use. */
1165      md->end_offset_top = offset_top;   /* and how many extracts were taken */  
1166      RRETURN(MATCH_MATCH);      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1167        MRRETURN(rrc);
1168    
1169      /* Change option settings */      /* Change option settings */
1170    
# Line 777  for (;;) Line 1184  for (;;)
1184      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1185      do      do
1186        {        {
1187        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1188          match_isgroup);          RM4);
1189        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1190        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          {
1191            mstart = md->start_match_ptr;   /* In case \K reset it */
1192            break;
1193            }
1194          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1195        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1196        }        }
1197      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1198      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1199    
1200      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1201    
# Line 798  for (;;) Line 1209  for (;;)
1209      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1210      continue;      continue;
1211    
1212      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1213        PRUNE, or COMMIT means we must assume failure without checking subsequent
1214        branches. */
1215    
1216      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1217      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1218      do      do
1219        {        {
1220        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1221          match_isgroup);          RM5);
1222        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1223        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1224            {
1225            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1226            break;
1227            }
1228          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1229        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1230        }        }
1231      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 826  for (;;) Line 1244  for (;;)
1244  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1245      if (utf8)      if (utf8)
1246        {        {
1247        c = GET(ecode,1);        i = GET(ecode, 1);
1248        for (i = 0; i < c; i++)        while (i-- > 0)
1249          {          {
1250          eptr--;          eptr--;
1251          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1252          BACKCHAR(eptr)          BACKCHAR(eptr);
1253          }          }
1254        }        }
1255      else      else
# Line 840  for (;;) Line 1258  for (;;)
1258      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1259    
1260        {        {
1261        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
1262        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1263        }        }
1264    
1265      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1266    
1267        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1268      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1269      break;      break;
1270    
# Line 862  for (;;) Line 1281  for (;;)
1281        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1282        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1283        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
1284        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
1285        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
1286        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1287        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1288        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1289        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1290        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1291        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1292        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1293        }        }
1294      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 897  for (;;) Line 1316  for (;;)
1316      case OP_RECURSE:      case OP_RECURSE:
1317        {        {
1318        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1319        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
1320            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
1321    
1322        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1323    
# Line 929  for (;;) Line 1343  for (;;)
1343    
1344        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1345              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1346        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1347    
1348        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1349        restore the offset and recursion data. */        restore the offset and recursion data. */
1350    
1351        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1352          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1353        do        do
1354          {          {
1355          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1356              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
1357          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1358            {            {
1359            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1360            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1361            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1362              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1363            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1364            }            }
1365          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1366            {            {
1367            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1368              if (new_recursive.offset_save != stacksave)
1369                (pcre_free)(new_recursive.offset_save);
1370            RRETURN(rrc);            RRETURN(rrc);
1371            }            }
1372    
# Line 965  for (;;) Line 1381  for (;;)
1381        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1382        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1383          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1384        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1385        }        }
1386      /* Control never reaches here */      /* Control never reaches here */
1387    
# Line 974  for (;;) Line 1390  for (;;)
1390      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1391      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1392      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1393      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1394        the start-of-match value in case it was changed by \K. */
1395    
1396      case OP_ONCE:      case OP_ONCE:
1397      prev = ecode;      prev = ecode;
# Line 982  for (;;) Line 1399  for (;;)
1399    
1400      do      do
1401        {        {
1402        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1403          eptrb, match_isgroup);        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1404        if (rrc == MATCH_MATCH) break;          {
1405        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          mstart = md->start_match_ptr;
1406            break;
1407            }
1408          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1409        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1410        }        }
1411      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 997  for (;;) Line 1417  for (;;)
1417      /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1418      mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1419    
1420      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1421    
1422      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1423      eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
# Line 1028  for (;;) Line 1448  for (;;)
1448    
1449      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1450        {        {
1451        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1452        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1453        ecode = prev;        ecode = prev;
1454        flags = match_isgroup;        flags = 0;
1455        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1456        }        }
1457      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1458        {        {
1459        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1460        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1461        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1462        flags = 0;        flags = 0;
# Line 1051  for (;;) Line 1471  for (;;)
1471      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1472      break;      break;
1473    
1474      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1475      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1476      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1477      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1478      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1479    
1480      case OP_BRAZERO:      case OP_BRAZERO:
1481        {        {
1482        next = ecode+1;        next = ecode+1;
1483        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1484        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1485        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1486        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1487        }        }
1488      break;      break;
1489    
1490      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1491        {        {
1492        next = ecode+1;        next = ecode+1;
1493        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1494        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1495        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1496        ecode++;        ecode++;
1497        }        }
1498      break;      break;
1499    
1500      /* End of a group, repeated or non-repeating. If we are at the end of      case OP_SKIPZERO:
1501      an assertion "group", stop matching and return MATCH_MATCH, but record the        {
1502      current high water mark for use by positive assertions. Do this also        next = ecode+1;
1503      for the "once" (not-backup up) groups. */        do next += GET(next,1); while (*next == OP_ALT);
1504          ecode = next + 1 + LINK_SIZE;
1505          }
1506        break;
1507    
1508        /* End of a group, repeated or non-repeating. */
1509    
1510      case OP_KET:      case OP_KET:
1511      case OP_KETRMIN:      case OP_KETRMIN:
1512      case OP_KETRMAX:      case OP_KETRMAX:
1513      prev = ecode - GET(ecode, 1);      prev = ecode - GET(ecode, 1);
     saved_eptr = eptrb->epb_saved_eptr;  
1514    
1515      /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1516        infinite repeats of empty string matches, retrieve the subject start from
1517        the chain. Otherwise, set it NULL. */
1518    
1519        if (*prev >= OP_SBRA)
1520          {
1521          saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1522          eptrb = eptrb->epb_prev;              /* Backup to previous group */
1523          }
1524        else saved_eptr = NULL;
1525    
1526      eptrb = eptrb->epb_prev;      /* If we are at the end of an assertion group or an atomic group, stop
1527        matching and return MATCH_MATCH, but record the current high water mark for
1528        use by positive assertions. We also need to record the match start in case
1529        it was changed by \K. */
1530    
1531      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1532          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1099  for (;;) Line 1534  for (;;)
1534        {        {
1535        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1536        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1537        RRETURN(MATCH_MATCH);        md->start_match_ptr = mstart;
1538          MRRETURN(MATCH_MATCH);
1539        }        }
1540    
1541      /* In all other cases except a conditional group we have to check the      /* For capturing groups we have to check the group number back at the start
1542      group number back at the start and if necessary complete handling an      and if necessary complete handling an extraction by setting the offsets and
1543      extraction by setting the offsets and bumping the high water mark. */      bumping the high water mark. Note that whole-pattern recursion is coded as
1544        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1545        when the OP_END is reached. Other recursion is handled here. */
1546    
1547      if (*prev != OP_COND)      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1548        {        {
1549        number = *prev - OP_BRA;        number = GET2(prev, 1+LINK_SIZE);
   
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);  
1550        offset = number << 1;        offset = number << 1;
1551    
1552  #ifdef DEBUG  #ifdef PCRE_DEBUG
1553        printf("end bracket %d", number);        printf("end bracket %d", number);
1554        printf("\n");        printf("\n");
1555  #endif  #endif
1556    
1557        /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1558        of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
       into group 0, so it won't be picked up here. Instead, we catch it when  
       the OP_END is reached. */  
   
       if (number > 0)  
1559          {          {
1560          md->capture_last = number;          md->offset_vector[offset] =
1561          if (offset >= md->offset_max) md->offset_overflow = TRUE; else            md->offset_vector[md->offset_end - number];
1562            {          md->offset_vector[offset+1] = eptr - md->start_subject;
1563            md->offset_vector[offset] =          if (offset_top <= offset) offset_top = offset + 2;
1564              md->offset_vector[md->offset_end - number];          }
1565            md->offset_vector[offset+1] = eptr - md->start_subject;  
1566            if (offset_top <= offset) offset_top = offset + 2;        /* Handle a recursively called group. Restore the offsets
1567            }        appropriately and continue from after the call. */
1568    
1569          /* Handle a recursively called group. Restore the offsets        if (md->recursive != NULL && md->recursive->group_num == number)
1570          appropriately and continue from after the call. */          {
1571            recursion_info *rec = md->recursive;
1572          if (md->recursive != NULL && md->recursive->group_num == number)          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1573            {          md->recursive = rec->prevrec;
1574            recursion_info *rec = md->recursive;          memcpy(md->offset_vector, rec->offset_save,
1575            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));            rec->saved_max * sizeof(int));
1576            md->recursive = rec->prevrec;          offset_top = rec->save_offset_top;
1577            md->start_match = rec->save_start;          ecode = rec->after_call;
1578            memcpy(md->offset_vector, rec->offset_save,          ims = original_ims;
1579              rec->saved_max * sizeof(int));          break;
           ecode = rec->after_call;  
           ims = original_ims;  
           break;  
           }  
1580          }          }
1581        }        }
1582    
1583      /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1584      the group. */      flags, in case they got changed during the group. */
1585    
1586      ims = original_ims;      ims = original_ims;
1587      DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
# Line 1175  for (;;) Line 1600  for (;;)
1600    
1601      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1602      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1603      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1604        unlimited repeat of a group that can match an empty string. */
1605    
1606        flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1607    
1608      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1609        {        {
1610        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1611        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1612          if (flags != 0)    /* Could match an empty string */
1613            {
1614            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1615            RRETURN(rrc);
1616            }
1617        ecode = prev;        ecode = prev;
       flags = match_isgroup;  
1618        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1619        }        }
1620      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1621        {        {
1622        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1623        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1624        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1625        flags = 0;        flags = 0;
# Line 1198  for (;;) Line 1630  for (;;)
1630      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1631    
1632      case OP_CIRC:      case OP_CIRC:
1633      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1634      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1635        {        {
1636        if (eptr != md->start_subject &&        if (eptr != md->start_subject &&
1637            (eptr == md->end_subject ||            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1638             eptr < md->start_subject + md->nllen ||          MRRETURN(MATCH_NOMATCH);
            !IS_NEWLINE(eptr - md->nllen)))  
         RRETURN(MATCH_NOMATCH);  
1639        ecode++;        ecode++;
1640        break;        break;
1641        }        }
# Line 1214  for (;;) Line 1644  for (;;)
1644      /* Start of subject assertion */      /* Start of subject assertion */
1645    
1646      case OP_SOD:      case OP_SOD:
1647      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1648      ecode++;      ecode++;
1649      break;      break;
1650    
1651      /* Start of match assertion */      /* Start of match assertion */
1652    
1653      case OP_SOM:      case OP_SOM:
1654      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1655        ecode++;
1656        break;
1657    
1658        /* Reset the start of match point */
1659    
1660        case OP_SET_SOM:
1661        mstart = eptr;
1662      ecode++;      ecode++;
1663      break;      break;
1664    
# Line 1232  for (;;) Line 1669  for (;;)
1669      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1670        {        {
1671        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1672          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1673        else        else
1674          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1675        ecode++;        ecode++;
1676        break;        break;
1677        }        }
1678      else      else
1679        {        {
1680        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1681        if (!md->endonly)        if (!md->endonly)
1682          {          {
1683          if (eptr != md->end_subject &&          if (eptr != md->end_subject &&
1684              (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1685            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
1686          ecode++;          ecode++;
1687          break;          break;
1688          }          }
# Line 1255  for (;;) Line 1692  for (;;)
1692      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1693    
1694      case OP_EOD:      case OP_EOD:
1695      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1696      ecode++;      ecode++;
1697      break;      break;
1698    
# Line 1263  for (;;) Line 1700  for (;;)
1700    
1701      case OP_EODN:      case OP_EODN:
1702      if (eptr != md->end_subject &&      if (eptr != md->end_subject &&
1703          (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1704        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1705      ecode++;      ecode++;
1706      break;      break;
1707    
# Line 1276  for (;;) Line 1713  for (;;)
1713    
1714        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1715        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1716        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1717          partial matching. */
1718    
1719  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1720        if (utf8)        if (utf8)
1721          {          {
1722            /* Get status of previous character */
1723    
1724          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1725            {            {
1726            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1727            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1728              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1729            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1730    #ifdef SUPPORT_UCP
1731              if (md->use_ucp)
1732                {
1733                if (c == '_') prev_is_word = TRUE; else
1734                  {
1735                  int cat = UCD_CATEGORY(c);
1736                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1737                  }
1738                }
1739              else
1740    #endif
1741            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1742            }            }
1743          if (eptr >= md->end_subject) cur_is_word = FALSE; else  
1744            /* Get status of next character */
1745    
1746            if (eptr >= md->end_subject)
1747              {
1748              SCHECK_PARTIAL();
1749              cur_is_word = FALSE;
1750              }
1751            else
1752            {            {
1753            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1754    #ifdef SUPPORT_UCP
1755              if (md->use_ucp)
1756                {
1757                if (c == '_') cur_is_word = TRUE; else
1758                  {
1759                  int cat = UCD_CATEGORY(c);
1760                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1761                  }
1762                }
1763              else
1764    #endif
1765            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1766            }            }
1767          }          }
1768        else        else
1769  #endif  #endif
1770    
1771        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1772          consistency with the behaviour of \w we do use it in this case. */
1773    
1774          {          {
1775          prev_is_word = (eptr != md->start_subject) &&          /* Get status of previous character */
           ((md->ctypes[eptr[-1]] & ctype_word) != 0);  
         cur_is_word = (eptr < md->end_subject) &&  
           ((md->ctypes[*eptr] & ctype_word) != 0);  
         }  
1776    
1777        /* Now see if the situation is what we want */          if (eptr == md->start_subject) prev_is_word = FALSE; else
1778              {
1779              if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1780    #ifdef SUPPORT_UCP
1781              if (md->use_ucp)
1782                {
1783                c = eptr[-1];
1784                if (c == '_') prev_is_word = TRUE; else
1785                  {
1786                  int cat = UCD_CATEGORY(c);
1787                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1788                  }
1789                }
1790              else
1791    #endif
1792              prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1793              }
1794    
1795        if ((*ecode++ == OP_WORD_BOUNDARY)?          /* Get status of next character */
            cur_is_word == prev_is_word : cur_is_word != prev_is_word)  
         RRETURN(MATCH_NOMATCH);  
       }  
     break;  
1796    
1797      /* Match a single character type; inline for speed */          if (eptr >= md->end_subject)
1798              {
1799              SCHECK_PARTIAL();
1800              cur_is_word = FALSE;
1801              }
1802            else
1803    #ifdef SUPPORT_UCP
1804            if (md->use_ucp)
1805              {
1806              c = *eptr;
1807              if (c == '_') cur_is_word = TRUE; else
1808                {
1809                int cat = UCD_CATEGORY(c);
1810                cur_is_word = (cat == ucp_L || cat == ucp_N);
1811                }
1812              }
1813            else
1814    #endif
1815            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1816            }
1817    
1818          /* Now see if the situation is what we want */
1819    
1820          if ((*ecode++ == OP_WORD_BOUNDARY)?
1821               cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1822            MRRETURN(MATCH_NOMATCH);
1823          }
1824        break;
1825    
1826        /* Match a single character type; inline for speed */
1827    
1828      case OP_ANY:      case OP_ANY:
1829      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1830        /* Fall through */
1831    
1832        case OP_ALLANY:
1833        if (eptr++ >= md->end_subject)
1834        {        {
1835        if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))        SCHECK_PARTIAL();
1836          RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1837        }        }
1838      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1839      ecode++;      ecode++;
1840      break;      break;
1841    
# Line 1332  for (;;) Line 1843  for (;;)
1843      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1844    
1845      case OP_ANYBYTE:      case OP_ANYBYTE:
1846      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1847          {
1848          SCHECK_PARTIAL();
1849          MRRETURN(MATCH_NOMATCH);
1850          }
1851      ecode++;      ecode++;
1852      break;      break;
1853    
1854      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1855      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1856          {
1857          SCHECK_PARTIAL();
1858          MRRETURN(MATCH_NOMATCH);
1859          }
1860      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1861      if (      if (
1862  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1345  for (;;) Line 1864  for (;;)
1864  #endif  #endif
1865         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1866         )         )
1867        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1868      ecode++;      ecode++;
1869      break;      break;
1870    
1871      case OP_DIGIT:      case OP_DIGIT:
1872      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1873          {
1874          SCHECK_PARTIAL();
1875          MRRETURN(MATCH_NOMATCH);
1876          }
1877      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1878      if (      if (
1879  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1358  for (;;) Line 1881  for (;;)
1881  #endif  #endif
1882         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1883         )         )
1884        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1885      ecode++;      ecode++;
1886      break;      break;
1887    
1888      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1889      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1890          {
1891          SCHECK_PARTIAL();
1892          MRRETURN(MATCH_NOMATCH);
1893          }
1894      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1895      if (      if (
1896  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1371  for (;;) Line 1898  for (;;)
1898  #endif  #endif
1899         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1900         )         )
1901        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1902      ecode++;      ecode++;
1903      break;      break;
1904    
1905      case OP_WHITESPACE:      case OP_WHITESPACE:
1906      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1907          {
1908          SCHECK_PARTIAL();
1909          MRRETURN(MATCH_NOMATCH);
1910          }
1911      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1912      if (      if (
1913  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1384  for (;;) Line 1915  for (;;)
1915  #endif  #endif
1916         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1917         )         )
1918        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1919      ecode++;      ecode++;
1920      break;      break;
1921    
1922      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1923      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1924          {
1925          SCHECK_PARTIAL();
1926          MRRETURN(MATCH_NOMATCH);
1927          }
1928      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1929      if (      if (
1930  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1397  for (;;) Line 1932  for (;;)
1932  #endif  #endif
1933         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1934         )         )
1935        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1936      ecode++;      ecode++;
1937      break;      break;
1938    
1939      case OP_WORDCHAR:      case OP_WORDCHAR:
1940      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1941          {
1942          SCHECK_PARTIAL();
1943          MRRETURN(MATCH_NOMATCH);
1944          }
1945      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1946      if (      if (
1947  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1410  for (;;) Line 1949  for (;;)
1949  #endif  #endif
1950         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1951         )         )
1952        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1953        ecode++;
1954        break;
1955    
1956        case OP_ANYNL:
1957        if (eptr >= md->end_subject)
1958          {
1959          SCHECK_PARTIAL();
1960          MRRETURN(MATCH_NOMATCH);
1961          }
1962        GETCHARINCTEST(c, eptr);
1963        switch(c)
1964          {
1965          default: MRRETURN(MATCH_NOMATCH);
1966          case 0x000d:
1967          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1968          break;
1969    
1970          case 0x000a:
1971          break;
1972    
1973          case 0x000b:
1974          case 0x000c:
1975          case 0x0085:
1976          case 0x2028:
1977          case 0x2029:
1978          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1979          break;
1980          }
1981        ecode++;
1982        break;
1983    
1984        case OP_NOT_HSPACE:
1985        if (eptr >= md->end_subject)
1986          {
1987          SCHECK_PARTIAL();
1988          MRRETURN(MATCH_NOMATCH);
1989          }
1990        GETCHARINCTEST(c, eptr);
1991        switch(c)
1992          {
1993          default: break;
1994          case 0x09:      /* HT */
1995          case 0x20:      /* SPACE */
1996          case 0xa0:      /* NBSP */
1997          case 0x1680:    /* OGHAM SPACE MARK */
1998          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1999          case 0x2000:    /* EN QUAD */
2000          case 0x2001:    /* EM QUAD */
2001          case 0x2002:    /* EN SPACE */
2002          case 0x2003:    /* EM SPACE */
2003          case 0x2004:    /* THREE-PER-EM SPACE */
2004          case 0x2005:    /* FOUR-PER-EM SPACE */
2005          case 0x2006:    /* SIX-PER-EM SPACE */
2006          case 0x2007:    /* FIGURE SPACE */
2007          case 0x2008:    /* PUNCTUATION SPACE */
2008          case 0x2009:    /* THIN SPACE */
2009          case 0x200A:    /* HAIR SPACE */
2010          case 0x202f:    /* NARROW NO-BREAK SPACE */
2011          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2012          case 0x3000:    /* IDEOGRAPHIC SPACE */
2013          MRRETURN(MATCH_NOMATCH);
2014          }
2015        ecode++;
2016        break;
2017    
2018        case OP_HSPACE:
2019        if (eptr >= md->end_subject)
2020          {
2021          SCHECK_PARTIAL();
2022          MRRETURN(MATCH_NOMATCH);
2023          }
2024        GETCHARINCTEST(c, eptr);
2025        switch(c)
2026          {
2027          default: MRRETURN(MATCH_NOMATCH);
2028          case 0x09:      /* HT */
2029          case 0x20:      /* SPACE */
2030          case 0xa0:      /* NBSP */
2031          case 0x1680:    /* OGHAM SPACE MARK */
2032          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2033          case 0x2000:    /* EN QUAD */
2034          case 0x2001:    /* EM QUAD */
2035          case 0x2002:    /* EN SPACE */
2036          case 0x2003:    /* EM SPACE */
2037          case 0x2004:    /* THREE-PER-EM SPACE */
2038          case 0x2005:    /* FOUR-PER-EM SPACE */
2039          case 0x2006:    /* SIX-PER-EM SPACE */
2040          case 0x2007:    /* FIGURE SPACE */
2041          case 0x2008:    /* PUNCTUATION SPACE */
2042          case 0x2009:    /* THIN SPACE */
2043          case 0x200A:    /* HAIR SPACE */
2044          case 0x202f:    /* NARROW NO-BREAK SPACE */
2045          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2046          case 0x3000:    /* IDEOGRAPHIC SPACE */
2047          break;
2048          }
2049        ecode++;
2050        break;
2051    
2052        case OP_NOT_VSPACE:
2053        if (eptr >= md->end_subject)
2054          {
2055          SCHECK_PARTIAL();
2056          MRRETURN(MATCH_NOMATCH);
2057          }
2058        GETCHARINCTEST(c, eptr);
2059        switch(c)
2060          {
2061          default: break;
2062          case 0x0a:      /* LF */
2063          case 0x0b:      /* VT */
2064          case 0x0c:      /* FF */
2065          case 0x0d:      /* CR */
2066          case 0x85:      /* NEL */
2067          case 0x2028:    /* LINE SEPARATOR */
2068          case 0x2029:    /* PARAGRAPH SEPARATOR */
2069          MRRETURN(MATCH_NOMATCH);
2070          }
2071        ecode++;
2072        break;
2073    
2074        case OP_VSPACE:
2075        if (eptr >= md->end_subject)
2076          {
2077          SCHECK_PARTIAL();
2078          MRRETURN(MATCH_NOMATCH);
2079          }
2080        GETCHARINCTEST(c, eptr);
2081        switch(c)
2082          {
2083          default: MRRETURN(MATCH_NOMATCH);
2084          case 0x0a:      /* LF */
2085          case 0x0b:      /* VT */
2086          case 0x0c:      /* FF */
2087          case 0x0d:      /* CR */
2088          case 0x85:      /* NEL */
2089          case 0x2028:    /* LINE SEPARATOR */
2090          case 0x2029:    /* PARAGRAPH SEPARATOR */
2091          break;
2092          }
2093      ecode++;      ecode++;
2094      break;      break;
2095    
# Line 1420  for (;;) Line 2099  for (;;)
2099    
2100      case OP_PROP:      case OP_PROP:
2101      case OP_NOTPROP:      case OP_NOTPROP:
2102      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2103          {
2104          SCHECK_PARTIAL();
2105          MRRETURN(MATCH_NOMATCH);
2106          }
2107      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2108        {        {
2109        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
2110    
2111        switch(ecode[1])        switch(ecode[1])
2112          {          {
2113          case PT_ANY:          case PT_ANY:
2114          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2115          break;          break;
2116    
2117          case PT_LAMP:          case PT_LAMP:
2118          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2119               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2120               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2121            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2122           break;          break;
2123    
2124          case PT_GC:          case PT_GC:
2125          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2126            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2127          break;          break;
2128    
2129          case PT_PC:          case PT_PC:
2130          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2131            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2132          break;          break;
2133    
2134          case PT_SC:          case PT_SC:
2135          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2136            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2137            break;
2138    
2139            /* These are specials */
2140    
2141            case PT_ALNUM:
2142            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2143                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2144              MRRETURN(MATCH_NOMATCH);
2145            break;
2146    
2147            case PT_SPACE:    /* Perl space */
2148            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2149                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2150                   == (op == OP_NOTPROP))
2151              MRRETURN(MATCH_NOMATCH);
2152            break;
2153    
2154            case PT_PXSPACE:  /* POSIX space */
2155            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2156                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2157                 c == CHAR_FF || c == CHAR_CR)
2158                   == (op == OP_NOTPROP))
2159              MRRETURN(MATCH_NOMATCH);
2160            break;
2161    
2162            case PT_WORD:
2163            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2164                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2165                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2166              MRRETURN(MATCH_NOMATCH);
2167          break;          break;
2168    
2169            /* This should never occur */
2170    
2171          default:          default:
2172          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
         break;  
2173          }          }
2174    
2175        ecode += 3;        ecode += 3;
# Line 1467  for (;;) Line 2180  for (;;)
2180      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2181    
2182      case OP_EXTUNI:      case OP_EXTUNI:
2183      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2184          {
2185          SCHECK_PARTIAL();
2186          MRRETURN(MATCH_NOMATCH);
2187          }
2188      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2189        {        {
2190        int chartype, script;        int category = UCD_CATEGORY(c);
2191        int category = _pcre_ucp_findprop(c, &chartype, &script);        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2192        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2193          {          {
2194          int len = 1;          int len = 1;
# Line 1480  for (;;) Line 2196  for (;;)
2196            {            {
2197            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2198            }            }
2199          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2200          if (category != ucp_M) break;          if (category != ucp_M) break;
2201          eptr += len;          eptr += len;
2202          }          }
# Line 1501  for (;;) Line 2217  for (;;)
2217      case OP_REF:      case OP_REF:
2218        {        {
2219        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2220        ecode += 3;                                 /* Advance past item */        ecode += 3;
2221    
2222          /* If the reference is unset, there are two possibilities:
2223    
2224          (a) In the default, Perl-compatible state, set the length to be longer
2225          than the amount of subject left; this ensures that every attempt at a
2226          match fails. We can't just fail here, because of the possibility of
2227          quantifiers with zero minima.
2228    
2229        /* If the reference is unset, set the length to be longer than the amount        (b) If the JavaScript compatibility flag is set, set the length to zero
2230        of subject left; this ensures that every attempt at a match fails. We        so that the back reference matches an empty string.
2231        can't just fail here, because of the possibility of quantifiers with zero  
2232        minima. */        Otherwise, set the length to the length of what was matched by the
2233          referenced subpattern. */
2234        length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
2235          md->end_subject - eptr + 1 :        if (offset >= offset_top || md->offset_vector[offset] < 0)
2236          md->offset_vector[offset+1] - md->offset_vector[offset];          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2237          else
2238            length = md->offset_vector[offset+1] - md->offset_vector[offset];
2239    
2240        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2241    
# Line 1539  for (;;) Line 2264  for (;;)
2264          break;          break;
2265    
2266          default:               /* No repeat follows */          default:               /* No repeat follows */
2267          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2268              {
2269              CHECK_PARTIAL();
2270              MRRETURN(MATCH_NOMATCH);
2271              }
2272          eptr += length;          eptr += length;
2273          continue;              /* With the main loop */          continue;              /* With the main loop */
2274          }          }
# Line 1555  for (;;) Line 2284  for (;;)
2284    
2285        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2286          {          {
2287          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2288              {
2289              CHECK_PARTIAL();
2290              MRRETURN(MATCH_NOMATCH);
2291              }
2292          eptr += length;          eptr += length;
2293          }          }
2294    
# Line 1570  for (;;) Line 2303  for (;;)
2303          {          {
2304          for (fi = min;; fi++)          for (fi = min;; fi++)
2305            {            {
2306            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2307            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2308            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2309              RRETURN(MATCH_NOMATCH);            if (!match_ref(offset, eptr, length, md, ims))
2310                {
2311                CHECK_PARTIAL();
2312                MRRETURN(MATCH_NOMATCH);
2313                }
2314            eptr += length;            eptr += length;
2315            }            }
2316          /* Control never gets here */          /* Control never gets here */
# Line 1586  for (;;) Line 2323  for (;;)
2323          pp = eptr;          pp = eptr;
2324          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2325            {            {
2326            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2327                {
2328                CHECK_PARTIAL();
2329                break;
2330                }
2331            eptr += length;            eptr += length;
2332            }            }
2333          while (eptr >= pp)          while (eptr >= pp)
2334            {            {
2335            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2336            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2337            eptr -= length;            eptr -= length;
2338            }            }
2339          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2340          }          }
2341        }        }
2342      /* Control never gets here */      /* Control never gets here */
2343    
   
   
2344      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2345      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2346      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1656  for (;;) Line 2395  for (;;)
2395          {          {
2396          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2397            {            {
2398            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2399                {
2400                SCHECK_PARTIAL();
2401                MRRETURN(MATCH_NOMATCH);
2402                }
2403            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2404            if (c > 255)            if (c > 255)
2405              {              {
2406              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2407              }              }
2408            else            else
2409              {              {
2410              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2411              }              }
2412            }            }
2413          }          }
# Line 1674  for (;;) Line 2417  for (;;)
2417          {          {
2418          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2419            {            {
2420            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2421                {
2422                SCHECK_PARTIAL();
2423                MRRETURN(MATCH_NOMATCH);
2424                }
2425            c = *eptr++;            c = *eptr++;
2426            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2427            }            }
2428          }          }
2429    
# Line 1696  for (;;) Line 2443  for (;;)
2443            {            {
2444            for (fi = min;; fi++)            for (fi = min;; fi++)
2445              {              {
2446              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2447              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2448              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2449                if (eptr >= md->end_subject)
2450                  {
2451                  SCHECK_PARTIAL();
2452                  MRRETURN(MATCH_NOMATCH);
2453                  }
2454              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2455              if (c > 255)              if (c > 255)
2456                {                {
2457                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2458                }                }
2459              else              else
2460                {                {
2461                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2462                }                }
2463              }              }
2464            }            }
# Line 1716  for (;;) Line 2468  for (;;)
2468            {            {
2469            for (fi = min;; fi++)            for (fi = min;; fi++)
2470              {              {
2471              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2472              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2473              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2474                if (eptr >= md->end_subject)
2475                  {
2476                  SCHECK_PARTIAL();
2477                  MRRETURN(MATCH_NOMATCH);
2478                  }
2479              c = *eptr++;              c = *eptr++;
2480              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2481              }              }
2482            }            }
2483          /* Control never gets here */          /* Control never gets here */
# Line 1739  for (;;) Line 2496  for (;;)
2496            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2497              {              {
2498              int len = 1;              int len = 1;
2499              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2500                  {
2501                  SCHECK_PARTIAL();
2502                  break;
2503                  }
2504              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2505              if (c > 255)              if (c > 255)
2506                {                {
# Line 1753  for (;;) Line 2514  for (;;)
2514              }              }
2515            for (;;)            for (;;)
2516              {              {
2517              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2518              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2519              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2520              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1765  for (;;) Line 2526  for (;;)
2526            {            {
2527            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2528              {              {
2529              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2530                  {
2531                  SCHECK_PARTIAL();
2532                  break;
2533                  }
2534              c = *eptr;              c = *eptr;
2535              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2536              eptr++;              eptr++;
2537              }              }
2538            while (eptr >= pp)            while (eptr >= pp)
2539              {              {
2540              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2541              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2542              eptr--;              eptr--;
2543              }              }
2544            }            }
2545    
2546          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2547          }          }
2548        }        }
2549      /* Control never gets here */      /* Control never gets here */
2550    
2551    
2552      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2553      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2554        mode, because Unicode properties are supported in non-UTF-8 mode. */
2555    
2556  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2557      case OP_XCLASS:      case OP_XCLASS:
# Line 1826  for (;;) Line 2592  for (;;)
2592    
2593        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2594          {          {
2595          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2596          GETCHARINC(c, eptr);            {
2597          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2598              MRRETURN(MATCH_NOMATCH);
2599              }
2600            GETCHARINCTEST(c, eptr);
2601            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2602          }          }
2603    
2604        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1843  for (;;) Line 2613  for (;;)
2613          {          {
2614          for (fi = min;; fi++)          for (fi = min;; fi++)
2615            {            {
2616            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2617            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2618            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2619            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2620            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2621                SCHECK_PARTIAL();
2622                MRRETURN(MATCH_NOMATCH);
2623                }
2624              GETCHARINCTEST(c, eptr);
2625              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2626            }            }
2627          /* Control never gets here */          /* Control never gets here */
2628          }          }
# Line 1860  for (;;) Line 2635  for (;;)
2635          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2636            {            {
2637            int len = 1;            int len = 1;
2638            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2639            GETCHARLEN(c, eptr, len);              {
2640                SCHECK_PARTIAL();
2641                break;
2642                }
2643              GETCHARLENTEST(c, eptr, len);
2644            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2645            eptr += len;            eptr += len;
2646            }            }
2647          for(;;)          for(;;)
2648            {            {
2649            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2650            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2651            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2652            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2653            }            }
2654          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2655          }          }
2656    
2657        /* Control never gets here */        /* Control never gets here */
# Line 1888  for (;;) Line 2667  for (;;)
2667        length = 1;        length = 1;
2668        ecode++;        ecode++;
2669        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2670        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2671        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2672            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2673            MRRETURN(MATCH_NOMATCH);
2674            }
2675          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2676        }        }
2677      else      else
2678  #endif  #endif
2679    
2680      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2681        {        {
2682        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2683        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2684            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2685            MRRETURN(MATCH_NOMATCH);
2686            }
2687          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2688        ecode += 2;        ecode += 2;
2689        }        }
2690      break;      break;
# Line 1912  for (;;) Line 2699  for (;;)
2699        ecode++;        ecode++;
2700        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2701    
2702        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2703            {
2704            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2705            MRRETURN(MATCH_NOMATCH);
2706            }
2707    
2708        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2709        can use the fast lookup table. */        can use the fast lookup table. */
2710    
2711        if (fc < 128)        if (fc < 128)
2712          {          {
2713          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2714          }          }
2715    
2716        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
2717    
2718        else        else
2719          {          {
2720          int dc;          unsigned int dc;
2721          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2722          ecode += length;          ecode += length;
2723    
# Line 1936  for (;;) Line 2727  for (;;)
2727          if (fc != dc)          if (fc != dc)
2728            {            {
2729  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2730            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2731  #endif  #endif
2732              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2733            }            }
2734          }          }
2735        }        }
# Line 1947  for (;;) Line 2738  for (;;)
2738    
2739      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2740        {        {
2741        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2742        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2743            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2744            MRRETURN(MATCH_NOMATCH);
2745            }
2746          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2747        ecode += 2;        ecode += 2;
2748        }        }
2749      break;      break;
2750    
2751      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2752    
2753      case OP_EXACT:      case OP_EXACT:
2754      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2755      ecode += 3;      ecode += 3;
2756      goto REPEATCHAR;      goto REPEATCHAR;
2757    
2758        case OP_POSUPTO:
2759        possessive = TRUE;
2760        /* Fall through */
2761    
2762      case OP_UPTO:      case OP_UPTO:
2763      case OP_MINUPTO:      case OP_MINUPTO:
2764      min = 0;      min = 0;
# Line 1968  for (;;) Line 2767  for (;;)
2767      ecode += 3;      ecode += 3;
2768      goto REPEATCHAR;      goto REPEATCHAR;
2769    
2770        case OP_POSSTAR:
2771        possessive = TRUE;
2772        min = 0;
2773        max = INT_MAX;
2774        ecode++;
2775        goto REPEATCHAR;
2776    
2777        case OP_POSPLUS:
2778        possessive = TRUE;
2779        min = 1;
2780        max = INT_MAX;
2781        ecode++;
2782        goto REPEATCHAR;
2783    
2784        case OP_POSQUERY:
2785        possessive = TRUE;
2786        min = 0;
2787        max = 1;
2788        ecode++;
2789        goto REPEATCHAR;
2790    
2791      case OP_STAR:      case OP_STAR:
2792      case OP_MINSTAR:      case OP_MINSTAR:
2793      case OP_PLUS:      case OP_PLUS:
# Line 1976  for (;;) Line 2796  for (;;)
2796      case OP_MINQUERY:      case OP_MINQUERY:
2797      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2798      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2799    
2800      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2801      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2802      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2803    
2804      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2805    
2806      REPEATCHAR:      REPEATCHAR:
2807  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1991  for (;;) Line 2810  for (;;)
2810        length = 1;        length = 1;
2811        charptr = ecode;        charptr = ecode;
2812        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2813        ecode += length;        ecode += length;
2814    
2815        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 1999  for (;;) Line 2817  for (;;)
2817    
2818        if (length > 1)        if (length > 1)
2819          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2820  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2821          int othercase;          unsigned int othercase;
2822          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2823              (othercase = _pcre_ucp_othercase(fc)) >= 0 &&              (othercase = UCD_OTHERCASE(fc)) != fc)
              othercase >= 0)  
2824            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2825            else oclength = 0;
2826  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2827    
2828          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2829            {            {
2830            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2831            /* Need braces because of following else */              memcmp(eptr, charptr, length) == 0) eptr += length;
2832            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2833              else if (oclength > 0 &&
2834                       eptr <= md->end_subject - oclength &&
2835                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2836    #endif  /* SUPPORT_UCP */
2837            else            else
2838              {              {
2839              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2840              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2841              }              }
2842            }            }
2843    
# Line 2028  for (;;) Line 2847  for (;;)
2847            {            {
2848            for (fi = min;; fi++)            for (fi = min;; fi++)
2849              {              {
2850              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2851              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2852              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2853              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2854              /* Need braces because of following else */                memcmp(eptr, charptr, length) == 0) eptr += length;
2855              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2856                else if (oclength > 0 &&
2857                         eptr <= md->end_subject - oclength &&
2858                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2859    #endif  /* SUPPORT_UCP */
2860              else              else
2861                {                {
2862                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2863                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2864                }                }
2865              }              }
2866            /* Control never gets here */            /* Control never gets here */
2867            }            }
2868          else  
2869            else  /* Maximize */
2870            {            {
2871            pp = eptr;            pp = eptr;
2872            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2873              {              {
2874              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2875              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2876              else if (oclength == 0) break;  #ifdef SUPPORT_UCP
2877                else if (oclength > 0 &&
2878                         eptr <= md->end_subject - oclength &&
2879                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2880    #endif  /* SUPPORT_UCP */
2881              else              else
2882                {                {
2883                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2884                eptr += oclength;                break;
2885                }                }
2886              }              }
2887            while (eptr >= pp)  
2888             {            if (possessive) continue;
2889             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);  
2890             if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for(;;)
2891             eptr -= length;              {
2892             }              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2893            RRETURN(MATCH_NOMATCH);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2894                if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2895    #ifdef SUPPORT_UCP
2896                eptr--;
2897                BACKCHAR(eptr);
2898    #else   /* without SUPPORT_UCP */
2899                eptr -= length;
2900    #endif  /* SUPPORT_UCP */
2901                }
2902            }            }
2903          /* Control never gets here */          /* Control never gets here */
2904          }          }
# Line 2075  for (;;) Line 2911  for (;;)
2911  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2912    
2913      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2914        {  
2915        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2916    
2917      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2918      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2096  for (;;) Line 2930  for (;;)
2930        {        {
2931        fc = md->lcc[fc];        fc = md->lcc[fc];
2932        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2933          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2934            if (eptr >= md->end_subject)
2935              {
2936              SCHECK_PARTIAL();
2937              MRRETURN(MATCH_NOMATCH);
2938              }
2939            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2940            }
2941        if (min == max) continue;        if (min == max) continue;
2942        if (minimize)        if (minimize)
2943          {          {
2944          for (fi = min;; fi++)          for (fi = min;; fi++)
2945            {            {
2946            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2947            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2948            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2949                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2950              RRETURN(MATCH_NOMATCH);              {
2951                SCHECK_PARTIAL();
2952                MRRETURN(MATCH_NOMATCH);
2953                }
2954              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2955            }            }
2956          /* Control never gets here */          /* Control never gets here */
2957          }          }
2958        else        else  /* Maximize */
2959          {          {
2960          pp = eptr;          pp = eptr;
2961          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2962            {            {
2963            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2964                {
2965                SCHECK_PARTIAL();
2966                break;
2967                }
2968              if (fc != md->lcc[*eptr]) break;
2969            eptr++;            eptr++;
2970            }            }
2971    
2972            if (possessive) continue;
2973    
2974          while (eptr >= pp)          while (eptr >= pp)
2975            {            {
2976            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2977            eptr--;            eptr--;
2978            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2979            }            }
2980          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2981          }          }
2982        /* Control never gets here */        /* Control never gets here */
2983        }        }
# Line 2133  for (;;) Line 2986  for (;;)
2986    
2987      else      else
2988        {        {
2989        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2990            {
2991            if (eptr >= md->end_subject)
2992              {
2993              SCHECK_PARTIAL();
2994              MRRETURN(MATCH_NOMATCH);
2995              }
2996            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2997            }
2998    
2999        if (min == max) continue;        if (min == max) continue;
3000    
3001        if (minimize)        if (minimize)
3002          {          {
3003          for (fi = min;; fi++)          for (fi = min;; fi++)
3004            {            {
3005            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3006            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3007            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3008              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3009                {
3010                SCHECK_PARTIAL();
3011                MRRETURN(MATCH_NOMATCH);
3012                }
3013              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3014            }            }
3015          /* Control never gets here */          /* Control never gets here */
3016          }          }
3017        else        else  /* Maximize */
3018          {          {
3019          pp = eptr;          pp = eptr;
3020          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3021            {            {
3022            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
3023                {
3024                SCHECK_PARTIAL();
3025                break;
3026                }
3027              if (fc != *eptr) break;
3028            eptr++;            eptr++;
3029            }            }
3030            if (possessive) continue;
3031    
3032          while (eptr >= pp)          while (eptr >= pp)
3033            {            {
3034            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3035            eptr--;            eptr--;
3036            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3037            }            }
3038          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3039          }          }
3040        }        }
3041      /* Control never gets here */      /* Control never gets here */
# Line 2169  for (;;) Line 3044  for (;;)
3044      checking can be multibyte. */      checking can be multibyte. */
3045    
3046      case OP_NOT:      case OP_NOT:
3047      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
3048          {
3049          SCHECK_PARTIAL();
3050          MRRETURN(MATCH_NOMATCH);
3051          }
3052      ecode++;      ecode++;
3053      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3054      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2178  for (;;) Line 3057  for (;;)
3057        if (c < 256)        if (c < 256)
3058  #endif  #endif
3059        c = md->lcc[c];        c = md->lcc[c];
3060        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3061        }        }
3062      else      else
3063        {        {
3064        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3065        }        }
3066      break;      break;
3067    
# Line 2206  for (;;) Line 3085  for (;;)
3085      ecode += 3;      ecode += 3;
3086      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3087    
3088        case OP_NOTPOSSTAR:
3089        possessive = TRUE;
3090        min = 0;
3091        max = INT_MAX;
3092        ecode++;
3093        goto REPEATNOTCHAR;
3094    
3095        case OP_NOTPOSPLUS:
3096        possessive = TRUE;
3097        min = 1;
3098        max = INT_MAX;
3099        ecode++;
3100        goto REPEATNOTCHAR;
3101    
3102        case OP_NOTPOSQUERY:
3103        possessive = TRUE;
3104        min = 0;
3105        max = 1;
3106        ecode++;
3107        goto REPEATNOTCHAR;
3108    
3109        case OP_NOTPOSUPTO:
3110        possessive = TRUE;
3111        min = 0;
3112        max = GET2(ecode, 1);
3113        ecode += 3;
3114        goto REPEATNOTCHAR;
3115    
3116      case OP_NOTSTAR:      case OP_NOTSTAR:
3117      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
3118      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2218  for (;;) Line 3125  for (;;)
3125      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3126      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3127    
3128      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3129    
3130      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3131      fc = *ecode++;      fc = *ecode++;
3132    
3133      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2245  for (;;) Line 3149  for (;;)
3149        /* UTF-8 mode */        /* UTF-8 mode */
3150        if (utf8)        if (utf8)
3151          {          {
3152          register int d;          register unsigned int d;
3153          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3154            {            {
3155              if (eptr >= md->end_subject)
3156                {
3157                SCHECK_PARTIAL();
3158                MRRETURN(MATCH_NOMATCH);
3159                }
3160            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3161            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3162            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3163            }            }
3164          }          }
3165        else        else
# Line 2259  for (;;) Line 3168  for (;;)
3168        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3169          {          {
3170          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3171            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3172              if (eptr >= md->end_subject)
3173                {
3174                SCHECK_PARTIAL();
3175                MRRETURN(MATCH_NOMATCH);
3176                }
3177              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3178              }
3179          }          }
3180    
3181        if (min == max) continue;        if (min == max) continue;
# Line 2270  for (;;) Line 3186  for (;;)
3186          /* UTF-8 mode */          /* UTF-8 mode */
3187          if (utf8)          if (utf8)
3188            {            {
3189            register int d;            register unsigned int d;
3190            for (fi = min;; fi++)            for (fi = min;; fi++)
3191              {              {
3192              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3193              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3194                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3195                if (eptr >= md->end_subject)
3196                  {
3197                  SCHECK_PARTIAL();
3198                  MRRETURN(MATCH_NOMATCH);
3199                  }
3200              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3201              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3202              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3203              }              }
3204            }            }
3205          else          else
# Line 2287  for (;;) Line 3208  for (;;)
3208            {            {
3209            for (fi = min;; fi++)            for (fi = min;; fi++)
3210              {              {
3211              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3212              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3213              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3214                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3215                  {
3216                  SCHECK_PARTIAL();
3217                  MRRETURN(MATCH_NOMATCH);
3218                  }
3219                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3220              }              }
3221            }            }
3222          /* Control never gets here */          /* Control never gets here */
# Line 2306  for (;;) Line 3232  for (;;)
3232          /* UTF-8 mode */          /* UTF-8 mode */
3233          if (utf8)          if (utf8)
3234            {            {
3235            register int d;            register unsigned int d;
3236            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3237              {              {
3238              int len = 1;              int len = 1;
3239              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3240                  {
3241                  SCHECK_PARTIAL();
3242                  break;
3243                  }
3244              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3245              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3246              if (fc == d) break;              if (fc == d) break;
3247              eptr += len;              eptr += len;
3248              }              }
3249            for(;;)          if (possessive) continue;
3250            for(;;)
3251              {              {
3252              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3253              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3254              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3255              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2330  for (;;) Line 3261  for (;;)
3261            {            {
3262            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3263              {              {
3264              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3265                  {
3266                  SCHECK_PARTIAL();
3267                  break;
3268                  }
3269                if (fc == md->lcc[*eptr]) break;
3270              eptr++;              eptr++;
3271              }              }
3272              if (possessive) continue;
3273            while (eptr >= pp)            while (eptr >= pp)
3274              {              {
3275              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3276              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3277              eptr--;              eptr--;
3278              }              }
3279            }            }
3280    
3281          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3282          }          }
3283        /* Control never gets here */        /* Control never gets here */
3284        }        }
# Line 2354  for (;;) Line 3291  for (;;)
3291        /* UTF-8 mode */        /* UTF-8 mode */
3292        if (utf8)        if (utf8)
3293          {          {
3294          register int d;          register unsigned int d;
3295          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3296            {            {
3297              if (eptr >= md->end_subject)
3298                {
3299                SCHECK_PARTIAL();
3300                MRRETURN(MATCH_NOMATCH);
3301                }
3302            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3303            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3304            }            }
3305          }          }
3306        else        else
# Line 2366  for (;;) Line 3308  for (;;)
3308        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3309          {          {
3310          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3311            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3312              if (eptr >= md->end_subject)
3313                {
3314                SCHECK_PARTIAL();
3315                MRRETURN(MATCH_NOMATCH);
3316                }
3317              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3318              }
3319          }          }
3320    
3321        if (min == max) continue;        if (min == max) continue;
# Line 2377  for (;;) Line 3326  for (;;)
3326          /* UTF-8 mode */          /* UTF-8 mode */
3327          if (utf8)          if (utf8)
3328            {            {
3329            register int d;            register unsigned int d;
3330            for (fi = min;; fi++)            for (fi = min;; fi++)
3331              {              {
3332              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3333              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3334                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3335                if (eptr >= md->end_subject)
3336                  {
3337                  SCHECK_PARTIAL();
3338                  MRRETURN(MATCH_NOMATCH);
3339                  }
3340              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3341              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3342              }              }
3343            }            }
3344          else          else
# Line 2393  for (;;) Line 3347  for (;;)
3347            {            {
3348            for (fi = min;; fi++)            for (fi = min;; fi++)
3349              {              {
3350              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3351              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3352              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3353                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3354                  {
3355                  SCHECK_PARTIAL();
3356                  MRRETURN(MATCH_NOMATCH);
3357                  }
3358                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3359              }              }
3360            }            }
3361          /* Control never gets here */          /* Control never gets here */
# Line 2412  for (;;) Line 3371  for (;;)
3371          /* UTF-8 mode */          /* UTF-8 mode */
3372          if (utf8)          if (utf8)
3373            {            {
3374            register int d;            register unsigned int d;
3375            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3376              {              {
3377              int len = 1;              int len = 1;
3378              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3379                  {
3380                  SCHECK_PARTIAL();
3381                  break;
3382                  }
3383              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3384              if (fc == d) break;              if (fc == d) break;
3385              eptr += len;              eptr += len;
3386              }              }
3387              if (possessive) continue;
3388            for(;;)            for(;;)
3389              {              {
3390              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3391              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3392              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3393              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2435  for (;;) Line 3399  for (;;)
3399            {            {
3400            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3401              {              {
3402              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3403                  {
3404                  SCHECK_PARTIAL();
3405                  break;
3406                  }
3407                if (fc == *eptr) break;
3408              eptr++;              eptr++;
3409              }              }
3410              if (possessive) continue;
3411            while (eptr >= pp)            while (eptr >= pp)
3412              {              {
3413              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3414              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3415              eptr--;              eptr--;
3416              }              }
3417            }            }
3418    
3419          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3420          }          }
3421        }        }
3422      /* Control never gets here */      /* Control never gets here */
# Line 2469  for (;;) Line 3439  for (;;)
3439      ecode += 3;      ecode += 3;
3440      goto REPEATTYPE;      goto REPEATTYPE;
3441    
3442        case OP_TYPEPOSSTAR:
3443        possessive = TRUE;
3444        min = 0;
3445        max = INT_MAX;
3446        ecode++;
3447        goto REPEATTYPE;
3448    
3449        case OP_TYPEPOSPLUS:
3450        possessive = TRUE;
3451        min = 1;
3452        max = INT_MAX;
3453        ecode++;
3454        goto REPEATTYPE;
3455    
3456        case OP_TYPEPOSQUERY:
3457        possessive = TRUE;
3458        min = 0;
3459        max = 1;
3460        ecode++;
3461        goto REPEATTYPE;
3462    
3463        case OP_TYPEPOSUPTO:
3464        possessive = TRUE;
3465        min = 0;
3466        max = GET2(ecode, 1);
3467        ecode += 3;
3468        goto REPEATTYPE;
3469    
3470      case OP_TYPESTAR:      case OP_TYPESTAR:
3471      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
3472      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2500  for (;;) Line 3498  for (;;)
3498    
3499      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3500      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3501      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3502      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3503      and single-bytes. */      and single-bytes. */
3504    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3505      if (min > 0)      if (min > 0)
3506        {        {
3507  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2515  for (;;) Line 3510  for (;;)
3510          switch(prop_type)          switch(prop_type)
3511            {            {
3512            case PT_ANY:            case PT_ANY:
3513            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3514            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3515              {              {
3516              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3517              GETCHARINC(c, eptr);                {
3518                  SCHECK_PARTIAL();
3519                  MRRETURN(MATCH_NOMATCH);
3520                  }
3521                GETCHARINCTEST(c, eptr);
3522              }              }
3523            break;            break;
3524    
3525            case PT_LAMP:            case PT_LAMP:
3526            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3527              {              {
3528              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3529              GETCHARINC(c, eptr);                {
3530              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3531                  MRRETURN(MATCH_NOMATCH);
3532                  }
3533                GETCHARINCTEST(c, eptr);
3534                prop_chartype = UCD_CHARTYPE(c);
3535              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3536                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3537                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3538                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3539              }              }
3540            break;            break;
3541    
3542            case PT_GC:            case PT_GC:
3543            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3544              {              {
3545              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3546              GETCHARINC(c, eptr);                {
3547              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3548                  MRRETURN(MATCH_NOMATCH);
3549                  }
3550                GETCHARINCTEST(c, eptr);
3551                prop_category = UCD_CATEGORY(c);
3552              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3553                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3554              }              }
3555            break;            break;
3556    
3557            case PT_PC:            case PT_PC:
3558            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3559              {              {
3560              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3561              GETCHARINC(c, eptr);                {
3562              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3563                  MRRETURN(MATCH_NOMATCH);
3564                  }
3565                GETCHARINCTEST(c, eptr);
3566                prop_chartype = UCD_CHARTYPE(c);
3567              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3568                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3569              }              }
3570            break;            break;
3571    
3572            case PT_SC:            case PT_SC:
3573            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3574              {              {
3575              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3576              GETCHARINC(c, eptr);                {
3577              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3578                  MRRETURN(MATCH_NOMATCH);
3579                  }
3580                GETCHARINCTEST(c, eptr);
3581                prop_script = UCD_SCRIPT(c);
3582              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3583                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3584                }
3585              break;
3586    
3587              case PT_ALNUM:
3588              for (i = 1; i <= min; i++)
3589                {
3590                if (eptr >= md->end_subject)
3591                  {
3592                  SCHECK_PARTIAL();
3593                  MRRETURN(MATCH_NOMATCH);
3594                  }
3595                GETCHARINCTEST(c, eptr);
3596                prop_category = UCD_CATEGORY(c);
3597                if ((prop_category == ucp_L || prop_category == ucp_N)
3598                       == prop_fail_result)
3599                  MRRETURN(MATCH_NOMATCH);
3600                }
3601              break;
3602    
3603              case PT_SPACE:    /* Perl space */
3604              for (i = 1; i <= min; i++)
3605                {
3606                if (eptr >= md->end_subject)
3607                  {
3608                  SCHECK_PARTIAL();
3609                  MRRETURN(MATCH_NOMATCH);
3610                  }
3611                GETCHARINCTEST(c, eptr);
3612                prop_category = UCD_CATEGORY(c);
3613                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3614                     c == CHAR_FF || c == CHAR_CR)
3615                       == prop_fail_result)
3616                  MRRETURN(MATCH_NOMATCH);
3617                }
3618              break;
3619    
3620              case PT_PXSPACE:  /* POSIX space */
3621              for (i = 1; i <= min; i++)
3622                {
3623                if (eptr >= md->end_subject)
3624                  {
3625                  SCHECK_PARTIAL();
3626                  MRRETURN(MATCH_NOMATCH);
3627                  }
3628                GETCHARINCTEST(c, eptr);
3629                prop_category = UCD_CATEGORY(c);
3630                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3631                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3632                       == prop_fail_result)
3633                  MRRETURN(MATCH_NOMATCH);
3634                }
3635              break;
3636    
3637              case PT_WORD:
3638              for (i = 1; i <= min; i++)
3639                {
3640                if (eptr >= md->end_subject)
3641                  {
3642                  SCHECK_PARTIAL();
3643                  MRRETURN(MATCH_NOMATCH);
3644                  }
3645                GETCHARINCTEST(c, eptr);
3646                prop_category = UCD_CATEGORY(c);
3647                if ((prop_category == ucp_L || prop_category == ucp_N ||
3648                     c == CHAR_UNDERSCORE)
3649                       == prop_fail_result)
3650                  MRRETURN(MATCH_NOMATCH);
3651              }              }
3652            break;            break;
3653    
3654              /* This should not occur */
3655    
3656            default:            default:
3657            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
3658            }            }
3659          }          }
3660    
# Line 2582  for (;;) Line 3665  for (;;)
3665          {          {
3666          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3667            {            {
3668              if (eptr >= md->end_subject)
3669                {
3670                SCHECK_PARTIAL();
3671                MRRETURN(MATCH_NOMATCH);
3672                }
3673            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3674            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3675            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3676            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3677              {              {
3678              int len = 1;              int len = 1;
3679              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3680                {                else { GETCHARLEN(c, eptr, len); }
3681                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3682              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3683              eptr += len;              eptr += len;
3684              }              }
# Line 2610  for (;;) Line 3696  for (;;)
3696          case OP_ANY:          case OP_ANY:
3697          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3698            {            {
3699            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3700                 ((ims & PCRE_DOTALL) == 0 &&              {
3701                   eptr <= md->end_subject - md->nllen &&              SCHECK_PARTIAL();
3702                   IS_NEWLINE(eptr)))              MRRETURN(MATCH_NOMATCH);
3703              RRETURN(MATCH_NOMATCH);              }
3704              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3705              eptr++;
3706              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3707              }
3708            break;
3709    
3710            case OP_ALLANY:
3711            for (i = 1; i <= min; i++)
3712              {
3713              if (eptr >= md->end_subject)
3714                {
3715                SCHECK_PARTIAL();
3716                MRRETURN(MATCH_NOMATCH);
3717                }
3718            eptr++;            eptr++;
3719            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3720            }            }
3721          break;          break;
3722    
3723          case OP_ANYBYTE:          case OP_ANYBYTE:
3724            if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3725          eptr += min;          eptr += min;
3726          break;          break;
3727    
3728            case OP_ANYNL:
3729            for (i = 1; i <= min; i++)
3730              {
3731              if (eptr >= md->end_subject)
3732                {
3733                SCHECK_PARTIAL();
3734                MRRETURN(MATCH_NOMATCH);
3735                }
3736              GETCHARINC(c, eptr);
3737              switch(c)
3738                {
3739                default: MRRETURN(MATCH_NOMATCH);
3740                case 0x000d:
3741                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3742                break;
3743    
3744                case 0x000a:
3745                break;
3746    
3747                case 0x000b:
3748                case 0x000c:
3749                case 0x0085:
3750                case 0x2028:
3751                case 0x2029:
3752                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3753                break;
3754                }
3755              }
3756            break;
3757    
3758            case OP_NOT_HSPACE:
3759            for (i = 1; i <= min; i++)
3760              {
3761              if (eptr >= md->end_subject)
3762                {
3763                SCHECK_PARTIAL();
3764                MRRETURN(MATCH_NOMATCH);
3765                }
3766              GETCHARINC(c, eptr);
3767              switch(c)
3768                {
3769                default: break;
3770                case 0x09:      /* HT */
3771                case 0x20:      /* SPACE */
3772                case 0xa0:      /* NBSP */
3773                case 0x1680:    /* OGHAM SPACE MARK */
3774                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3775                case 0x2000:    /* EN QUAD */
3776                case 0x2001:    /* EM QUAD */
3777                case 0x2002:    /* EN SPACE */
3778                case 0x2003:    /* EM SPACE */
3779                case 0x2004:    /* THREE-PER-EM SPACE */
3780                case 0x2005:    /* FOUR-PER-EM SPACE */
3781                case 0x2006:    /* SIX-PER-EM SPACE */
3782                case 0x2007:    /* FIGURE SPACE */
3783                case 0x2008:    /* PUNCTUATION SPACE */
3784                case 0x2009:    /* THIN SPACE */
3785                case 0x200A:    /* HAIR SPACE */
3786                case 0x202f:    /* NARROW NO-BREAK SPACE */
3787                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3788                case 0x3000:    /* IDEOGRAPHIC SPACE */
3789                MRRETURN(MATCH_NOMATCH);
3790                }
3791              }
3792            break;
3793    
3794            case OP_HSPACE:
3795            for (i = 1; i <= min; i++)
3796              {
3797              if (eptr >= md->end_subject)
3798                {
3799                SCHECK_PARTIAL();
3800                MRRETURN(MATCH_NOMATCH);
3801                }
3802              GETCHARINC(c, eptr);
3803              switch(c)
3804                {
3805                default: MRRETURN(MATCH_NOMATCH);
3806                case 0x09:      /* HT */
3807                case 0x20:      /* SPACE */
3808                case 0xa0:      /* NBSP */
3809                case 0x1680:    /* OGHAM SPACE MARK */
3810                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3811                case 0x2000:    /* EN QUAD */
3812                case 0x2001:    /* EM QUAD */
3813                case 0x2002:    /* EN SPACE */
3814                case 0x2003:    /* EM SPACE */
3815                case 0x2004:    /* THREE-PER-EM SPACE */
3816                case 0x2005:    /* FOUR-PER-EM SPACE */
3817                case 0x2006:    /* SIX-PER-EM SPACE */
3818                case 0x2007:    /* FIGURE SPACE */
3819                case 0x2008:    /* PUNCTUATION SPACE */
3820                case 0x2009:    /* THIN SPACE */
3821                case 0x200A:    /* HAIR SPACE */
3822                case 0x202f:    /* NARROW NO-BREAK SPACE */
3823                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3824                case 0x3000:    /* IDEOGRAPHIC SPACE */
3825                break;
3826                }
3827              }
3828            break;
3829    
3830            case OP_NOT_VSPACE:
3831            for (i = 1; i <= min; i++)
3832              {
3833              if (eptr >= md->end_subject)
3834                {
3835                SCHECK_PARTIAL();
3836                MRRETURN(MATCH_NOMATCH);
3837                }
3838              GETCHARINC(c, eptr);
3839              switch(c)
3840                {
3841                default: break;
3842                case 0x0a:      /* LF */
3843                case 0x0b:      /* VT */
3844                case 0x0c:      /* FF */
3845                case 0x0d:      /* CR */
3846                case 0x85:      /* NEL */
3847                case 0x2028:    /* LINE SEPARATOR */
3848                case 0x2029:    /* PARAGRAPH SEPARATOR */
3849                MRRETURN(MATCH_NOMATCH);
3850                }
3851              }
3852            break;
3853    
3854            case OP_VSPACE:
3855            for (i = 1; i <= min; i++)
3856              {
3857