/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 551 by ph10, Sun Oct 10 17:33:07 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45  #define NLBLOCK md           /* The block containing newline information */  #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49    #define NLBLOCK md             /* Block containing newline information */
50    #define PSSTART start_subject  /* Field containing processed string start */
51    #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    /* Undefine some potentially clashing cpp symbols */
56    
57  /* Structure for building a chain of data that actually lives on the  #undef min
58  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   USPTR epb_saved_eptr;  
 } eptrblock;  
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_ACCEPT       (-999)
75    #define MATCH_COMMIT       (-998)
76    #define MATCH_PRUNE        (-997)
77    #define MATCH_SKIP         (-996)
78    #define MATCH_SKIP_ARG     (-995)
79    #define MATCH_THEN         (-994)
80    
81    /* This is a convenience macro for code that occurs many times. */
82    
83    #define MRRETURN(ra) \
84      { \
85      md->mark = markptr; \
86      RRETURN(ra); \
87      }
88    
89  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
90  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 81  static const char rep_max[] = { 0, 0, 0, Line 99  static const char rep_max[] = { 0, 0, 0,
99    
100    
101    
102  #ifdef DEBUG  #ifdef PCRE_DEBUG
103  /*************************************************  /*************************************************
104  *        Debugging function to print chars       *  *        Debugging function to print chars       *
105  *************************************************/  *************************************************/
# Line 101  Returns:     nothing Line 119  Returns:     nothing
119  static void  static void
120  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121  {  {
122  int c;  unsigned int c;
123  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124  while (length-- > 0)  while (length-- > 0)
125    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 133  match_ref(int offset, register USPTR ept Line 151  match_ref(int offset, register USPTR ept
151  {  {
152  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
153    
154  #ifdef DEBUG  #ifdef PCRE_DEBUG
155  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
156    printf("matching subject <null>");    printf("matching subject <null>");
157  else  else
# Line 150  printf("\n"); Line 168  printf("\n");
168    
169  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
170    
171  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172    properly if Unicode properties are supported. Otherwise, we can check only
173    ASCII characters. */
174    
175  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
176    {    {
177    #ifdef SUPPORT_UTF8
178    #ifdef SUPPORT_UCP
179      if (md->utf8)
180        {
181        USPTR endptr = eptr + length;
182        while (eptr < endptr)
183          {
184          int c, d;
185          GETCHARINC(c, eptr);
186          GETCHARINC(d, p);
187          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188          }
189        }
190      else
191    #endif
192    #endif
193    
194      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195      is no UCP support. */
196    
197    while (length-- > 0)    while (length-- > 0)
198      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199    }    }
200    
201    /* In the caseful case, we can just compare the bytes, whether or not we
202    are in UTF-8 mode. */
203    
204  else  else
205    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
# Line 186  calls by keeping local variables that ne Line 230  calls by keeping local variables that ne
230  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
231  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
232  always used to.  always used to.
233    
234    The original heap-recursive code used longjmp(). However, it seems that this
235    can be very slow on some operating systems. Following a suggestion from Stan
236    Switzer, the use of longjmp() has been abolished, at the cost of having to
237    provide a unique number for each call to RMATCH. There is no way of generating
238    a sequence of numbers at compile time in C. I have given them names, to make
239    them stand out more clearly.
240    
241    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243    tests. Furthermore, not using longjmp() means that local dynamic variables
244    don't have indeterminate values; this has meant that the frame size can be
245    reduced because the result can be "passed back" by straight setting of the
246    variable instead of being passed in the frame.
247  ****************************************************************************  ****************************************************************************
248  ***************************************************************************/  ***************************************************************************/
249    
250    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251    below must be updated in sync.  */
252    
253    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
254           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
255           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258           RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
259           RM61,  RM62 };
260    
261  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
262  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
263    actually used in this definition. */
264    
265  #ifndef NO_RECURSE  #ifndef NO_RECURSE
266  #define REGISTER register  #define REGISTER register
267  #ifdef DEBUG  
268  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #ifdef PCRE_DEBUG
269    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
270    { \    { \
271    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
272    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
273    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
274    }    }
275  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 208  versions and production versions. */ Line 278  versions and production versions. */
278    return ra; \    return ra; \
279    }    }
280  #else  #else
281  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
282    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
283  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
284  #endif  #endif
285    
286  #else  #else
287    
288    
289  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
290  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
291  match(), which never changes. */  argument of match(), which never changes. */
292    
293  #define REGISTER  #define REGISTER
294    
295  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
296    {\    {\
297    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
298    if (setjmp(frame->Xwhere) == 0)\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
299      {\    frame->Xwhere = rw; \
300      newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
301      newframe->Xecode = rb;\    newframe->Xecode = rb;\
302      newframe->Xoffset_top = rc;\    newframe->Xmstart = mstart;\
303      newframe->Xims = re;\    newframe->Xmarkptr = markptr;\
304      newframe->Xeptrb = rf;\    newframe->Xoffset_top = rc;\
305      newframe->Xflags = rg;\    newframe->Xims = re;\
306      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xeptrb = rf;\
307      newframe->Xprevframe = frame;\    newframe->Xflags = rg;\
308      frame = newframe;\    newframe->Xrdepth = frame->Xrdepth + 1;\
309      DPRINTF(("restarting from line %d\n", __LINE__));\    newframe->Xprevframe = frame;\
310      goto HEAP_RECURSE;\    frame = newframe;\
311      }\    DPRINTF(("restarting from line %d\n", __LINE__));\
312    else\    goto HEAP_RECURSE;\
313      {\    L_##rw:\
314      DPRINTF(("longjumped back to line %d\n", __LINE__));\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
315    }    }
316    
317  #define RRETURN(ra)\  #define RRETURN(ra)\
318    {\    {\
319    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
320    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
321    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
322    if (frame != NULL)\    if (frame != NULL)\
323      {\      {\
324      frame->Xresult = ra;\      rrc = ra;\
325      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
326      }\      }\
327    return ra;\    return ra;\
328    }    }
# Line 269  typedef struct heapframe { Line 335  typedef struct heapframe {
335    
336    /* Function arguments that may change */    /* Function arguments that may change */
337    
338    const uschar *Xeptr;    USPTR Xeptr;
339    const uschar *Xecode;    const uschar *Xecode;
340      USPTR Xmstart;
341      USPTR Xmarkptr;
342    int Xoffset_top;    int Xoffset_top;
343    long int Xims;    long int Xims;
344    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 279  typedef struct heapframe { Line 347  typedef struct heapframe {
347    
348    /* Function local variables */    /* Function local variables */
349    
350    const uschar *Xcallpat;    USPTR Xcallpat;
351    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
352    const uschar *Xdata;    USPTR Xcharptr;
353    const uschar *Xnext;  #endif
354    const uschar *Xpp;    USPTR Xdata;
355    const uschar *Xprev;    USPTR Xnext;
356    const uschar *Xsaved_eptr;    USPTR Xpp;
357      USPTR Xprev;
358      USPTR Xsaved_eptr;
359    
360    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
361    
362    BOOL Xcur_is_word;    BOOL Xcur_is_word;
363    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
364    BOOL Xprev_is_word;    BOOL Xprev_is_word;
365    
366    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
# Line 303  typedef struct heapframe { Line 372  typedef struct heapframe {
372    int Xprop_category;    int Xprop_category;
373    int Xprop_chartype;    int Xprop_chartype;
374    int Xprop_script;    int Xprop_script;
375    int *Xprop_test_variable;    int Xoclength;
376      uschar Xocchars[8];
377  #endif  #endif
378    
379      int Xcodelink;
380    int Xctype;    int Xctype;
381    int Xfc;    unsigned int Xfc;
382    int Xfi;    int Xfi;
383    int Xlength;    int Xlength;
384    int Xmax;    int Xmax;
# Line 321  typedef struct heapframe { Line 392  typedef struct heapframe {
392    
393    eptrblock Xnewptrb;    eptrblock Xnewptrb;
394    
395    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
396    
397    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
398    
399  } heapframe;  } heapframe;
400    
# Line 340  typedef struct heapframe { Line 410  typedef struct heapframe {
410  *         Match from current position            *  *         Match from current position            *
411  *************************************************/  *************************************************/
412    
413  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
414  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
415  same response.  same response. */
416    
417    /* These macros pack up tests that are used for partial matching, and which
418    appear several times in the code. We set the "hit end" flag if the pointer is
419    at the end of the subject and also past the start of the subject (i.e.
420    something has been matched). For hard partial matching, we then return
421    immediately. The second one is used when we already know we are past the end of
422    the subject. */
423    
424    #define CHECK_PARTIAL()\
425      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
426        {\
427        md->hitend = TRUE;\
428        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
429        }
430    
431    #define SCHECK_PARTIAL()\
432      if (md->partial != 0 && eptr > mstart)\
433        {\
434        md->hitend = TRUE;\
435        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
436        }
437    
438  Performance note: It might be tempting to extract commonly used fields from the  
439  md structure (e.g. utf8, end_subject) into individual variables to improve  /* Performance note: It might be tempting to extract commonly used fields from
440    the md structure (e.g. utf8, end_subject) into individual variables to improve
441  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
442  made performance worse.  made performance worse.
443    
444  Arguments:  Arguments:
445     eptr        pointer in subject     eptr        pointer to current character in subject
446     ecode       position in code     ecode       pointer to current position in compiled code
447       mstart      pointer to the current match start position (can be modified
448                     by encountering \K)
449       markptr     pointer to the most recent MARK name, or NULL
450     offset_top  current top pointer     offset_top  current top pointer
451     md          pointer to "static" info for the match     md          pointer to "static" info for the match
452     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 362  Arguments: Line 454  Arguments:
454                   brackets - for testing for empty matches                   brackets - for testing for empty matches
455     flags       can contain     flags       can contain
456                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
457                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
458                       group that can match an empty string
459     rdepth      the recursion depth     rdepth      the recursion depth
460    
461  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
462                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
463                   a negative MATCH_xxx value for PRUNE, SKIP, etc
464                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
465                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
466  */  */
467    
468  static int  static int
469  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
470    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
471    int flags, unsigned int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
472  {  {
473  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
474  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
475  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
476    
477  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
478  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
479  register unsigned int  c;  /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
480  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
481    
482    BOOL minimize, possessive; /* Quantifier options */
483    int condcode;
484    
485  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
486  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
487  heap storage. Set up the top-level frame here; others are obtained from the  heap storage. Set up the top-level frame here; others are obtained from the
# Line 392  heap whenever RMATCH() does a "recursion Line 489  heap whenever RMATCH() does a "recursion
489    
490  #ifdef NO_RECURSE  #ifdef NO_RECURSE
491  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
492    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
493  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
494    
495  /* Copy in the original argument variables */  /* Copy in the original argument variables */
496    
497  frame->Xeptr = eptr;  frame->Xeptr = eptr;
498  frame->Xecode = ecode;  frame->Xecode = ecode;
499    frame->Xmstart = mstart;
500    frame->Xmarkptr = markptr;
501  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
502  frame->Xims = ims;  frame->Xims = ims;
503  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 412  HEAP_RECURSE: Line 512  HEAP_RECURSE:
512    
513  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
514  #define ecode              frame->Xecode  #define ecode              frame->Xecode
515    #define mstart             frame->Xmstart
516    #define markptr            frame->Xmarkptr
517  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
518  #define ims                frame->Xims  #define ims                frame->Xims
519  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 424  HEAP_RECURSE: Line 526  HEAP_RECURSE:
526  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
527  #endif  #endif
528  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
529    #define codelink           frame->Xcodelink
530  #define data               frame->Xdata  #define data               frame->Xdata
531  #define next               frame->Xnext  #define next               frame->Xnext
532  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 434  HEAP_RECURSE: Line 537  HEAP_RECURSE:
537    
538  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
539  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
540  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
541    
542  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
# Line 446  HEAP_RECURSE: Line 548  HEAP_RECURSE:
548  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
549  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
550  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
551  #define prop_test_variable frame->Xprop_test_variable  #define oclength           frame->Xoclength
552    #define occhars            frame->Xocchars
553  #endif  #endif
554    
555  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 470  HEAP_RECURSE: Line 573  HEAP_RECURSE:
573  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
574  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
575    
576  #else  #else         /* NO_RECURSE not defined */
577  #define fi i  #define fi i
578  #define fc c  #define fc c
579    
# Line 489  recursion_info new_recursive;      /* wi Line 592  recursion_info new_recursive;      /* wi
592                                     /* that do not have to be preserved over  */                                     /* that do not have to be preserved over  */
593  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
594  BOOL condition;  BOOL condition;
 BOOL minimize;  
595  BOOL prev_is_word;  BOOL prev_is_word;
596    
597  unsigned long int original_ims;  unsigned long int original_ims;
# Line 501  int prop_fail_result; Line 603  int prop_fail_result;
603  int prop_category;  int prop_category;
604  int prop_chartype;  int prop_chartype;
605  int prop_script;  int prop_script;
606  int *prop_test_variable;  int oclength;
607    uschar occhars[8];
608  #endif  #endif
609    
610    int codelink;
611  int ctype;  int ctype;
612  int length;  int length;
613  int max;  int max;
# Line 516  int save_offset1, save_offset2, save_off Line 620  int save_offset1, save_offset2, save_off
620  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
621    
622  eptrblock newptrb;  eptrblock newptrb;
623  #endif  #endif     /* NO_RECURSE */
624    
625  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
626  variables. */  variables. */
# Line 524  variables. */ Line 628  variables. */
628  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
629  prop_value = 0;  prop_value = 0;
630  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_variable = NULL;  
631  #endif  #endif
632    
633    
634  /* This label is used for tail recursion, which is used in a few cases even  /* This label is used for tail recursion, which is used in a few cases even
635  when NO_RECURSE is not defined, in order to reduce the amount of stack that is  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
636  used. Thanks to Ian Taylor for noticing this possibility and sending the  used. Thanks to Ian Taylor for noticing this possibility and sending the
# Line 537  TAIL_RECURSE: Line 641  TAIL_RECURSE:
641  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
642  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
643  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
644  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
645  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
646  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
647  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
648    
649    #ifdef SUPPORT_UTF8
650    utf8 = md->utf8;       /* Local copy of the flag */
651    #else
652    utf8 = FALSE;
653    #endif
654    
655  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
656  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
657    
# Line 550  if (rdepth >= md->match_limit_recursion) Line 660  if (rdepth >= md->match_limit_recursion)
660    
661  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
662    
663  #ifdef SUPPORT_UTF8  /* At the start of a group with an unlimited repeat that may match an empty
664  utf8 = md->utf8;       /* Local copy of the flag */  string, the match_cbegroup flag is set. When this is the case, add the current
665  #else  subject pointer to the chain of such remembered pointers, to be checked when we
666  utf8 = FALSE;  hit the closing ket, in order to break infinite loops that match no characters.
667  #endif  When match() is called in other circumstances, don't add to the chain. The
668    match_cbegroup flag must NOT be used with tail recursion, because the memory
669  /* At the start of a bracketed group, add the current subject pointer to the  block that is used is on the stack, so a new one may be required for each
670  stack of such pointers, to be re-instated at the end of the group when we hit  match(). */
 the closing ket. When match() is called in other circumstances, we don't add to  
 this stack. */  
671    
672  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
673    {    {
   newptrb.epb_prev = eptrb;  
674    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
675      newptrb.epb_prev = eptrb;
676    eptrb = &newptrb;    eptrb = &newptrb;
677    }    }
678    
679  /* Now start processing the operations. */  /* Now start processing the opcodes. */
680    
681  for (;;)  for (;;)
682    {    {
683      minimize = possessive = FALSE;
684    op = *ecode;    op = *ecode;
   minimize = FALSE;  
   
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
685    
686    if (md->partial &&    switch(op)
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
   /* Opening capturing bracket. If there is space in the offset vector, save  
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
687      {      {
688      number = op - OP_BRA;      case OP_MARK:
689        markptr = ecode + 2;
690      /* For extended extraction brackets (large number), we have to fish out the      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
691      number from a dummy opcode at the start. */        ims, eptrb, flags, RM55);
692    
693      if (number > EXTRACT_BASIC_MAX)      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
694        number = GET2(ecode, 2+LINK_SIZE);      argument, and we must check whether that argument matches this MARK's
695        argument. It is passed back in md->start_match_ptr (an overloading of that
696        variable). If it does match, we reset that variable to the current subject
697        position and return MATCH_SKIP. Otherwise, pass back the return code
698        unaltered. */
699    
700        if (rrc == MATCH_SKIP_ARG &&
701            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
702          {
703          md->start_match_ptr = eptr;
704          RRETURN(MATCH_SKIP);
705          }
706    
707        if (md->mark == NULL) md->mark = markptr;
708        RRETURN(rrc);
709    
710        case OP_FAIL:
711        MRRETURN(MATCH_NOMATCH);
712    
713        /* COMMIT overrides PRUNE, SKIP, and THEN */
714    
715        case OP_COMMIT:
716        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
717          ims, eptrb, flags, RM52);
718        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
719            rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
720            rrc != MATCH_THEN)
721          RRETURN(rrc);
722        MRRETURN(MATCH_COMMIT);
723    
724        /* PRUNE overrides THEN */
725    
726        case OP_PRUNE:
727        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
728          ims, eptrb, flags, RM51);
729        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
730        MRRETURN(MATCH_PRUNE);
731    
732        case OP_PRUNE_ARG:
733        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
734          ims, eptrb, flags, RM56);
735        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
736        md->mark = ecode + 2;
737        RRETURN(MATCH_PRUNE);
738    
739        /* SKIP overrides PRUNE and THEN */
740    
741        case OP_SKIP:
742        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
743          ims, eptrb, flags, RM53);
744        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
745          RRETURN(rrc);
746        md->start_match_ptr = eptr;   /* Pass back current position */
747        MRRETURN(MATCH_SKIP);
748    
749        case OP_SKIP_ARG:
750        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
751          ims, eptrb, flags, RM57);
752        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
753          RRETURN(rrc);
754    
755        /* Pass back the current skip name by overloading md->start_match_ptr and
756        returning the special MATCH_SKIP_ARG return code. This will either be
757        caught by a matching MARK, or get to the top, where it is treated the same
758        as PRUNE. */
759    
760        md->start_match_ptr = ecode + 2;
761        RRETURN(MATCH_SKIP_ARG);
762    
763        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
764        the alt that is at the start of the current branch. This makes it possible
765        to skip back past alternatives that precede the THEN within the current
766        branch. */
767    
768        case OP_THEN:
769        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
770          ims, eptrb, flags, RM54);
771        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
772        md->start_match_ptr = ecode - GET(ecode, 1);
773        MRRETURN(MATCH_THEN);
774    
775        case OP_THEN_ARG:
776        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
777          offset_top, md, ims, eptrb, flags, RM58);
778        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
779        md->start_match_ptr = ecode - GET(ecode, 1);
780        md->mark = ecode + LINK_SIZE + 2;
781        RRETURN(MATCH_THEN);
782    
783        /* Handle a capturing bracket. If there is space in the offset vector, save
784        the current subject position in the working slot at the top of the vector.
785        We mustn't change the current values of the data slot, because they may be
786        set from a previous iteration of this group, and be referred to by a
787        reference inside the group.
788    
789        If the bracket fails to match, we need to restore this value and also the
790        values of the final offsets, in case they were set by a previous iteration
791        of the same bracket.
792    
793        If there isn't enough space in the offset vector, treat this as if it were
794        a non-capturing bracket. Don't worry about setting the flag for the error
795        case here; that is handled in the code for KET. */
796    
797        case OP_CBRA:
798        case OP_SCBRA:
799        number = GET2(ecode, 1+LINK_SIZE);
800      offset = number << 1;      offset = number << 1;
801    
802  #ifdef DEBUG  #ifdef PCRE_DEBUG
803      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
804        printf("subject=");
805      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
806      printf("\n");      printf("\n");
807  #endif  #endif
# Line 622  for (;;) Line 814  for (;;)
814        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
815    
816        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
817        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
818            (int)(eptr - md->start_subject);
819    
820          flags = (op == OP_SCBRA)? match_cbegroup : 0;
821        do        do
822          {          {
823          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
824            match_isgroup);            ims, eptrb, flags, RM1);
825          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
826                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
827              RRETURN(rrc);
828          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
829          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
830          }          }
# Line 640  for (;;) Line 836  for (;;)
836        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
837        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
838    
839          if (rrc != MATCH_THEN) md->mark = markptr;
840        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
841        }        }
842    
843      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
844        as a non-capturing bracket. */
     else op = OP_BRA;  
     }  
   
   /* Other types of node can be handled by a switch */  
845    
846    switch(op)      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
847      {      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
     case OP_BRA:     /* Non-capturing bracket: optimized */  
     DPRINTF(("start bracket 0\n"));  
848    
849      /* Loop for all the alternatives */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
850    
851        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
852        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
853    
854        /* Non-capturing bracket. Loop for all the alternatives. When we get to the
855        final alternative within the brackets, we would return the result of a
856        recursive call to match() whatever happened. We can reduce stack usage by
857        turning this into a tail recursion, except in the case when match_cbegroup
858        is set.*/
859    
860        case OP_BRA:
861        case OP_SBRA:
862        DPRINTF(("start non-capturing bracket\n"));
863        flags = (op >= OP_SBRA)? match_cbegroup : 0;
864      for (;;)      for (;;)
865        {        {
866        /* When we get to the final alternative within the brackets, we would        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
867        return the result of a recursive call to match() whatever happened. We          {
868        can reduce stack usage by turning this into a tail recursion. */          if (flags == 0)    /* Not a possibly empty group */
869              {
870        if (ecode[GET(ecode, 1)] != OP_ALT)            ecode += _pcre_OP_lengths[*ecode];
871         {            DPRINTF(("bracket 0 tail recursion\n"));
872         ecode += 1 + LINK_SIZE;            goto TAIL_RECURSE;
873         flags = match_isgroup;            }
874         DPRINTF(("bracket 0 tail recursion\n"));  
875         goto TAIL_RECURSE;          /* Possibly empty group; can't use tail recursion. */
876         }  
877            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
878              eptrb, flags, RM48);
879            if (rrc == MATCH_NOMATCH) md->mark = markptr;
880            RRETURN(rrc);
881            }
882    
883        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
884        otherwise return. */        otherwise return. */
885    
886        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
887          match_isgroup);          eptrb, flags, RM2);
888        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
889              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
890            RRETURN(rrc);
891        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
892        }        }
893      /* Control never reaches here. */      /* Control never reaches here. */
# Line 688  for (;;) Line 899  for (;;)
899      obeyed, we can use tail recursion to avoid using another stack frame. */      obeyed, we can use tail recursion to avoid using another stack frame. */
900    
901      case OP_COND:      case OP_COND:
902      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
903        codelink= GET(ecode, 1);
904    
905        /* Because of the way auto-callout works during compile, a callout item is
906        inserted between OP_COND and an assertion condition. */
907    
908        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
909          {
910          if (pcre_callout != NULL)
911            {
912            pcre_callout_block cb;
913            cb.version          = 1;   /* Version 1 of the callout block */
914            cb.callout_number   = ecode[LINK_SIZE+2];
915            cb.offset_vector    = md->offset_vector;
916            cb.subject          = (PCRE_SPTR)md->start_subject;
917            cb.subject_length   = (int)(md->end_subject - md->start_subject);
918            cb.start_match      = (int)(mstart - md->start_subject);
919            cb.current_position = (int)(eptr - md->start_subject);
920            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
921            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
922            cb.capture_top      = offset_top/2;
923            cb.capture_last     = md->capture_last;
924            cb.callout_data     = md->callout_data;
925            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
926            if (rrc < 0) RRETURN(rrc);
927            }
928          ecode += _pcre_OP_lengths[OP_CALLOUT];
929          }
930    
931        condcode = ecode[LINK_SIZE+1];
932    
933        /* Now see what the actual condition is */
934    
935        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
936          {
937          if (md->recursive == NULL)                /* Not recursing => FALSE */
938            {
939            condition = FALSE;
940            ecode += GET(ecode, 1);
941            }
942          else
943            {
944            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
945            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
946    
947            /* If the test is for recursion into a specific subpattern, and it is
948            false, but the test was set up by name, scan the table to see if the
949            name refers to any other numbers, and test them. The condition is true
950            if any one is set. */
951    
952            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
953              {
954              uschar *slotA = md->name_table;
955              for (i = 0; i < md->name_count; i++)
956                {
957                if (GET2(slotA, 0) == recno) break;
958                slotA += md->name_entry_size;
959                }
960    
961              /* Found a name for the number - there can be only one; duplicate
962              names for different numbers are allowed, but not vice versa. First
963              scan down for duplicates. */
964    
965              if (i < md->name_count)
966                {
967                uschar *slotB = slotA;
968                while (slotB > md->name_table)
969                  {
970                  slotB -= md->name_entry_size;
971                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
972                    {
973                    condition = GET2(slotB, 0) == md->recursive->group_num;
974                    if (condition) break;
975                    }
976                  else break;
977                  }
978    
979                /* Scan up for duplicates */
980    
981                if (!condition)
982                  {
983                  slotB = slotA;
984                  for (i++; i < md->name_count; i++)
985                    {
986                    slotB += md->name_entry_size;
987                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
988                      {
989                      condition = GET2(slotB, 0) == md->recursive->group_num;
990                      if (condition) break;
991                      }
992                    else break;
993                    }
994                  }
995                }
996              }
997    
998            /* Choose branch according to the condition */
999    
1000            ecode += condition? 3 : GET(ecode, 1);
1001            }
1002          }
1003    
1004        else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1005        {        {
1006        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
1007        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
1008          (md->recursive != NULL) :  
1009          (offset < offset_top && md->offset_vector[offset] >= 0);        /* If the numbered capture is unset, but the reference was by name,
1010        ecode += condition? (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1));        scan the table to see if the name refers to any other numbers, and test
1011        flags = match_isgroup;        them. The condition is true if any one is set. This is tediously similar
1012        goto TAIL_RECURSE;        to the code above, but not close enough to try to amalgamate. */
1013    
1014          if (!condition && condcode == OP_NCREF)
1015            {
1016            int refno = offset >> 1;
1017            uschar *slotA = md->name_table;
1018    
1019            for (i = 0; i < md->name_count; i++)
1020              {
1021              if (GET2(slotA, 0) == refno) break;
1022              slotA += md->name_entry_size;
1023              }
1024    
1025            /* Found a name for the number - there can be only one; duplicate names
1026            for different numbers are allowed, but not vice versa. First scan down
1027            for duplicates. */
1028    
1029            if (i < md->name_count)
1030              {
1031              uschar *slotB = slotA;
1032              while (slotB > md->name_table)
1033                {
1034                slotB -= md->name_entry_size;
1035                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1036                  {
1037                  offset = GET2(slotB, 0) << 1;
1038                  condition = offset < offset_top &&
1039                    md->offset_vector[offset] >= 0;
1040                  if (condition) break;
1041                  }
1042                else break;
1043                }
1044    
1045              /* Scan up for duplicates */
1046    
1047              if (!condition)
1048                {
1049                slotB = slotA;
1050                for (i++; i < md->name_count; i++)
1051                  {
1052                  slotB += md->name_entry_size;
1053                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1054                    {
1055                    offset = GET2(slotB, 0) << 1;
1056                    condition = offset < offset_top &&
1057                      md->offset_vector[offset] >= 0;
1058                    if (condition) break;
1059                    }
1060                  else break;
1061                  }
1062                }
1063              }
1064            }
1065    
1066          /* Choose branch according to the condition */
1067    
1068          ecode += condition? 3 : GET(ecode, 1);
1069          }
1070    
1071        else if (condcode == OP_DEF)     /* DEFINE - always false */
1072          {
1073          condition = FALSE;
1074          ecode += GET(ecode, 1);
1075        }        }
1076    
1077      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
1078      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
1079        assertion. */
1080    
1081      else      else
1082        {        {
1083        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1084            match_condassert | match_isgroup);            match_condassert, RM3);
1085        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1086          {          {
1087          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
1088            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1089          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1090          }          }
1091        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH &&
1092                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1093          {          {
1094          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1095          }          }
1096        else ecode += GET(ecode, 1);        else
1097            {
1098            condition = FALSE;
1099            ecode += codelink;
1100            }
1101          }
1102    
1103        /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1104        we can use tail recursion to avoid using another stack frame. */      we can use tail recursion to avoid using another stack frame, except when
1105        match_cbegroup is required for an unlimited repeat of a possibly empty
1106        group. If the second alternative doesn't exist, we can just plough on. */
1107    
1108        if (condition || *ecode == OP_ALT)
1109          {
1110        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1111        flags = match_isgroup;        if (op == OP_SCOND)        /* Possibly empty group */
1112        goto TAIL_RECURSE;          {
1113            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1114            RRETURN(rrc);
1115            }
1116          else                       /* Group must match something */
1117            {
1118            flags = 0;
1119            goto TAIL_RECURSE;
1120            }
1121        }        }
1122      /* Control never reaches here */      else                         /* Condition false & no alternative */
1123          {
1124          ecode += 1 + LINK_SIZE;
1125          }
1126        break;
1127    
1128    
1129        /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1130        to close any currently open capturing brackets. */
1131    
1132      /* Skip over conditional reference or large extraction number data if      case OP_CLOSE:
1133      encountered. */      number = GET2(ecode, 1);
1134        offset = number << 1;
1135    
1136    #ifdef PCRE_DEBUG
1137          printf("end bracket %d at *ACCEPT", number);
1138          printf("\n");
1139    #endif
1140    
1141      case OP_CREF:      md->capture_last = number;
1142      case OP_BRANUMBER:      if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1143          {
1144          md->offset_vector[offset] =
1145            md->offset_vector[md->offset_end - number];
1146          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1147          if (offset_top <= offset) offset_top = offset + 2;
1148          }
1149      ecode += 3;      ecode += 3;
1150      break;      break;
1151    
     /* End of the pattern. If we are in a recursion, we should restore the  
     offsets appropriately and continue from after the call. */  
1152    
1153        /* End of the pattern, either real or forced. If we are in a top-level
1154        recursion, we should restore the offsets appropriately and continue from
1155        after the call. */
1156    
1157        case OP_ACCEPT:
1158      case OP_END:      case OP_END:
1159      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1160        {        {
# Line 745  for (;;) Line 1163  for (;;)
1163        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1164        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1165          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1166        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1167        ims = original_ims;        ims = original_ims;
1168        ecode = rec->after_call;        ecode = rec->after_call;
1169        break;        break;
1170        }        }
1171    
1172      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1173      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1174        the subject. In both cases, backtracking will then try other alternatives,
1175        if any. */
1176    
1177        if (eptr == mstart &&
1178            (md->notempty ||
1179              (md->notempty_atstart &&
1180                mstart == md->start_subject + md->start_offset)))
1181          MRRETURN(MATCH_NOMATCH);
1182    
1183        /* Otherwise, we have a match. */
1184    
1185        md->end_match_ptr = eptr;           /* Record where we ended */
1186        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1187        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1188    
1189        /* For some reason, the macros don't work properly if an expression is
1190        given as the argument to MRRETURN when the heap is in use. */
1191    
1192      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1193      md->end_match_ptr = eptr;          /* Record where we ended */      MRRETURN(rrc);
     md->end_offset_top = offset_top;   /* and how many extracts were taken */  
     RRETURN(MATCH_MATCH);  
1194    
1195      /* Change option settings */      /* Change option settings */
1196    
# Line 777  for (;;) Line 1210  for (;;)
1210      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1211      do      do
1212        {        {
1213        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1214          match_isgroup);          RM4);
1215        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1216        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          {
1217            mstart = md->start_match_ptr;   /* In case \K reset it */
1218            break;
1219            }
1220          if (rrc != MATCH_NOMATCH &&
1221              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1222            RRETURN(rrc);
1223        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1224        }        }
1225      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1226      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1227    
1228      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1229    
# Line 798  for (;;) Line 1237  for (;;)
1237      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1238      continue;      continue;
1239    
1240      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1241        PRUNE, or COMMIT means we must assume failure without checking subsequent
1242        branches. */
1243    
1244      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1245      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1246      do      do
1247        {        {
1248        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1249          match_isgroup);          RM5);
1250        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1251        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1252            {
1253            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1254            break;
1255            }
1256          if (rrc != MATCH_NOMATCH &&
1257              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1258            RRETURN(rrc);
1259        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1260        }        }
1261      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 826  for (;;) Line 1274  for (;;)
1274  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1275      if (utf8)      if (utf8)
1276        {        {
1277        c = GET(ecode,1);        i = GET(ecode, 1);
1278        for (i = 0; i < c; i++)        while (i-- > 0)
1279          {          {
1280          eptr--;          eptr--;
1281          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1282          BACKCHAR(eptr)          BACKCHAR(eptr);
1283          }          }
1284        }        }
1285      else      else
# Line 840  for (;;) Line 1288  for (;;)
1288      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1289    
1290        {        {
1291        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
1292        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1293        }        }
1294    
1295      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1296    
1297        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1298      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1299      break;      break;
1300    
# Line 861  for (;;) Line 1310  for (;;)
1310        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1311        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1312        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1313        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1314        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1315        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1316        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1317        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1318        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1319        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1320        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1321        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1322        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1323        }        }
1324      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 897  for (;;) Line 1346  for (;;)
1346      case OP_RECURSE:      case OP_RECURSE:
1347        {        {
1348        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1349        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
1350            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
1351    
1352        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1353    
# Line 929  for (;;) Line 1373  for (;;)
1373    
1374        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1375              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1376        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1377    
1378        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1379        restore the offset and recursion data. */        restore the offset and recursion data. */
1380    
1381        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1382          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1383        do        do
1384          {          {
1385          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1386              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
1387          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1388            {            {
1389            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1390            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1391            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1392              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1393            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1394            }            }
1395          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH &&
1396                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1397            {            {
1398            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1399              if (new_recursive.offset_save != stacksave)
1400                (pcre_free)(new_recursive.offset_save);
1401            RRETURN(rrc);            RRETURN(rrc);
1402            }            }
1403    
# Line 965  for (;;) Line 1412  for (;;)
1412        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1413        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1414          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1415        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1416        }        }
1417      /* Control never reaches here */      /* Control never reaches here */
1418    
# Line 974  for (;;) Line 1421  for (;;)
1421      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1422      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1423      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1424      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1425        the start-of-match value in case it was changed by \K. */
1426    
1427      case OP_ONCE:      case OP_ONCE:
1428      prev = ecode;      prev = ecode;
# Line 982  for (;;) Line 1430  for (;;)
1430    
1431      do      do
1432        {        {
1433        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1434          eptrb, match_isgroup);        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1435        if (rrc == MATCH_MATCH) break;          {
1436        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          mstart = md->start_match_ptr;
1437            break;
1438            }
1439          if (rrc != MATCH_NOMATCH &&
1440              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1441            RRETURN(rrc);
1442        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1443        }        }
1444      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 997  for (;;) Line 1450  for (;;)
1450      /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1451      mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1452    
1453      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1454    
1455      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1456      eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
# Line 1028  for (;;) Line 1481  for (;;)
1481    
1482      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1483        {        {
1484        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1485        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1486        ecode = prev;        ecode = prev;
1487        flags = match_isgroup;        flags = 0;
1488        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1489        }        }
1490      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1491        {        {
1492        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1493        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1494        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1495        flags = 0;        flags = 0;
# Line 1051  for (;;) Line 1504  for (;;)
1504      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1505      break;      break;
1506    
1507      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1508      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1509      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1510      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1511      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1512    
1513      case OP_BRAZERO:      case OP_BRAZERO:
1514        {        {
1515        next = ecode+1;        next = ecode+1;
1516        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1517        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1518        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1519        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1520        }        }
1521      break;      break;
1522    
1523      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1524        {        {
1525        next = ecode+1;        next = ecode+1;
1526        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1527        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1528        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1529        ecode++;        ecode++;
1530        }        }
1531      break;      break;
1532    
1533      /* End of a group, repeated or non-repeating. If we are at the end of      case OP_SKIPZERO:
1534      an assertion "group", stop matching and return MATCH_MATCH, but record the        {
1535      current high water mark for use by positive assertions. Do this also        next = ecode+1;
1536      for the "once" (not-backup up) groups. */        do next += GET(next,1); while (*next == OP_ALT);
1537          ecode = next + 1 + LINK_SIZE;
1538          }
1539        break;
1540    
1541        /* End of a group, repeated or non-repeating. */
1542    
1543      case OP_KET:      case OP_KET:
1544      case OP_KETRMIN:      case OP_KETRMIN:
1545      case OP_KETRMAX:      case OP_KETRMAX:
1546      prev = ecode - GET(ecode, 1);      prev = ecode - GET(ecode, 1);
     saved_eptr = eptrb->epb_saved_eptr;  
1547    
1548      /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1549        infinite repeats of empty string matches, retrieve the subject start from
1550        the chain. Otherwise, set it NULL. */
1551    
1552        if (*prev >= OP_SBRA)
1553          {
1554          saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1555          eptrb = eptrb->epb_prev;              /* Backup to previous group */
1556          }
1557        else saved_eptr = NULL;
1558    
1559      eptrb = eptrb->epb_prev;      /* If we are at the end of an assertion group or an atomic group, stop
1560        matching and return MATCH_MATCH, but record the current high water mark for
1561        use by positive assertions. We also need to record the match start in case
1562        it was changed by \K. */
1563    
1564      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1565          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1099  for (;;) Line 1567  for (;;)
1567        {        {
1568        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1569        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1570        RRETURN(MATCH_MATCH);        md->start_match_ptr = mstart;
1571          MRRETURN(MATCH_MATCH);
1572        }        }
1573    
1574      /* In all other cases except a conditional group we have to check the      /* For capturing groups we have to check the group number back at the start
1575      group number back at the start and if necessary complete handling an      and if necessary complete handling an extraction by setting the offsets and
1576      extraction by setting the offsets and bumping the high water mark. */      bumping the high water mark. Note that whole-pattern recursion is coded as
1577        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1578        when the OP_END is reached. Other recursion is handled here. */
1579    
1580      if (*prev != OP_COND)      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1581        {        {
1582        number = *prev - OP_BRA;        number = GET2(prev, 1+LINK_SIZE);
   
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);  
1583        offset = number << 1;        offset = number << 1;
1584    
1585  #ifdef DEBUG  #ifdef PCRE_DEBUG
1586        printf("end bracket %d", number);        printf("end bracket %d", number);
1587        printf("\n");        printf("\n");
1588  #endif  #endif
1589    
1590        /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1591        of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
       into group 0, so it won't be picked up here. Instead, we catch it when  
       the OP_END is reached. */  
   
       if (number > 0)  
1592          {          {
1593          md->capture_last = number;          md->offset_vector[offset] =
1594          if (offset >= md->offset_max) md->offset_overflow = TRUE; else            md->offset_vector[md->offset_end - number];
1595            {          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1596            md->offset_vector[offset] =          if (offset_top <= offset) offset_top = offset + 2;
1597              md->offset_vector[md->offset_end - number];          }
1598            md->offset_vector[offset+1] = eptr - md->start_subject;  
1599            if (offset_top <= offset) offset_top = offset + 2;        /* Handle a recursively called group. Restore the offsets
1600            }        appropriately and continue from after the call. */
1601    
1602          /* Handle a recursively called group. Restore the offsets        if (md->recursive != NULL && md->recursive->group_num == number)
1603          appropriately and continue from after the call. */          {
1604            recursion_info *rec = md->recursive;
1605          if (md->recursive != NULL && md->recursive->group_num == number)          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1606            {          md->recursive = rec->prevrec;
1607            recursion_info *rec = md->recursive;          memcpy(md->offset_vector, rec->offset_save,
1608            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));            rec->saved_max * sizeof(int));
1609            md->recursive = rec->prevrec;          offset_top = rec->save_offset_top;
1610            md->start_match = rec->save_start;          ecode = rec->after_call;
1611            memcpy(md->offset_vector, rec->offset_save,          ims = original_ims;
1612              rec->saved_max * sizeof(int));          break;
           ecode = rec->after_call;  
           ims = original_ims;  
           break;  
           }  
1613          }          }
1614        }        }
1615    
1616      /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1617      the group. */      flags, in case they got changed during the group. */
1618    
1619      ims = original_ims;      ims = original_ims;
1620      DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
# Line 1175  for (;;) Line 1633  for (;;)
1633    
1634      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1635      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1636      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1637        unlimited repeat of a group that can match an empty string. */
1638    
1639        flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1640    
1641      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1642        {        {
1643        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1644        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1645          if (flags != 0)    /* Could match an empty string */
1646            {
1647            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1648            RRETURN(rrc);
1649            }
1650        ecode = prev;        ecode = prev;
       flags = match_isgroup;  
1651        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1652        }        }
1653      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1654        {        {
1655        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1656        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1657        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1658        flags = 0;        flags = 0;
# Line 1198  for (;;) Line 1663  for (;;)
1663      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1664    
1665      case OP_CIRC:      case OP_CIRC:
1666      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1667      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1668        {        {
1669        if (eptr != md->start_subject &&        if (eptr != md->start_subject &&
1670            (eptr == md->end_subject ||            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1671             eptr < md->start_subject + md->nllen ||          MRRETURN(MATCH_NOMATCH);
            !IS_NEWLINE(eptr - md->nllen)))  
         RRETURN(MATCH_NOMATCH);  
1672        ecode++;        ecode++;
1673        break;        break;
1674        }        }
# Line 1214  for (;;) Line 1677  for (;;)
1677      /* Start of subject assertion */      /* Start of subject assertion */
1678    
1679      case OP_SOD:      case OP_SOD:
1680      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1681      ecode++;      ecode++;
1682      break;      break;
1683    
1684      /* Start of match assertion */      /* Start of match assertion */
1685    
1686      case OP_SOM:      case OP_SOM:
1687      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1688        ecode++;
1689        break;
1690    
1691        /* Reset the start of match point */
1692    
1693        case OP_SET_SOM:
1694        mstart = eptr;
1695      ecode++;      ecode++;
1696      break;      break;
1697    
# Line 1232  for (;;) Line 1702  for (;;)
1702      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1703        {        {
1704        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1705          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1706        else        else
1707          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1708        ecode++;        ecode++;
1709        break;        break;
1710        }        }
1711      else      else
1712        {        {
1713        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1714        if (!md->endonly)        if (!md->endonly)
1715          {          {
1716          if (eptr != md->end_subject &&          if (eptr != md->end_subject &&
1717              (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1718            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
1719          ecode++;          ecode++;
1720          break;          break;
1721          }          }
# Line 1255  for (;;) Line 1725  for (;;)
1725      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1726    
1727      case OP_EOD:      case OP_EOD:
1728      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1729      ecode++;      ecode++;
1730      break;      break;
1731    
# Line 1263  for (;;) Line 1733  for (;;)
1733    
1734      case OP_EODN:      case OP_EODN:
1735      if (eptr != md->end_subject &&      if (eptr != md->end_subject &&
1736          (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1737        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1738      ecode++;      ecode++;
1739      break;      break;
1740    
# Line 1276  for (;;) Line 1746  for (;;)
1746    
1747        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1748        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1749        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1750          partial matching. */
1751    
1752  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1753        if (utf8)        if (utf8)
1754          {          {
1755            /* Get status of previous character */
1756    
1757          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1758            {            {
1759            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1760            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1761              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1762            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1763    #ifdef SUPPORT_UCP
1764              if (md->use_ucp)
1765                {
1766                if (c == '_') prev_is_word = TRUE; else
1767                  {
1768                  int cat = UCD_CATEGORY(c);
1769                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1770                  }
1771                }
1772              else
1773    #endif
1774            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1775            }            }
1776          if (eptr >= md->end_subject) cur_is_word = FALSE; else  
1777            /* Get status of next character */
1778    
1779            if (eptr >= md->end_subject)
1780              {
1781              SCHECK_PARTIAL();
1782              cur_is_word = FALSE;
1783              }
1784            else
1785            {            {
1786            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1787    #ifdef SUPPORT_UCP
1788              if (md->use_ucp)
1789                {
1790                if (c == '_') cur_is_word = TRUE; else
1791                  {
1792                  int cat = UCD_CATEGORY(c);
1793                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1794                  }
1795                }
1796              else
1797    #endif
1798            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1799            }            }
1800          }          }
1801        else        else
1802  #endif  #endif
1803    
1804        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1805          consistency with the behaviour of \w we do use it in this case. */
1806    
1807          {          {
1808          prev_is_word = (eptr != md->start_subject) &&          /* Get status of previous character */
           ((md->ctypes[eptr[-1]] & ctype_word) != 0);  
         cur_is_word = (eptr < md->end_subject) &&  
           ((md->ctypes[*eptr] & ctype_word) != 0);  
         }  
1809    
1810        /* Now see if the situation is what we want */          if (eptr == md->start_subject) prev_is_word = FALSE; else
1811              {
1812              if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1813    #ifdef SUPPORT_UCP
1814              if (md->use_ucp)
1815                {
1816                c = eptr[-1];
1817                if (c == '_') prev_is_word = TRUE; else
1818                  {
1819                  int cat = UCD_CATEGORY(c);
1820                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1821                  }
1822                }
1823              else
1824    #endif
1825              prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1826              }
1827    
1828        if ((*ecode++ == OP_WORD_BOUNDARY)?          /* Get status of next character */
1829             cur_is_word == prev_is_word : cur_is_word != prev_is_word)  
1830          RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
1831        }            {
1832      break;            SCHECK_PARTIAL();
1833              cur_is_word = FALSE;
1834              }
1835            else
1836    #ifdef SUPPORT_UCP
1837            if (md->use_ucp)
1838              {
1839              c = *eptr;
1840              if (c == '_') cur_is_word = TRUE; else
1841                {
1842                int cat = UCD_CATEGORY(c);
1843                cur_is_word = (cat == ucp_L || cat == ucp_N);
1844                }
1845              }
1846            else
1847    #endif
1848            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1849            }
1850    
1851          /* Now see if the situation is what we want */
1852    
1853          if ((*ecode++ == OP_WORD_BOUNDARY)?
1854               cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1855            MRRETURN(MATCH_NOMATCH);
1856          }
1857        break;
1858    
1859      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1860    
1861      case OP_ANY:      case OP_ANY:
1862      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1863        /* Fall through */
1864    
1865        case OP_ALLANY:
1866        if (eptr++ >= md->end_subject)
1867        {        {
1868        if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))        SCHECK_PARTIAL();
1869          RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1870        }        }
1871      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1872      ecode++;      ecode++;
1873      break;      break;
1874    
# Line 1332  for (;;) Line 1876  for (;;)
1876      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1877    
1878      case OP_ANYBYTE:      case OP_ANYBYTE:
1879      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1880          {
1881          SCHECK_PARTIAL();
1882          MRRETURN(MATCH_NOMATCH);
1883          }
1884      ecode++;      ecode++;
1885      break;      break;
1886    
1887      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1888      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1889          {
1890          SCHECK_PARTIAL();
1891          MRRETURN(MATCH_NOMATCH);
1892          }
1893      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1894      if (      if (
1895  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1345  for (;;) Line 1897  for (;;)
1897  #endif  #endif
1898         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1899         )         )
1900        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1901      ecode++;      ecode++;
1902      break;      break;
1903    
1904      case OP_DIGIT:      case OP_DIGIT:
1905      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1906          {
1907          SCHECK_PARTIAL();
1908          MRRETURN(MATCH_NOMATCH);
1909          }
1910      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1911      if (      if (
1912  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1358  for (;;) Line 1914  for (;;)
1914  #endif  #endif
1915         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1916         )         )
1917        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1918      ecode++;      ecode++;
1919      break;      break;
1920    
1921      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1922      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1923          {
1924          SCHECK_PARTIAL();
1925          MRRETURN(MATCH_NOMATCH);
1926          }
1927      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1928      if (      if (
1929  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1371  for (;;) Line 1931  for (;;)
1931  #endif  #endif
1932         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1933         )         )
1934        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1935      ecode++;      ecode++;
1936      break;      break;
1937    
1938      case OP_WHITESPACE:      case OP_WHITESPACE:
1939      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1940          {
1941          SCHECK_PARTIAL();
1942          MRRETURN(MATCH_NOMATCH);
1943          }
1944      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1945      if (      if (
1946  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1384  for (;;) Line 1948  for (;;)
1948  #endif  #endif
1949         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1950         )         )
1951        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1952      ecode++;      ecode++;
1953      break;      break;
1954    
1955      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1956      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1957          {
1958          SCHECK_PARTIAL();
1959          MRRETURN(MATCH_NOMATCH);
1960          }
1961      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1962      if (      if (
1963  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1397  for (;;) Line 1965  for (;;)
1965  #endif  #endif
1966         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1967         )         )
1968        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1969      ecode++;      ecode++;
1970      break;      break;
1971    
1972      case OP_WORDCHAR:      case OP_WORDCHAR:
1973      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1974          {
1975          SCHECK_PARTIAL();
1976          MRRETURN(MATCH_NOMATCH);
1977          }
1978      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1979      if (      if (
1980  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1410  for (;;) Line 1982  for (;;)
1982  #endif  #endif
1983         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1984         )         )
1985        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1986        ecode++;
1987        break;
1988    
1989        case OP_ANYNL:
1990        if (eptr >= md->end_subject)
1991          {
1992          SCHECK_PARTIAL();
1993          MRRETURN(MATCH_NOMATCH);
1994          }
1995        GETCHARINCTEST(c, eptr);
1996        switch(c)
1997          {
1998          default: MRRETURN(MATCH_NOMATCH);
1999          case 0x000d:
2000          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2001          break;
2002    
2003          case 0x000a:
2004          break;
2005    
2006          case 0x000b:
2007          case 0x000c:
2008          case 0x0085:
2009          case 0x2028:
2010          case 0x2029:
2011          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2012          break;
2013          }
2014        ecode++;
2015        break;
2016    
2017        case OP_NOT_HSPACE:
2018        if (eptr >= md->end_subject)
2019          {
2020          SCHECK_PARTIAL();
2021          MRRETURN(MATCH_NOMATCH);
2022          }
2023        GETCHARINCTEST(c, eptr);
2024        switch(c)
2025          {
2026          default: break;
2027          case 0x09:      /* HT */
2028          case 0x20:      /* SPACE */
2029          case 0xa0:      /* NBSP */
2030          case 0x1680:    /* OGHAM SPACE MARK */
2031          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2032          case 0x2000:    /* EN QUAD */
2033          case 0x2001:    /* EM QUAD */
2034          case 0x2002:    /* EN SPACE */
2035          case 0x2003:    /* EM SPACE */
2036          case 0x2004:    /* THREE-PER-EM SPACE */
2037          case 0x2005:    /* FOUR-PER-EM SPACE */
2038          case 0x2006:    /* SIX-PER-EM SPACE */
2039          case 0x2007:    /* FIGURE SPACE */
2040          case 0x2008:    /* PUNCTUATION SPACE */
2041          case 0x2009:    /* THIN SPACE */
2042          case 0x200A:    /* HAIR SPACE */
2043          case 0x202f:    /* NARROW NO-BREAK SPACE */
2044          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2045          case 0x3000:    /* IDEOGRAPHIC SPACE */
2046          MRRETURN(MATCH_NOMATCH);
2047          }
2048        ecode++;
2049        break;
2050    
2051        case OP_HSPACE:
2052        if (eptr >= md->end_subject)
2053          {
2054          SCHECK_PARTIAL();
2055          MRRETURN(MATCH_NOMATCH);
2056          }
2057        GETCHARINCTEST(c, eptr);
2058        switch(c)
2059          {
2060          default: MRRETURN(MATCH_NOMATCH);
2061          case 0x09:      /* HT */
2062          case 0x20:      /* SPACE */
2063          case 0xa0:      /* NBSP */
2064          case 0x1680:    /* OGHAM SPACE MARK */
2065          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2066          case 0x2000:    /* EN QUAD */
2067          case 0x2001:    /* EM QUAD */
2068          case 0x2002:    /* EN SPACE */
2069          case 0x2003:    /* EM SPACE */
2070          case 0x2004:    /* THREE-PER-EM SPACE */
2071          case 0x2005:    /* FOUR-PER-EM SPACE */
2072          case 0x2006:    /* SIX-PER-EM SPACE */
2073          case 0x2007:    /* FIGURE SPACE */
2074          case 0x2008:    /* PUNCTUATION SPACE */
2075          case 0x2009:    /* THIN SPACE */
2076          case 0x200A:    /* HAIR SPACE */
2077          case 0x202f:    /* NARROW NO-BREAK SPACE */
2078          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2079          case 0x3000:    /* IDEOGRAPHIC SPACE */
2080          break;
2081          }
2082        ecode++;
2083        break;
2084    
2085        case OP_NOT_VSPACE:
2086        if (eptr >= md->end_subject)
2087          {
2088          SCHECK_PARTIAL();
2089          MRRETURN(MATCH_NOMATCH);
2090          }
2091        GETCHARINCTEST(c, eptr);
2092        switch(c)
2093          {
2094          default: break;
2095          case 0x0a:      /* LF */
2096          case 0x0b:      /* VT */
2097          case 0x0c:      /* FF */
2098          case 0x0d:      /* CR */
2099          case 0x85:      /* NEL */
2100          case 0x2028:    /* LINE SEPARATOR */
2101          case 0x2029:    /* PARAGRAPH SEPARATOR */
2102          MRRETURN(MATCH_NOMATCH);
2103          }
2104        ecode++;
2105        break;
2106    
2107        case OP_VSPACE:
2108        if (eptr >= md->end_subject)
2109          {
2110          SCHECK_PARTIAL();
2111          MRRETURN(MATCH_NOMATCH);
2112          }
2113        GETCHARINCTEST(c, eptr);
2114        switch(c)
2115          {
2116          default: MRRETURN(MATCH_NOMATCH);
2117          case 0x0a:      /* LF */
2118          case 0x0b:      /* VT */
2119          case 0x0c:      /* FF */
2120          case 0x0d:      /* CR */
2121          case 0x85:      /* NEL */
2122          case 0x2028:    /* LINE SEPARATOR */
2123          case 0x2029:    /* PARAGRAPH SEPARATOR */
2124          break;
2125          }
2126      ecode++;      ecode++;
2127      break;      break;
2128    
# Line 1420  for (;;) Line 2132  for (;;)
2132    
2133      case OP_PROP:      case OP_PROP:
2134      case OP_NOTPROP:      case OP_NOTPROP:
2135      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2136          {
2137          SCHECK_PARTIAL();
2138          MRRETURN(MATCH_NOMATCH);
2139          }
2140      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2141        {        {
2142        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
2143    
2144        switch(ecode[1])        switch(ecode[1])
2145          {          {
2146          case PT_ANY:          case PT_ANY:
2147          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2148          break;          break;
2149    
2150          case PT_LAMP:          case PT_LAMP:
2151          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2152               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2153               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2154            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2155           break;          break;
2156    
2157          case PT_GC:          case PT_GC:
2158          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2159            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2160          break;          break;
2161    
2162          case PT_PC:          case PT_PC:
2163          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2164            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2165          break;          break;
2166    
2167          case PT_SC:          case PT_SC:
2168          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2169            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2170            break;
2171    
2172            /* These are specials */
2173    
2174            case PT_ALNUM:
2175            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2176                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2177              MRRETURN(MATCH_NOMATCH);
2178            break;
2179    
2180            case PT_SPACE:    /* Perl space */
2181            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2182                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2183                   == (op == OP_NOTPROP))
2184              MRRETURN(MATCH_NOMATCH);
2185          break;          break;
2186    
2187            case PT_PXSPACE:  /* POSIX space */
2188            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2189                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2190                 c == CHAR_FF || c == CHAR_CR)
2191                   == (op == OP_NOTPROP))
2192              MRRETURN(MATCH_NOMATCH);
2193            break;
2194    
2195            case PT_WORD:
2196            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2197                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2198                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2199              MRRETURN(MATCH_NOMATCH);
2200            break;
2201    
2202            /* This should never occur */
2203    
2204          default:          default:
2205          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
         break;  
2206          }          }
2207    
2208        ecode += 3;        ecode += 3;
# Line 1467  for (;;) Line 2213  for (;;)
2213      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2214    
2215      case OP_EXTUNI:      case OP_EXTUNI:
2216      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2217          {
2218          SCHECK_PARTIAL();
2219          MRRETURN(MATCH_NOMATCH);
2220          }
2221      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2222        {        {
2223        int chartype, script;        int category = UCD_CATEGORY(c);
2224        int category = _pcre_ucp_findprop(c, &chartype, &script);        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2225        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2226          {          {
2227          int len = 1;          int len = 1;
# Line 1480  for (;;) Line 2229  for (;;)
2229            {            {
2230            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2231            }            }
2232          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2233          if (category != ucp_M) break;          if (category != ucp_M) break;
2234          eptr += len;          eptr += len;
2235          }          }
# Line 1501  for (;;) Line 2250  for (;;)
2250      case OP_REF:      case OP_REF:
2251        {        {
2252        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2253        ecode += 3;                                 /* Advance past item */        ecode += 3;
2254    
2255          /* If the reference is unset, there are two possibilities:
2256    
2257          (a) In the default, Perl-compatible state, set the length to be longer
2258          than the amount of subject left; this ensures that every attempt at a
2259          match fails. We can't just fail here, because of the possibility of
2260          quantifiers with zero minima.
2261    
2262          (b) If the JavaScript compatibility flag is set, set the length to zero
2263          so that the back reference matches an empty string.
2264    
2265        /* If the reference is unset, set the length to be longer than the amount        Otherwise, set the length to the length of what was matched by the
2266        of subject left; this ensures that every attempt at a match fails. We        referenced subpattern. */
2267        can't just fail here, because of the possibility of quantifiers with zero  
2268        minima. */        if (offset >= offset_top || md->offset_vector[offset] < 0)
2269            length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2270        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        else
2271          md->end_subject - eptr + 1 :          length = md->offset_vector[offset+1] - md->offset_vector[offset];
         md->offset_vector[offset+1] - md->offset_vector[offset];  
2272    
2273        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2274    
# Line 1539  for (;;) Line 2297  for (;;)
2297          break;          break;
2298    
2299          default:               /* No repeat follows */          default:               /* No repeat follows */
2300          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2301              {
2302              CHECK_PARTIAL();
2303              MRRETURN(MATCH_NOMATCH);
2304              }
2305          eptr += length;          eptr += length;
2306          continue;              /* With the main loop */          continue;              /* With the main loop */
2307          }          }
# Line 1555  for (;;) Line 2317  for (;;)
2317    
2318        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2319          {          {
2320          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2321              {
2322              CHECK_PARTIAL();
2323              MRRETURN(MATCH_NOMATCH);
2324              }
2325          eptr += length;          eptr += length;
2326          }          }
2327    
# Line 1570  for (;;) Line 2336  for (;;)
2336          {          {
2337          for (fi = min;; fi++)          for (fi = min;; fi++)
2338            {            {
2339            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2340            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2341            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2342              RRETURN(MATCH_NOMATCH);            if (!match_ref(offset, eptr, length, md, ims))
2343                {
2344                CHECK_PARTIAL();
2345                MRRETURN(MATCH_NOMATCH);
2346                }
2347            eptr += length;            eptr += length;
2348            }            }
2349          /* Control never gets here */          /* Control never gets here */
# Line 1586  for (;;) Line 2356  for (;;)
2356          pp = eptr;          pp = eptr;
2357          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2358            {            {
2359            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2360                {
2361                CHECK_PARTIAL();
2362                break;
2363                }
2364            eptr += length;            eptr += length;
2365            }            }
2366          while (eptr >= pp)          while (eptr >= pp)
2367            {            {
2368            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2369            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2370            eptr -= length;            eptr -= length;
2371            }            }
2372          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2373          }          }
2374        }        }
2375      /* Control never gets here */      /* Control never gets here */
2376    
   
   
2377      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2378      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2379      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1656  for (;;) Line 2428  for (;;)
2428          {          {
2429          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2430            {            {
2431            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2432                {
2433                SCHECK_PARTIAL();
2434                MRRETURN(MATCH_NOMATCH);
2435                }
2436            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2437            if (c > 255)            if (c > 255)
2438              {              {
2439              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2440              }              }
2441            else            else
2442              {              {
2443              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2444              }              }
2445            }            }
2446          }          }
# Line 1674  for (;;) Line 2450  for (;;)
2450          {          {
2451          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2452            {            {
2453            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2454                {
2455                SCHECK_PARTIAL();
2456                MRRETURN(MATCH_NOMATCH);
2457                }
2458            c = *eptr++;            c = *eptr++;
2459            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2460            }            }
2461          }          }
2462    
# Line 1696  for (;;) Line 2476  for (;;)
2476            {            {
2477            for (fi = min;; fi++)            for (fi = min;; fi++)
2478              {              {
2479              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2480              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2481              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2482                if (eptr >= md->end_subject)
2483                  {
2484                  SCHECK_PARTIAL();
2485                  MRRETURN(MATCH_NOMATCH);
2486                  }
2487              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2488              if (c > 255)              if (c > 255)
2489                {                {
2490                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2491                }                }
2492              else              else
2493                {                {
2494                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2495                }                }
2496              }              }
2497            }            }
# Line 1716  for (;;) Line 2501  for (;;)
2501            {            {
2502            for (fi = min;; fi++)            for (fi = min;; fi++)
2503              {              {
2504              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2505              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2506              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2507                if (eptr >= md->end_subject)
2508                  {
2509                  SCHECK_PARTIAL();
2510                  MRRETURN(MATCH_NOMATCH);
2511                  }
2512              c = *eptr++;              c = *eptr++;
2513              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2514              }              }
2515            }            }
2516          /* Control never gets here */          /* Control never gets here */
# Line 1739  for (;;) Line 2529  for (;;)
2529            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2530              {              {
2531              int len = 1;              int len = 1;
2532              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2533                  {
2534                  SCHECK_PARTIAL();
2535                  break;
2536                  }
2537              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2538              if (c > 255)              if (c > 255)
2539                {                {
# Line 1753  for (;;) Line 2547  for (;;)
2547              }              }
2548            for (;;)            for (;;)
2549              {              {
2550              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2551              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2552              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2553              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1765  for (;;) Line 2559  for (;;)
2559            {            {
2560            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2561              {              {
2562              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2563                  {
2564                  SCHECK_PARTIAL();
2565                  break;
2566                  }
2567              c = *eptr;              c = *eptr;
2568              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2569              eptr++;              eptr++;
2570              }              }
2571            while (eptr >= pp)            while (eptr >= pp)
2572              {              {
2573              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2574              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2575              eptr--;              eptr--;
2576              }              }
2577            }            }
2578    
2579          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2580          }          }
2581        }        }
2582      /* Control never gets here */      /* Control never gets here */
2583    
2584    
2585      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2586      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2587        mode, because Unicode properties are supported in non-UTF-8 mode. */
2588    
2589  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2590      case OP_XCLASS:      case OP_XCLASS:
# Line 1826  for (;;) Line 2625  for (;;)
2625    
2626        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2627          {          {
2628          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2629          GETCHARINC(c, eptr);            {
2630          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2631              MRRETURN(MATCH_NOMATCH);
2632              }
2633            GETCHARINCTEST(c, eptr);
2634            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2635          }          }
2636    
2637        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1843  for (;;) Line 2646  for (;;)
2646          {          {
2647          for (fi = min;; fi++)          for (fi = min;; fi++)
2648            {            {
2649            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2650            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2651            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2652            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2653            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2654                SCHECK_PARTIAL();
2655                MRRETURN(MATCH_NOMATCH);
2656                }
2657              GETCHARINCTEST(c, eptr);
2658              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2659            }            }
2660          /* Control never gets here */          /* Control never gets here */
2661          }          }
# Line 1860  for (;;) Line 2668  for (;;)
2668          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2669            {            {
2670            int len = 1;            int len = 1;
2671            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2672            GETCHARLEN(c, eptr, len);              {
2673                SCHECK_PARTIAL();
2674                break;
2675                }
2676              GETCHARLENTEST(c, eptr, len);
2677            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2678            eptr += len;            eptr += len;
2679            }            }
2680          for(;;)          for(;;)
2681            {            {
2682            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2683            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2684            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2685            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2686            }            }
2687          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2688          }          }
2689    
2690        /* Control never gets here */        /* Control never gets here */
# Line 1888  for (;;) Line 2700  for (;;)
2700        length = 1;        length = 1;
2701        ecode++;        ecode++;
2702        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2703        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2704        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2705            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2706            MRRETURN(MATCH_NOMATCH);
2707            }
2708          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2709        }        }
2710      else      else
2711  #endif  #endif
2712    
2713      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2714        {        {
2715        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2716        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2717            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2718            MRRETURN(MATCH_NOMATCH);
2719            }
2720          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2721        ecode += 2;        ecode += 2;
2722        }        }
2723      break;      break;
# Line 1912  for (;;) Line 2732  for (;;)
2732        ecode++;        ecode++;
2733        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2734    
2735        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2736            {
2737            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2738            MRRETURN(MATCH_NOMATCH);
2739            }
2740    
2741        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2742        can use the fast lookup table. */        can use the fast lookup table. */
2743    
2744        if (fc < 128)        if (fc < 128)
2745          {          {
2746          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2747          }          }
2748    
2749        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
2750    
2751        else        else
2752          {          {
2753          int dc;          unsigned int dc;
2754          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2755          ecode += length;          ecode += length;
2756    
# Line 1936  for (;;) Line 2760  for (;;)
2760          if (fc != dc)          if (fc != dc)
2761            {            {
2762  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2763            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2764  #endif  #endif
2765              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2766            }            }
2767          }          }
2768        }        }
# Line 1947  for (;;) Line 2771  for (;;)
2771    
2772      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2773        {        {
2774        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2775        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2776            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2777            MRRETURN(MATCH_NOMATCH);
2778            }
2779          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2780        ecode += 2;        ecode += 2;
2781        }        }
2782      break;      break;
2783    
2784      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2785    
2786      case OP_EXACT:      case OP_EXACT:
2787      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2788      ecode += 3;      ecode += 3;
2789      goto REPEATCHAR;      goto REPEATCHAR;
2790    
2791        case OP_POSUPTO:
2792        possessive = TRUE;
2793        /* Fall through */
2794    
2795      case OP_UPTO:      case OP_UPTO:
2796      case OP_MINUPTO:      case OP_MINUPTO:
2797      min = 0;      min = 0;
# Line 1968  for (;;) Line 2800  for (;;)
2800      ecode += 3;      ecode += 3;
2801      goto REPEATCHAR;      goto REPEATCHAR;
2802    
2803        case OP_POSSTAR:
2804        possessive = TRUE;
2805        min = 0;
2806        max = INT_MAX;
2807        ecode++;
2808        goto REPEATCHAR;
2809    
2810        case OP_POSPLUS:
2811        possessive = TRUE;
2812        min = 1;
2813        max = INT_MAX;
2814        ecode++;
2815        goto REPEATCHAR;
2816    
2817        case OP_POSQUERY:
2818        possessive = TRUE;
2819        min = 0;
2820        max = 1;
2821        ecode++;
2822        goto REPEATCHAR;
2823    
2824      case OP_STAR:      case OP_STAR:
2825      case OP_MINSTAR:      case OP_MINSTAR:
2826      case OP_PLUS:      case OP_PLUS:
# Line 1976  for (;;) Line 2829  for (;;)
2829      case OP_MINQUERY:      case OP_MINQUERY:
2830      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2831      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2832    
2833      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2834      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2835      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2836    
2837      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2838    
2839      REPEATCHAR:      REPEATCHAR:
2840  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1991  for (;;) Line 2843  for (;;)
2843        length = 1;        length = 1;
2844        charptr = ecode;        charptr = ecode;
2845        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2846        ecode += length;        ecode += length;
2847    
2848        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 1999  for (;;) Line 2850  for (;;)
2850    
2851        if (length > 1)        if (length > 1)
2852          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2853  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2854          int othercase;          unsigned int othercase;
2855          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2856              (othercase = _pcre_ucp_othercase(fc)) >= 0 &&              (othercase = UCD_OTHERCASE(fc)) != fc)
              othercase >= 0)  
2857            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2858            else oclength = 0;
2859  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2860    
2861          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2862            {            {
2863            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2864            /* Need braces because of following else */              memcmp(eptr, charptr, length) == 0) eptr += length;
2865            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2866              else if (oclength > 0 &&
2867                       eptr <= md->end_subject - oclength &&
2868                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2869    #endif  /* SUPPORT_UCP */
2870            else            else
2871              {              {
2872              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2873              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2874              }              }
2875            }            }
2876    
# Line 2028  for (;;) Line 2880  for (;;)
2880            {            {
2881            for (fi = min;; fi++)            for (fi = min;; fi++)
2882              {              {
2883              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2884              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2885              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2886              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2887              /* Need braces because of following else */                memcmp(eptr, charptr, length) == 0) eptr += length;
2888              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2889                else if (oclength > 0 &&
2890                         eptr <= md->end_subject - oclength &&
2891                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2892    #endif  /* SUPPORT_UCP */
2893              else              else
2894                {                {
2895                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2896                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2897                }                }
2898              }              }
2899            /* Control never gets here */            /* Control never gets here */
2900            }            }
2901          else  
2902            else  /* Maximize */
2903            {            {
2904            pp = eptr;            pp = eptr;
2905            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2906              {              {
2907              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2908              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2909              else if (oclength == 0) break;  #ifdef SUPPORT_UCP
2910                else if (oclength > 0 &&
2911                         eptr <= md->end_subject - oclength &&
2912                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2913    #endif  /* SUPPORT_UCP */
2914              else              else
2915                {                {
2916                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2917                eptr += oclength;                break;
2918                }                }
2919              }              }
2920            while (eptr >= pp)  
2921             {            if (possessive) continue;
2922             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);  
2923             if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for(;;)
2924             eptr -= length;              {
2925             }              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2926            RRETURN(MATCH_NOMATCH);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2927                if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2928    #ifdef SUPPORT_UCP
2929                eptr--;
2930                BACKCHAR(eptr);
2931    #else   /* without SUPPORT_UCP */
2932                eptr -= length;
2933    #endif  /* SUPPORT_UCP */
2934                }
2935            }            }
2936          /* Control never gets here */          /* Control never gets here */
2937          }          }
# Line 2075  for (;;) Line 2944  for (;;)
2944  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2945    
2946      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2947        {  
2948        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2949    
2950      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2951      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2096  for (;;) Line 2963  for (;;)
2963        {        {
2964        fc = md->lcc[fc];        fc = md->lcc[fc];
2965        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2966          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2967            if (eptr >= md->end_subject)
2968              {
2969              SCHECK_PARTIAL();
2970              MRRETURN(MATCH_NOMATCH);
2971              }
2972            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2973            }
2974        if (min == max) continue;        if (min == max) continue;
2975        if (minimize)        if (minimize)
2976          {          {
2977          for (fi = min;; fi++)          for (fi = min;; fi++)
2978            {            {
2979            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2980            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2981            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2982                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2983              RRETURN(MATCH_NOMATCH);              {
2984                SCHECK_PARTIAL();
2985                MRRETURN(MATCH_NOMATCH);
2986                }
2987              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2988            }            }
2989          /* Control never gets here */          /* Control never gets here */
2990          }          }
2991        else        else  /* Maximize */
2992          {          {
2993          pp = eptr;          pp = eptr;
2994          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2995            {            {
2996            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2997                {
2998                SCHECK_PARTIAL();
2999                break;
3000                }
3001              if (fc != md->lcc[*eptr]) break;
3002            eptr++;            eptr++;
3003            }            }
3004    
3005            if (possessive) continue;
3006    
3007          while (eptr >= pp)          while (eptr >= pp)
3008            {            {
3009            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3010            eptr--;            eptr--;
3011            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3012            }            }
3013          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3014          }          }
3015        /* Control never gets here */        /* Control never gets here */
3016        }        }
# Line 2133  for (;;) Line 3019  for (;;)
3019    
3020      else      else
3021        {        {
3022        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
3023            {
3024            if (eptr >= md->end_subject)
3025              {
3026              SCHECK_PARTIAL();
3027              MRRETURN(MATCH_NOMATCH);
3028              }
3029            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3030            }
3031    
3032        if (min == max) continue;        if (min == max) continue;
3033    
3034        if (minimize)        if (minimize)
3035          {          {
3036          for (fi = min;; fi++)          for (fi = min;; fi++)
3037            {            {
3038            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3039            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3040            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3041              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3042                {
3043                SCHECK_PARTIAL();
3044                MRRETURN(MATCH_NOMATCH);
3045                }
3046              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3047            }            }
3048          /* Control never gets here */          /* Control never gets here */
3049          }          }
3050        else        else  /* Maximize */
3051          {          {
3052          pp = eptr;          pp = eptr;
3053          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3054            {            {
3055            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
3056                {
3057                SCHECK_PARTIAL();
3058                break;
3059                }
3060              if (fc != *eptr) break;
3061            eptr++;            eptr++;
3062            }            }
3063            if (possessive) continue;
3064    
3065          while (eptr >= pp)          while (eptr >= pp)
3066            {            {
3067            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3068            eptr--;            eptr--;
3069            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3070            }            }
3071          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3072          }          }
3073        }        }
3074      /* Control never gets here */      /* Control never gets here */
# Line 2169  for (;;) Line 3077  for (;;)
3077      checking can be multibyte. */      checking can be multibyte. */
3078    
3079      case OP_NOT:      case OP_NOT:
3080      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
3081          {
3082          SCHECK_PARTIAL();
3083          MRRETURN(MATCH_NOMATCH);
3084          }
3085      ecode++;      ecode++;
3086      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3087      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2178  for (;;) Line 3090  for (;;)
3090        if (c < 256)        if (c < 256)
3091  #endif  #endif
3092        c = md->lcc[c];        c = md->lcc[c];
3093        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3094        }        }
3095      else      else
3096        {        {
3097        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3098        }        }
3099      break;      break;
3100    
# Line 2206  for (;;) Line 3118  for (;;)
3118      ecode += 3;      ecode += 3;
3119      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3120    
3121        case OP_NOTPOSSTAR:
3122        possessive = TRUE;
3123        min = 0;
3124        max = INT_MAX;
3125        ecode++;
3126        goto REPEATNOTCHAR;
3127    
3128        case OP_NOTPOSPLUS:
3129        possessive = TRUE;
3130        min = 1;
3131        max = INT_MAX;
3132        ecode++;
3133        goto REPEATNOTCHAR;
3134    
3135        case OP_NOTPOSQUERY:
3136        possessive = TRUE;
3137        min = 0;
3138        max = 1;
3139        ecode++;
3140        goto REPEATNOTCHAR;
3141    
3142        case OP_NOTPOSUPTO:
3143        possessive = TRUE;
3144        min = 0;
3145        max = GET2(ecode, 1);
3146        ecode += 3;
3147        goto REPEATNOTCHAR;
3148    
3149      case OP_NOTSTAR:      case OP_NOTSTAR:
3150      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
3151      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2218  for (;;) Line 3158  for (;;)
3158      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3159      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3160    
3161      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3162    
3163      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3164      fc = *ecode++;      fc = *ecode++;
3165    
3166      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2245  for (;;) Line 3182  for (;;)
3182        /* UTF-8 mode */        /* UTF-8 mode */
3183        if (utf8)        if (utf8)
3184          {          {
3185          register int d;          register unsigned int d;
3186          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3187            {            {
3188              if (eptr >= md->end_subject)
3189                {
3190                SCHECK_PARTIAL();
3191                MRRETURN(MATCH_NOMATCH);
3192                }
3193            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3194            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3195            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3196            }            }
3197          }          }
3198        else        else
# Line 2259  for (;;) Line 3201  for (;;)
3201        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3202          {          {
3203          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3204            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3205              if (eptr >= md->end_subject)
3206                {
3207                SCHECK_PARTIAL();
3208                MRRETURN(MATCH_NOMATCH);
3209                }
3210              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3211              }
3212          }          }
3213    
3214        if (min == max) continue;        if (min == max) continue;
# Line 2270  for (;;) Line 3219  for (;;)
3219          /* UTF-8 mode */          /* UTF-8 mode */
3220          if (utf8)          if (utf8)
3221            {            {
3222            register int d;            register unsigned int d;
3223            for (fi = min;; fi++)            for (fi = min;; fi++)
3224              {              {
3225              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3226              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3227                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3228                if (eptr >= md->end_subject)
3229                  {
3230                  SCHECK_PARTIAL();
3231                  MRRETURN(MATCH_NOMATCH);
3232                  }
3233              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3234              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3235              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3236              }              }
3237            }            }
3238          else          else
# Line 2287  for (;;) Line 3241  for (;;)
3241            {            {
3242            for (fi = min;; fi++)            for (fi = min;; fi++)
3243              {              {
3244              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3245              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3246              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3247                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3248                  {
3249                  SCHECK_PARTIAL();
3250                  MRRETURN(MATCH_NOMATCH);
3251                  }
3252                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3253              }              }
3254            }            }
3255          /* Control never gets here */          /* Control never gets here */
# Line 2306  for (;;) Line 3265  for (;;)
3265          /* UTF-8 mode */          /* UTF-8 mode */
3266          if (utf8)          if (utf8)
3267            {            {
3268            register int d;            register unsigned int d;
3269            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3270              {              {
3271              int len = 1;              int len = 1;
3272              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3273                  {
3274                  SCHECK_PARTIAL();
3275                  break;
3276                  }
3277              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3278              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3279              if (fc == d) break;              if (fc == d) break;
3280              eptr += len;              eptr += len;
3281              }              }
3282            for(;;)          if (possessive) continue;
3283            for(;;)
3284              {              {
3285              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3286              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3287              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3288              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2330  for (;;) Line 3294  for (;;)
3294            {            {
3295            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3296              {              {
3297              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3298                  {
3299                  SCHECK_PARTIAL();
3300                  break;
3301                  }
3302                if (fc == md->lcc[*eptr]) break;
3303              eptr++;              eptr++;
3304              }              }
3305              if (possessive) continue;
3306            while (eptr >= pp)            while (eptr >= pp)
3307              {              {
3308              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3309              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3310              eptr--;              eptr--;
3311              }              }
3312            }            }
3313    
3314          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3315          }          }
3316        /* Control never gets here */        /* Control never gets here */
3317        }        }
# Line 2354  for (;;) Line 3324  for (;;)
3324        /* UTF-8 mode */        /* UTF-8 mode */
3325        if (utf8)        if (utf8)
3326          {          {
3327          register int d;          register unsigned int d;
3328          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3329            {            {
3330              if (eptr >= md->end_subject)
3331                {
3332                SCHECK_PARTIAL();
3333                MRRETURN(MATCH_NOMATCH);
3334                }
3335            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3336            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3337            }            }
3338          }          }
3339        else        else
# Line 2366  for (;;) Line 3341  for (;;)
3341        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3342          {          {
3343          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3344            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3345          }            if (eptr >= md->end_subject)
3346                {
3347                SCHECK_PARTIAL();
3348                MRRETURN(MATCH_NOMATCH);
3349                }
3350              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3351              }
3352            }
3353    
3354        if (min == max) continue;        if (min == max) continue;
3355    
# Line 2377  for (;;) Line 3359  for (;;)
3359          /* UTF-8 mode */          /* UTF-8 mode */
3360          if (utf8)          if (utf8)
3361            {            {
3362            register int d;            register unsigned int d;
3363            for (fi = min;; fi++)            for (fi = min;; fi++)
3364              {              {
3365              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3366              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3367                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3368                if (eptr >= md->end_subject)
3369                  {
3370                  SCHECK_PARTIAL();
3371                  MRRETURN(MATCH_NOMATCH);
3372                  }
3373              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3374              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3375              }              }
3376            }            }
3377          else          else
# Line 2393  for (;;) Line 3380  for (;;)
3380            {            {
3381            for (fi = min;; fi++)            for (fi = min;; fi++)
3382              {              {
3383              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3384              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3385              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3386                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3387                  {
3388                  SCHECK_PARTIAL();
3389                  MRRETURN(MATCH_NOMATCH);
3390                  }
3391                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3392              }              }
3393            }            }
3394          /* Control never gets here */          /* Control never gets here */
# Line 2412  for (;;) Line 3404  for (;;)
3404          /* UTF-8 mode */          /* UTF-8 mode */
3405          if (utf8)          if (utf8)
3406            {            {
3407            register int d;            register unsigned int d;
3408            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3409              {              {
3410              int len = 1;              int len = 1;
3411              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3412                  {
3413                  SCHECK_PARTIAL();
3414                  break;
3415                  }
3416              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3417              if (fc == d) break;              if (fc == d) break;
3418              eptr += len;              eptr += len;
3419              }              }
3420              if (possessive) continue;
3421            for(;;)            for(;;)
3422              {              {
3423              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3424              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3425              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3426              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2435  for (;;) Line 3432  for (;;)
3432            {            {
3433            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3434              {              {
3435              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3436                  {
3437                  SCHECK_PARTIAL();
3438                  break;
3439                  }
3440                if (fc == *eptr) break;
3441              eptr++;              eptr++;
3442              }              }
3443              if (possessive) continue;
3444            while (eptr >= pp)            while (eptr >= pp)
3445              {              {
3446              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3447              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3448              eptr--;              eptr--;
3449              }              }
3450            }            }
3451    
3452          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3453          }          }
3454        }        }
3455      /* Control never gets here */      /* Control never gets here */
# Line 2469  for (;;) Line 3472  for (;;)
3472      ecode += 3;      ecode += 3;
3473      goto REPEATTYPE;      goto REPEATTYPE;
3474    
3475        case OP_TYPEPOSSTAR:
3476        possessive = TRUE;
3477        min = 0;
3478        max = INT_MAX;
3479        ecode++;
3480        goto REPEATTYPE;
3481    
3482        case OP_TYPEPOSPLUS:
3483        possessive = TRUE;
3484        min = 1;
3485        max = INT_MAX;
3486        ecode++;
3487        goto REPEATTYPE;
3488    
3489        case OP_TYPEPOSQUERY:
3490        possessive = TRUE;
3491        min = 0;
3492        max = 1;
3493        ecode++;
3494        goto REPEATTYPE;
3495    
3496        case OP_TYPEPOSUPTO:
3497        possessive = TRUE;
3498        min = 0;
3499        max = GET2(ecode, 1);
3500        ecode += 3;
3501        goto REPEATTYPE;
3502    
3503      case OP_TYPESTAR:      case OP_TYPESTAR:
3504      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
3505      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2500  for (;;) Line 3531  for (;;)
3531    
3532      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3533      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3534      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3535      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3536      and single-bytes. */      and single-bytes. */
3537    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3538      if (min > 0)      if (min > 0)
3539        {        {
3540  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2515  for (;;) Line 3543  for (;;)
3543          switch(prop_type)          switch(prop_type)
3544            {            {
3545            case PT_ANY:            case PT_ANY:
3546            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3547            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3548              {              {
3549              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3550              GETCHARINC(c, eptr);                {
3551                  SCHECK_PARTIAL();
3552                  MRRETURN(MATCH_NOMATCH);
3553                  }
3554                GETCHARINCTEST(c, eptr);
3555              }              }
3556            break;            break;
3557    
3558            case PT_LAMP:            case PT_LAMP:
3559            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3560              {              {
3561              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3562              GETCHARINC(c, eptr);                {
3563              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3564                  MRRETURN(MATCH_NOMATCH);
3565                  }
3566                GETCHARINCTEST(c, eptr);
3567                prop_chartype = UCD_CHARTYPE(c);
3568              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3569                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3570                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3571                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3572              }              }
3573            break;            break;
3574    
3575            case PT_GC:            case PT_GC:
3576            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3577              {              {
3578              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3579              GETCHARINC(c, eptr);                {
3580              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3581                  MRRETURN(MATCH_NOMATCH);
3582                  }
3583                GETCHARINCTEST(c, eptr);
3584                prop_category = UCD_CATEGORY(c);
3585              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3586                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3587              }              }
3588            break;            break;
3589    
3590            case PT_PC:            case PT_PC:
3591            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3592              {              {
3593              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3594              GETCHARINC(c, eptr);                {
3595              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3596                  MRRETURN(MATCH_NOMATCH);
3597                  }
3598                GETCHARINCTEST(c, eptr);
3599                prop_chartype = UCD_CHARTYPE(c);
3600              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3601                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3602              }              }
3603            break;            break;
3604    
3605            case PT_SC:            case PT_SC:
3606            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3607              {              {
3608              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3609              GETCHARINC(c, eptr);                {
3610              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3611                  MRRETURN(MATCH_NOMATCH);
3612                  }
3613                GETCHARINCTEST(c, eptr);
3614                prop_script = UCD_SCRIPT(c);
3615              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3616                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3617                }
3618              break;
3619    
3620              case PT_ALNUM:
3621              for (i = 1; i <= min; i++)
3622                {
3623                if (eptr >= md->end_subject)
3624                  {
3625                  SCHECK_PARTIAL();
3626                  MRRETURN(MATCH_NOMATCH);
3627                  }
3628                GETCHARINCTEST(c, eptr);
3629                prop_category = UCD_CATEGORY(c);
3630                if ((prop_category == ucp_L || prop_category == ucp_N)
3631                       == prop_fail_result)
3632                  MRRETURN(MATCH_NOMATCH);
3633                }
3634              break;
3635    
3636              case PT_SPACE:    /* Perl space */
3637              for (i = 1; i <= min; i++)
3638                {
3639                if (eptr >= md->end_subject)
3640                  {
3641                  SCHECK_PARTIAL();
3642                  MRRETURN(MATCH_NOMATCH);
3643                  }
3644                GETCHARINCTEST(c, eptr);
3645                prop_category = UCD_CATEGORY(c);
3646                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3647                     c == CHAR_FF || c == CHAR_CR)
3648                       == prop_fail_result)
3649                  MRRETURN(MATCH_NOMATCH);
3650                }
3651              break;
3652    
3653              case PT_PXSPACE:  /* POSIX space */
3654              for (i = 1; i <= min; i++)
3655                {
3656                if (eptr >= md->end_subject)
3657                  {
3658                  SCHECK_PARTIAL();
3659                  MRRETURN(MATCH_NOMATCH);
3660                  }
3661                GETCHARINCTEST(c, eptr);
3662                prop_category = UCD_CATEGORY(c);
3663                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3664                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3665                       == prop_fail_result)
3666                  MRRETURN(MATCH_NOMATCH);
3667                }
3668              break;
3669    
3670              case PT_WORD:
3671              for (i = 1; i <= min; i++)
3672                {
3673                if (eptr >= md->end_subject)
3674                  {
3675                  SCHECK_PARTIAL();
3676                  MRRETURN(MATCH_NOMATCH);
3677                  }
3678                GETCHARINCTEST(c, eptr);
3679                prop_category = UCD_CATEGORY(c);
3680                if ((prop_category == ucp_L || prop_category == ucp_N ||
3681                     c == CHAR_UNDERSCORE)
3682                       == prop_fail_result)
3683                  MRRETURN(MATCH_NOMATCH);
3684              }              }
3685            break;            break;
3686    
3687              /* This should not occur */
3688    
3689            default:            default:
3690            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
3691            }            }
3692          }          }
3693    
# Line 2582  for (;;) Line 3698  for (;;)
3698          {          {
3699          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3700            {            {
3701              if (eptr >= md->end_subject)
3702                {
3703                SCHECK_PARTIAL();
3704                MRRETURN(MATCH_NOMATCH);
3705                }
3706            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3707            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3708            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3709            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3710              {              {
3711              int len = 1;              int len = 1;
3712              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3713                {                else { GETCHARLEN(c, eptr, len); }
3714                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3715              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3716              eptr += len;              eptr += len;
3717              }              }
# Line 2610  for (;;) Line 3729  for (;;)
3729          case OP_ANY:          case OP_ANY:
3730          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3731            {            {
3732            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3733                 ((ims & PCRE_DOTALL) == 0 &&              {
3734                   eptr <= md->end_subject - md->nllen &&              SCHECK_PARTIAL();
3735                   IS_NEWLINE(eptr)))              MRRETURN(MATCH_NOMATCH);
3736              RRETURN(MATCH_NOMATCH);              }
3737              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3738              eptr++;
3739              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3740              }
3741            break;
3742    
3743            case OP_ALLANY:
3744            for (i = 1; i <= min; i++)
3745              {
3746              if (eptr >= md->end_subject)
3747                {
3748                SCHECK_PARTIAL();
3749                MRRETURN(MATCH_NOMATCH);
3750                }
3751            eptr++;            eptr++;
3752            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3753            }            }
3754          break;          break;
3755    
3756          case OP_ANYBYTE:          case OP_ANYBYTE:
3757            if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3758          eptr += min;          eptr += min;
3759          break;          break;
3760    
3761            case OP_ANYNL:
3762            for (i = 1; i <= min; i++)
3763              {
3764              if (eptr >= md->end_subject)
3765                {
3766                SCHECK_PARTIAL();
3767                MRRETURN(MATCH_NOMATCH);
3768                }
3769              GETCHARINC(c, eptr);
3770              switch(c)
3771                {
3772                default: MRRETURN(MATCH_NOMATCH);
3773                case 0x000d:
3774                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3775                break;
3776    
3777                case 0x000a:
3778                break;
3779    
3780                case 0x000b:
3781                case 0x000c:
3782                case 0x0085:
3783                case 0x2028:
3784                case 0x2029:
3785                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3786                break;
3787                }
3788              }
3789            break;
3790    
3791            case OP_NOT_HSPACE:
3792            for (i = 1; i <= min; i++)
3793              {
3794              if (eptr >= md->end_subject)
3795                {
3796                SCHECK_PARTIAL();
3797                MRRETURN(MATCH_NOMATCH);
3798                }
3799              GETCHARINC(c, eptr);
3800              switch(c)
3801                {
3802                default: break;
3803                case 0x09:      /* HT */
3804                case 0x20:      /* SPACE */
3805                case 0xa0:      /* NBSP */
3806                case 0x1680:    /* OGHAM SPACE MARK */
3807                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3808                case 0x2000:    /* EN QUAD */
3809                case 0x2001:    /* EM QUAD */
3810                case 0x2002:    /* EN SPACE */
3811                case 0x2003:    /* EM SPACE */
3812                case 0x2004:    /* THREE-PER-EM SPACE */
3813                case 0x2005:    /* FOUR-PER-EM SPACE */
3814                case 0x2006:    /* SIX-PER-EM SPACE */
3815                case 0x2007:    /* FIGURE SPACE */
3816                case 0x2008:    /* PUNCTUATION SPACE */
3817                case 0x2009:    /* THIN SPACE */
3818                case 0x200A:    /* HAIR SPACE */
3819                case 0x202f:    /* NARROW NO-BREAK SPACE */
3820                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3821                case 0x3000:    /* IDEOGRAPHIC SPACE */
3822                MRRETURN(MATCH_NOMATCH);
3823                }
3824              }
3825            break;
3826    
3827            case OP_HSPACE:
3828            for (i = 1; i <= min; i++)
3829              {
3830              if (eptr >= md->end_subject)
3831                {
3832                SCHECK_PARTIAL();
3833                MRRETURN(MATCH_NOMATCH);
3834                }
3835              GETCHARINC(c, eptr);
3836              switch(c)
3837                {
3838                default: MRRETURN(MATCH_NOMATCH);
3839                case 0x09:      /* HT */
3840                case 0x20:      /* SPACE */
3841                case 0xa0:      /* NBSP */
3842                case 0x1680:    /* OGHAM SPACE MARK */
3843                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3844                case 0x2000:    /* EN QUAD */
3845                case 0x2001:    /* EM QUAD */
3846                case 0x2002:    /* EN SPACE */
3847