/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 511 by ph10, Mon Mar 29 09:25:38 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49    #define NLBLOCK md             /* Block containing newline information */
50    #define PSSTART start_subject  /* Field containing processed string start */
51    #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    /* Undefine some potentially clashing cpp symbols */
56    
57  /* Structure for building a chain of data that actually lives on the  #undef min
58  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   USPTR epb_saved_eptr;  
 } eptrblock;  
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_ACCEPT       (-999)
75    #define MATCH_COMMIT       (-998)
76    #define MATCH_PRUNE        (-997)
77    #define MATCH_SKIP         (-996)
78    #define MATCH_SKIP_ARG     (-995)
79    #define MATCH_THEN         (-994)
80    
81    /* This is a convenience macro for code that occurs many times. */
82    
83    #define MRRETURN(ra) \
84      { \
85      md->mark = markptr; \
86      RRETURN(ra); \
87      }
88    
89  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
90  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 81  static const char rep_max[] = { 0, 0, 0, Line 99  static const char rep_max[] = { 0, 0, 0,
99    
100    
101    
102  #ifdef DEBUG  #ifdef PCRE_DEBUG
103  /*************************************************  /*************************************************
104  *        Debugging function to print chars       *  *        Debugging function to print chars       *
105  *************************************************/  *************************************************/
# Line 101  Returns:     nothing Line 119  Returns:     nothing
119  static void  static void
120  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121  {  {
122  int c;  unsigned int c;
123  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124  while (length-- > 0)  while (length-- > 0)
125    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 133  match_ref(int offset, register USPTR ept Line 151  match_ref(int offset, register USPTR ept
151  {  {
152  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
153    
154  #ifdef DEBUG  #ifdef PCRE_DEBUG
155  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
156    printf("matching subject <null>");    printf("matching subject <null>");
157  else  else
# Line 150  printf("\n"); Line 168  printf("\n");
168    
169  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
170    
171  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172    properly if Unicode properties are supported. Otherwise, we can check only
173    ASCII characters. */
174    
175  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
176    {    {
177    #ifdef SUPPORT_UTF8
178    #ifdef SUPPORT_UCP
179      if (md->utf8)
180        {
181        USPTR endptr = eptr + length;
182        while (eptr < endptr)
183          {
184          int c, d;
185          GETCHARINC(c, eptr);
186          GETCHARINC(d, p);
187          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188          }
189        }
190      else
191    #endif
192    #endif
193    
194      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195      is no UCP support. */
196    
197    while (length-- > 0)    while (length-- > 0)
198      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199    }    }
200    
201    /* In the caseful case, we can just compare the bytes, whether or not we
202    are in UTF-8 mode. */
203    
204  else  else
205    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
# Line 186  calls by keeping local variables that ne Line 230  calls by keeping local variables that ne
230  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
231  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
232  always used to.  always used to.
233    
234    The original heap-recursive code used longjmp(). However, it seems that this
235    can be very slow on some operating systems. Following a suggestion from Stan
236    Switzer, the use of longjmp() has been abolished, at the cost of having to
237    provide a unique number for each call to RMATCH. There is no way of generating
238    a sequence of numbers at compile time in C. I have given them names, to make
239    them stand out more clearly.
240    
241    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243    tests. Furthermore, not using longjmp() means that local dynamic variables
244    don't have indeterminate values; this has meant that the frame size can be
245    reduced because the result can be "passed back" by straight setting of the
246    variable instead of being passed in the frame.
247  ****************************************************************************  ****************************************************************************
248  ***************************************************************************/  ***************************************************************************/
249    
250    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251    below must be updated in sync.  */
252    
253    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
254           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
255           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258           RM51,  RM52, RM53, RM54 };
259    
260  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
261  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
262    actually used in this definition. */
263    
264  #ifndef NO_RECURSE  #ifndef NO_RECURSE
265  #define REGISTER register  #define REGISTER register
266  #ifdef DEBUG  
267  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #ifdef PCRE_DEBUG
268    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
269    { \    { \
270    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
271    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
272    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
273    }    }
274  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 208  versions and production versions. */ Line 277  versions and production versions. */
277    return ra; \    return ra; \
278    }    }
279  #else  #else
280  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
281    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
282  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
283  #endif  #endif
284    
285  #else  #else
286    
287    
288  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
289  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
290  match(), which never changes. */  argument of match(), which never changes. */
291    
292  #define REGISTER  #define REGISTER
293    
294  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
295    {\    {\
296    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
297    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
298      {\    newframe->Xeptr = ra;\
299      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
300      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
301      newframe->Xoffset_top = rc;\    newframe->Xmarkptr = markptr;\
302      newframe->Xims = re;\    newframe->Xoffset_top = rc;\
303      newframe->Xeptrb = rf;\    newframe->Xims = re;\
304      newframe->Xflags = rg;\    newframe->Xeptrb = rf;\
305      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xflags = rg;\
306      newframe->Xprevframe = frame;\    newframe->Xrdepth = frame->Xrdepth + 1;\
307      frame = newframe;\    newframe->Xprevframe = frame;\
308      DPRINTF(("restarting from line %d\n", __LINE__));\    frame = newframe;\
309      goto HEAP_RECURSE;\    DPRINTF(("restarting from line %d\n", __LINE__));\
310      }\    goto HEAP_RECURSE;\
311    else\    L_##rw:\
312      {\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
313    }    }
314    
315  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 254  match(), which never changes. */ Line 319  match(), which never changes. */
319    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
320    if (frame != NULL)\    if (frame != NULL)\
321      {\      {\
322      frame->Xresult = ra;\      rrc = ra;\
323      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
324      }\      }\
325    return ra;\    return ra;\
326    }    }
# Line 269  typedef struct heapframe { Line 333  typedef struct heapframe {
333    
334    /* Function arguments that may change */    /* Function arguments that may change */
335    
336    const uschar *Xeptr;    USPTR Xeptr;
337    const uschar *Xecode;    const uschar *Xecode;
338      USPTR Xmstart;
339      USPTR Xmarkptr;
340    int Xoffset_top;    int Xoffset_top;
341    long int Xims;    long int Xims;
342    eptrblock *Xeptrb;    eptrblock *Xeptrb;
343    int Xflags;    int Xflags;
344    int Xrdepth;    unsigned int Xrdepth;
345    
346    /* Function local variables */    /* Function local variables */
347    
348    const uschar *Xcallpat;    USPTR Xcallpat;
349    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
350    const uschar *Xdata;    USPTR Xcharptr;
351    const uschar *Xnext;  #endif
352    const uschar *Xpp;    USPTR Xdata;
353    const uschar *Xprev;    USPTR Xnext;
354    const uschar *Xsaved_eptr;    USPTR Xpp;
355      USPTR Xprev;
356      USPTR Xsaved_eptr;
357    
358    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
359    
360    BOOL Xcur_is_word;    BOOL Xcur_is_word;
361    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
362    BOOL Xprev_is_word;    BOOL Xprev_is_word;
363    
364    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
# Line 303  typedef struct heapframe { Line 370  typedef struct heapframe {
370    int Xprop_category;    int Xprop_category;
371    int Xprop_chartype;    int Xprop_chartype;
372    int Xprop_script;    int Xprop_script;
373    int *Xprop_test_variable;    int Xoclength;
374      uschar Xocchars[8];
375  #endif  #endif
376    
377      int Xcodelink;
378    int Xctype;    int Xctype;
379    int Xfc;    unsigned int Xfc;
380    int Xfi;    int Xfi;
381    int Xlength;    int Xlength;
382    int Xmax;    int Xmax;
# Line 321  typedef struct heapframe { Line 390  typedef struct heapframe {
390    
391    eptrblock Xnewptrb;    eptrblock Xnewptrb;
392    
393    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
394    
395    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
396    
397  } heapframe;  } heapframe;
398    
# Line 340  typedef struct heapframe { Line 408  typedef struct heapframe {
408  *         Match from current position            *  *         Match from current position            *
409  *************************************************/  *************************************************/
410    
411  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
412  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
413  same response.  same response. */
414    
415  Performance note: It might be tempting to extract commonly used fields from the  /* These macros pack up tests that are used for partial matching, and which
416  md structure (e.g. utf8, end_subject) into individual variables to improve  appear several times in the code. We set the "hit end" flag if the pointer is
417    at the end of the subject and also past the start of the subject (i.e.
418    something has been matched). For hard partial matching, we then return
419    immediately. The second one is used when we already know we are past the end of
420    the subject. */
421    
422    #define CHECK_PARTIAL()\
423      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
424        {\
425        md->hitend = TRUE;\
426        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
427        }
428    
429    #define SCHECK_PARTIAL()\
430      if (md->partial != 0 && eptr > mstart)\
431        {\
432        md->hitend = TRUE;\
433        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
434        }
435    
436    
437    /* Performance note: It might be tempting to extract commonly used fields from
438    the md structure (e.g. utf8, end_subject) into individual variables to improve
439  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
440  made performance worse.  made performance worse.
441    
442  Arguments:  Arguments:
443     eptr        pointer in subject     eptr        pointer to current character in subject
444     ecode       position in code     ecode       pointer to current position in compiled code
445       mstart      pointer to the current match start position (can be modified
446                     by encountering \K)
447       markptr     pointer to the most recent MARK name, or NULL
448     offset_top  current top pointer     offset_top  current top pointer
449     md          pointer to "static" info for the match     md          pointer to "static" info for the match
450     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 362  Arguments: Line 452  Arguments:
452                   brackets - for testing for empty matches                   brackets - for testing for empty matches
453     flags       can contain     flags       can contain
454                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
455                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
456                       group that can match an empty string
457     rdepth      the recursion depth     rdepth      the recursion depth
458    
459  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
460                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
461                   a negative MATCH_xxx value for PRUNE, SKIP, etc
462                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
463                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
464  */  */
465    
466  static int  static int
467  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
468    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
469    int flags, int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
470  {  {
471  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
472  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
473  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
474    
475  register int  rrc;    /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
476  register int  i;      /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
477  register int  c;      /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
478  register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
479    
480    BOOL minimize, possessive; /* Quantifier options */
481    int condcode;
482    
483  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
484  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 398  frame->Xprevframe = NULL;            /* Line 493  frame->Xprevframe = NULL;            /*
493    
494  frame->Xeptr = eptr;  frame->Xeptr = eptr;
495  frame->Xecode = ecode;  frame->Xecode = ecode;
496    frame->Xmstart = mstart;
497    frame->Xmarkptr = markptr;
498  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
499  frame->Xims = ims;  frame->Xims = ims;
500  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 412  HEAP_RECURSE: Line 509  HEAP_RECURSE:
509    
510  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
511  #define ecode              frame->Xecode  #define ecode              frame->Xecode
512    #define mstart             frame->Xmstart
513    #define markptr            frame->Xmarkptr
514  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
515  #define ims                frame->Xims  #define ims                frame->Xims
516  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 424  HEAP_RECURSE: Line 523  HEAP_RECURSE:
523  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
524  #endif  #endif
525  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
526    #define codelink           frame->Xcodelink
527  #define data               frame->Xdata  #define data               frame->Xdata
528  #define next               frame->Xnext  #define next               frame->Xnext
529  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 434  HEAP_RECURSE: Line 534  HEAP_RECURSE:
534    
535  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
536  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
537  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
538    
539  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
# Line 446  HEAP_RECURSE: Line 545  HEAP_RECURSE:
545  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
546  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
547  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
548  #define prop_test_variable frame->Xprop_test_variable  #define oclength           frame->Xoclength
549    #define occhars            frame->Xocchars
550  #endif  #endif
551    
552  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 470  HEAP_RECURSE: Line 570  HEAP_RECURSE:
570  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
571  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
572    
573  #else  #else         /* NO_RECURSE not defined */
574  #define fi i  #define fi i
575  #define fc c  #define fc c
576    
# Line 489  recursion_info new_recursive;      /* wi Line 589  recursion_info new_recursive;      /* wi
589                                     /* that do not have to be preserved over  */                                     /* that do not have to be preserved over  */
590  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
591  BOOL condition;  BOOL condition;
 BOOL minimize;  
592  BOOL prev_is_word;  BOOL prev_is_word;
593    
594  unsigned long int original_ims;  unsigned long int original_ims;
# Line 501  int prop_fail_result; Line 600  int prop_fail_result;
600  int prop_category;  int prop_category;
601  int prop_chartype;  int prop_chartype;
602  int prop_script;  int prop_script;
603  int *prop_test_variable;  int oclength;
604    uschar occhars[8];
605  #endif  #endif
606    
607    int codelink;
608  int ctype;  int ctype;
609  int length;  int length;
610  int max;  int max;
# Line 516  int save_offset1, save_offset2, save_off Line 617  int save_offset1, save_offset2, save_off
617  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
618    
619  eptrblock newptrb;  eptrblock newptrb;
620  #endif  #endif     /* NO_RECURSE */
621    
622  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
623  variables. */  variables. */
# Line 524  variables. */ Line 625  variables. */
625  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
626  prop_value = 0;  prop_value = 0;
627  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_variable = NULL;  
628  #endif  #endif
629    
630    
631    /* This label is used for tail recursion, which is used in a few cases even
632    when NO_RECURSE is not defined, in order to reduce the amount of stack that is
633    used. Thanks to Ian Taylor for noticing this possibility and sending the
634    original patch. */
635    
636    TAIL_RECURSE:
637    
638  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
639  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
640  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
641  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
642  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
643  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
644  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
645    
646    #ifdef SUPPORT_UTF8
647    utf8 = md->utf8;       /* Local copy of the flag */
648    #else
649    utf8 = FALSE;
650    #endif
651    
652  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
653  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
654    
# Line 542  if (md->match_call_count++ >= md->match_ Line 656  if (md->match_call_count++ >= md->match_
656  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
657    
658  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
 utf8 = md->utf8;       /* Local copy of the flag */  
659    
660  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
661  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
662  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
663  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
664    When match() is called in other circumstances, don't add to the chain. The
665    match_cbegroup flag must NOT be used with tail recursion, because the memory
666    block that is used is on the stack, so a new one may be required for each
667    match(). */
668    
669  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
670    {    {
   newptrb.epb_prev = eptrb;  
671    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
672      newptrb.epb_prev = eptrb;
673    eptrb = &newptrb;    eptrb = &newptrb;
674    }    }
675    
676  /* Now start processing the operations. */  /* Now start processing the opcodes. */
677    
678  for (;;)  for (;;)
679    {    {
680      minimize = possessive = FALSE;
681    op = *ecode;    op = *ecode;
   minimize = FALSE;  
   
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
   /* Opening capturing bracket. If there is space in the offset vector, save  
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
682    
683    if (op > OP_BRA)    switch(op)
684      {      {
685      number = op - OP_BRA;      case OP_MARK:
686        markptr = ecode + 2;
687      /* For extended extraction brackets (large number), we have to fish out the      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
688      number from a dummy opcode at the start. */        ims, eptrb, flags, RM51);
689    
690      if (number > EXTRACT_BASIC_MAX)      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
691        number = GET2(ecode, 2+LINK_SIZE);      argument, and we must check whether that argument matches this MARK's
692        argument. It is passed back in md->start_match_ptr (an overloading of that
693        variable). If it does match, we reset that variable to the current subject
694        position and return MATCH_SKIP. Otherwise, pass back the return code
695        unaltered. */
696    
697        if (rrc == MATCH_SKIP_ARG &&
698            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
699          {
700          md->start_match_ptr = eptr;
701          RRETURN(MATCH_SKIP);
702          }
703    
704        if (md->mark == NULL) md->mark = markptr;
705        RRETURN(rrc);
706    
707        case OP_FAIL:
708        MRRETURN(MATCH_NOMATCH);
709    
710        case OP_COMMIT:
711        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
712          ims, eptrb, flags, RM52);
713        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
714        MRRETURN(MATCH_COMMIT);
715    
716        case OP_PRUNE:
717        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
718          ims, eptrb, flags, RM51);
719        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
720        MRRETURN(MATCH_PRUNE);
721    
722        case OP_PRUNE_ARG:
723        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
724          ims, eptrb, flags, RM51);
725        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
726        md->mark = ecode + 2;
727        RRETURN(MATCH_PRUNE);
728    
729        case OP_SKIP:
730        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
731          ims, eptrb, flags, RM53);
732        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
733        md->start_match_ptr = eptr;   /* Pass back current position */
734        MRRETURN(MATCH_SKIP);
735    
736        case OP_SKIP_ARG:
737        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
738          ims, eptrb, flags, RM53);
739        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
740    
741        /* Pass back the current skip name by overloading md->start_match_ptr and
742        returning the special MATCH_SKIP_ARG return code. This will either be
743        caught by a matching MARK, or get to the top, where it is treated the same
744        as PRUNE. */
745    
746        md->start_match_ptr = ecode + 2;
747        RRETURN(MATCH_SKIP_ARG);
748    
749        case OP_THEN:
750        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
751          ims, eptrb, flags, RM54);
752        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
753        MRRETURN(MATCH_THEN);
754    
755        case OP_THEN_ARG:
756        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
757          ims, eptrb, flags, RM54);
758        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
759        md->mark = ecode + 2;
760        RRETURN(MATCH_THEN);
761    
762        /* Handle a capturing bracket. If there is space in the offset vector, save
763        the current subject position in the working slot at the top of the vector.
764        We mustn't change the current values of the data slot, because they may be
765        set from a previous iteration of this group, and be referred to by a
766        reference inside the group.
767    
768        If the bracket fails to match, we need to restore this value and also the
769        values of the final offsets, in case they were set by a previous iteration
770        of the same bracket.
771    
772        If there isn't enough space in the offset vector, treat this as if it were
773        a non-capturing bracket. Don't worry about setting the flag for the error
774        case here; that is handled in the code for KET. */
775    
776        case OP_CBRA:
777        case OP_SCBRA:
778        number = GET2(ecode, 1+LINK_SIZE);
779      offset = number << 1;      offset = number << 1;
780    
781  #ifdef DEBUG  #ifdef PCRE_DEBUG
782      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
783        printf("subject=");
784      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
785      printf("\n");      printf("\n");
786  #endif  #endif
# Line 612  for (;;) Line 795  for (;;)
795        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
796        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
797    
798          flags = (op == OP_SCBRA)? match_cbegroup : 0;
799        do        do
800          {          {
801          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
802            match_isgroup);            ims, eptrb, flags, RM1);
803          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
804          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
805          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
806          }          }
# Line 628  for (;;) Line 812  for (;;)
812        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
813        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
814    
815          if (rrc != MATCH_THEN) md->mark = markptr;
816        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
817        }        }
818    
819      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
820        as a non-capturing bracket. */
821    
822      else op = OP_BRA;      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
823      }      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
824    
825    /* Other types of node can be handled by a switch */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
826    
827    switch(op)      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
828      {      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
829      case OP_BRA:     /* Non-capturing bracket: optimized */  
830      DPRINTF(("start bracket 0\n"));      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
831      do      final alternative within the brackets, we would return the result of a
832        recursive call to match() whatever happened. We can reduce stack usage by
833        turning this into a tail recursion, except in the case when match_cbegroup
834        is set. */
835    
836        case OP_BRA:
837        case OP_SBRA:
838        DPRINTF(("start non-capturing bracket\n"));
839        flags = (op >= OP_SBRA)? match_cbegroup : 0;
840        for (;;)
841        {        {
842        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
843          match_isgroup);          {
844        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (flags == 0)    /* Not a possibly empty group */
845              {
846              ecode += _pcre_OP_lengths[*ecode];
847              DPRINTF(("bracket 0 tail recursion\n"));
848              goto TAIL_RECURSE;
849              }
850    
851            /* Possibly empty group; can't use tail recursion. */
852    
853            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
854              eptrb, flags, RM48);
855            if (rrc == MATCH_NOMATCH) md->mark = markptr;
856            RRETURN(rrc);
857            }
858    
859          /* For non-final alternatives, continue the loop for a NOMATCH result;
860          otherwise return. */
861    
862          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
863            eptrb, flags, RM2);
864          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
865        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
866        }        }
867      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
868    
869      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
870      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
871      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
872      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
873        obeyed, we can use tail recursion to avoid using another stack frame. */
874    
875      case OP_COND:      case OP_COND:
876      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
877        codelink= GET(ecode, 1);
878    
879        /* Because of the way auto-callout works during compile, a callout item is
880        inserted between OP_COND and an assertion condition. */
881    
882        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
883          {
884          if (pcre_callout != NULL)
885            {
886            pcre_callout_block cb;
887            cb.version          = 1;   /* Version 1 of the callout block */
888            cb.callout_number   = ecode[LINK_SIZE+2];
889            cb.offset_vector    = md->offset_vector;
890            cb.subject          = (PCRE_SPTR)md->start_subject;
891            cb.subject_length   = md->end_subject - md->start_subject;
892            cb.start_match      = mstart - md->start_subject;
893            cb.current_position = eptr - md->start_subject;
894            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
895            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
896            cb.capture_top      = offset_top/2;
897            cb.capture_last     = md->capture_last;
898            cb.callout_data     = md->callout_data;
899            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
900            if (rrc < 0) RRETURN(rrc);
901            }
902          ecode += _pcre_OP_lengths[OP_CALLOUT];
903          }
904    
905        condcode = ecode[LINK_SIZE+1];
906    
907        /* Now see what the actual condition is */
908    
909        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
910          {
911          if (md->recursive == NULL)                /* Not recursing => FALSE */
912            {
913            condition = FALSE;
914            ecode += GET(ecode, 1);
915            }
916          else
917            {
918            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
919            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
920    
921            /* If the test is for recursion into a specific subpattern, and it is
922            false, but the test was set up by name, scan the table to see if the
923            name refers to any other numbers, and test them. The condition is true
924            if any one is set. */
925    
926            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
927              {
928              uschar *slotA = md->name_table;
929              for (i = 0; i < md->name_count; i++)
930                {
931                if (GET2(slotA, 0) == recno) break;
932                slotA += md->name_entry_size;
933                }
934    
935              /* Found a name for the number - there can be only one; duplicate
936              names for different numbers are allowed, but not vice versa. First
937              scan down for duplicates. */
938    
939              if (i < md->name_count)
940                {
941                uschar *slotB = slotA;
942                while (slotB > md->name_table)
943                  {
944                  slotB -= md->name_entry_size;
945                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
946                    {
947                    condition = GET2(slotB, 0) == md->recursive->group_num;
948                    if (condition) break;
949                    }
950                  else break;
951                  }
952    
953                /* Scan up for duplicates */
954    
955                if (!condition)
956                  {
957                  slotB = slotA;
958                  for (i++; i < md->name_count; i++)
959                    {
960                    slotB += md->name_entry_size;
961                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
962                      {
963                      condition = GET2(slotB, 0) == md->recursive->group_num;
964                      if (condition) break;
965                      }
966                    else break;
967                    }
968                  }
969                }
970              }
971    
972            /* Choose branch according to the condition */
973    
974            ecode += condition? 3 : GET(ecode, 1);
975            }
976          }
977    
978        else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
979        {        {
980        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
981        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
982          (md->recursive != NULL) :  
983          (offset < offset_top && md->offset_vector[offset] >= 0);        /* If the numbered capture is unset, but the reference was by name,
984        RMATCH(rrc, eptr, ecode + (condition?        scan the table to see if the name refers to any other numbers, and test
985          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),        them. The condition is true if any one is set. This is tediously similar
986          offset_top, md, ims, eptrb, match_isgroup);        to the code above, but not close enough to try to amalgamate. */
987        RRETURN(rrc);  
988          if (!condition && condcode == OP_NCREF)
989            {
990            int refno = offset >> 1;
991            uschar *slotA = md->name_table;
992    
993            for (i = 0; i < md->name_count; i++)
994              {
995              if (GET2(slotA, 0) == refno) break;
996              slotA += md->name_entry_size;
997              }
998    
999            /* Found a name for the number - there can be only one; duplicate names
1000            for different numbers are allowed, but not vice versa. First scan down
1001            for duplicates. */
1002    
1003            if (i < md->name_count)
1004              {
1005              uschar *slotB = slotA;
1006              while (slotB > md->name_table)
1007                {
1008                slotB -= md->name_entry_size;
1009                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1010                  {
1011                  offset = GET2(slotB, 0) << 1;
1012                  condition = offset < offset_top &&
1013                    md->offset_vector[offset] >= 0;
1014                  if (condition) break;
1015                  }
1016                else break;
1017                }
1018    
1019              /* Scan up for duplicates */
1020    
1021              if (!condition)
1022                {
1023                slotB = slotA;
1024                for (i++; i < md->name_count; i++)
1025                  {
1026                  slotB += md->name_entry_size;
1027                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1028                    {
1029                    offset = GET2(slotB, 0) << 1;
1030                    condition = offset < offset_top &&
1031                      md->offset_vector[offset] >= 0;
1032                    if (condition) break;
1033                    }
1034                  else break;
1035                  }
1036                }
1037              }
1038            }
1039    
1040          /* Choose branch according to the condition */
1041    
1042          ecode += condition? 3 : GET(ecode, 1);
1043          }
1044    
1045        else if (condcode == OP_DEF)     /* DEFINE - always false */
1046          {
1047          condition = FALSE;
1048          ecode += GET(ecode, 1);
1049        }        }
1050    
1051      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
1052      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
1053        assertion. */
1054    
1055      else      else
1056        {        {
1057        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1058            match_condassert | match_isgroup);            match_condassert, RM3);
1059        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1060          {          {
1061          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
1062            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1063          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1064          }          }
1065        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1066          {          {
1067          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1068          }          }
1069        else ecode += GET(ecode, 1);        else
1070        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
1071          match_isgroup);          condition = FALSE;
1072        RRETURN(rrc);          ecode += codelink;
1073            }
1074        }        }
     /* Control never reaches here */  
1075    
1076      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
1077      encountered. */      we can use tail recursion to avoid using another stack frame, except when
1078        match_cbegroup is required for an unlimited repeat of a possibly empty
1079        group. If the second alternative doesn't exist, we can just plough on. */
1080    
1081        if (condition || *ecode == OP_ALT)
1082          {
1083          ecode += 1 + LINK_SIZE;
1084          if (op == OP_SCOND)        /* Possibly empty group */
1085            {
1086            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1087            RRETURN(rrc);
1088            }
1089          else                       /* Group must match something */
1090            {
1091            flags = 0;
1092            goto TAIL_RECURSE;
1093            }
1094          }
1095        else                         /* Condition false & no alternative */
1096          {
1097          ecode += 1 + LINK_SIZE;
1098          }
1099        break;
1100    
1101    
1102        /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1103        to close any currently open capturing brackets. */
1104    
1105        case OP_CLOSE:
1106        number = GET2(ecode, 1);
1107        offset = number << 1;
1108    
1109    #ifdef PCRE_DEBUG
1110          printf("end bracket %d at *ACCEPT", number);
1111          printf("\n");
1112    #endif
1113    
1114      case OP_CREF:      md->capture_last = number;
1115      case OP_BRANUMBER:      if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1116          {
1117          md->offset_vector[offset] =
1118            md->offset_vector[md->offset_end - number];
1119          md->offset_vector[offset+1] = eptr - md->start_subject;
1120          if (offset_top <= offset) offset_top = offset + 2;
1121          }
1122      ecode += 3;      ecode += 3;
1123      break;      break;
1124    
     /* End of the pattern. If we are in a recursion, we should restore the  
     offsets appropriately and continue from after the call. */  
1125    
1126        /* End of the pattern, either real or forced. If we are in a top-level
1127        recursion, we should restore the offsets appropriately and continue from
1128        after the call. */
1129    
1130        case OP_ACCEPT:
1131      case OP_END:      case OP_END:
1132      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1133        {        {
# Line 713  for (;;) Line 1136  for (;;)
1136        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1137        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1138          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1139        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1140        ims = original_ims;        ims = original_ims;
1141        ecode = rec->after_call;        ecode = rec->after_call;
1142        break;        break;
1143        }        }
1144    
1145      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1146      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1147        the subject. In both cases, backtracking will then try other alternatives,
1148      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if any. */
1149      md->end_match_ptr = eptr;          /* Record where we ended */  
1150      md->end_offset_top = offset_top;   /* and how many extracts were taken */      if (eptr == mstart &&
1151      RRETURN(MATCH_MATCH);          (md->notempty ||
1152              (md->notempty_atstart &&
1153                mstart == md->start_subject + md->start_offset)))
1154          MRRETURN(MATCH_NOMATCH);
1155    
1156        /* Otherwise, we have a match. */
1157    
1158        md->end_match_ptr = eptr;           /* Record where we ended */
1159        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1160        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1161        MRRETURN(((op == OP_END)? MATCH_MATCH : MATCH_ACCEPT));
1162    
1163      /* Change option settings */      /* Change option settings */
1164    
# Line 745  for (;;) Line 1178  for (;;)
1178      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1179      do      do
1180        {        {
1181        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1182          match_isgroup);          RM4);
1183        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1184        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          {
1185            mstart = md->start_match_ptr;   /* In case \K reset it */
1186            break;
1187            }
1188          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1189        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1190        }        }
1191      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1192      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1193    
1194      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1195    
# Line 766  for (;;) Line 1203  for (;;)
1203      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1204      continue;      continue;
1205    
1206      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1207        PRUNE, or COMMIT means we must assume failure without checking subsequent
1208        branches. */
1209    
1210      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1211      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1212      do      do
1213        {        {
1214        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1215          match_isgroup);          RM5);
1216        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1217        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1218            {
1219            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1220            break;
1221            }
1222          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1223        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1224        }        }
1225      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 794  for (;;) Line 1238  for (;;)
1238  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1239      if (utf8)      if (utf8)
1240        {        {
1241        c = GET(ecode,1);        i = GET(ecode, 1);
1242        for (i = 0; i < c; i++)        while (i-- > 0)
1243          {          {
1244          eptr--;          eptr--;
1245          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1246          BACKCHAR(eptr)          BACKCHAR(eptr);
1247          }          }
1248        }        }
1249      else      else
# Line 808  for (;;) Line 1252  for (;;)
1252      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1253    
1254        {        {
1255        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
1256        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1257        }        }
1258    
1259      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1260    
1261        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1262      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1263      break;      break;
1264    
# Line 830  for (;;) Line 1275  for (;;)
1275        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1276        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1277        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
1278        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
1279        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
1280        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1281        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1282        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1283        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1284        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1285        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1286        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1287        }        }
1288      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 865  for (;;) Line 1310  for (;;)
1310      case OP_RECURSE:      case OP_RECURSE:
1311        {        {
1312        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1313        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
1314            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
1315    
1316        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1317    
# Line 897  for (;;) Line 1337  for (;;)
1337    
1338        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1339              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1340        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1341    
1342        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1343        restore the offset and recursion data. */        restore the offset and recursion data. */
1344    
1345        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1346          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1347        do        do
1348          {          {
1349          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1350              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
1351          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1352            {            {
1353            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1354            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1355            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1356              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1357            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1358            }            }
1359          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1360            {            {
1361            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1362              if (new_recursive.offset_save != stacksave)
1363                (pcre_free)(new_recursive.offset_save);
1364            RRETURN(rrc);            RRETURN(rrc);
1365            }            }
1366    
# Line 933  for (;;) Line 1375  for (;;)
1375        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1376        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1377          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1378        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1379        }        }
1380      /* Control never reaches here */      /* Control never reaches here */
1381    
# Line 942  for (;;) Line 1384  for (;;)
1384      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1385      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1386      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1387      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1388        the start-of-match value in case it was changed by \K. */
1389    
1390      case OP_ONCE:      case OP_ONCE:
1391        {      prev = ecode;
1392        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1393    
1394        do      do
1395          {
1396          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1397          if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1398          {          {
1399          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,          mstart = md->start_match_ptr;
1400            eptrb, match_isgroup);          break;
         if (rrc == MATCH_MATCH) break;  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         ecode += GET(ecode,1);  
1401          }          }
1402        while (*ecode == OP_ALT);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1403          ecode += GET(ecode,1);
1404          }
1405        while (*ecode == OP_ALT);
1406    
1407        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1408    
1409        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1410    
1411        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1412        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1413    
1414        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1415    
1416        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1417        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1418    
1419        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1420        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1421        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1422        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1423        course of events. */      course of events. */
1424    
1425        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1426          {        {
1427          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1428          break;        break;
1429          }        }
1430    
1431        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1432        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1433        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1434        opcode. */      any options that changed within the bracket before re-running it, so
1435        check the next opcode. */
1436    
1437        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1438          {        {
1439          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1440          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1441          }        }
1442    
1443        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1444          {        {
1445          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1446          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1447          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1448          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = 0;
1449          }        goto TAIL_RECURSE;
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
1450        }        }
1451      RRETURN(MATCH_NOMATCH);      else  /* OP_KETRMAX */
1452          {
1453          RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1454          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1455          ecode += 1 + LINK_SIZE;
1456          flags = 0;
1457          goto TAIL_RECURSE;
1458          }
1459        /* Control never gets here */
1460    
1461      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1462      bracketed group and go to there. */      bracketed group and go to there. */
# Line 1018  for (;;) Line 1465  for (;;)
1465      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1466      break;      break;
1467    
1468      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1469      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1470      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1471      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1472      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1473    
1474      case OP_BRAZERO:      case OP_BRAZERO:
1475        {        {
1476        next = ecode+1;        next = ecode+1;
1477        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1478        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1479        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1480        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1481        }        }
1482      break;      break;
1483    
1484      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1485        {        {
1486        next = ecode+1;        next = ecode+1;
1487        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1488        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1489        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1490        ecode++;        ecode++;
1491        }        }
1492      break;      break;
1493    
1494      /* End of a group, repeated or non-repeating. If we are at the end of      case OP_SKIPZERO:
1495      an assertion "group", stop matching and return MATCH_MATCH, but record the        {
1496      current high water mark for use by positive assertions. Do this also        next = ecode+1;
1497      for the "once" (not-backup up) groups. */        do next += GET(next,1); while (*next == OP_ALT);
1498          ecode = next + 1 + LINK_SIZE;
1499          }
1500        break;
1501    
1502        /* End of a group, repeated or non-repeating. */
1503    
1504      case OP_KET:      case OP_KET:
1505      case OP_KETRMIN:      case OP_KETRMIN:
1506      case OP_KETRMAX:      case OP_KETRMAX:
1507        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
   
       /* Back up the stack of bracket start pointers. */  
1508    
1509        eptrb = eptrb->epb_prev;      /* If this was a group that remembered the subject start, in order to break
1510        infinite repeats of empty string matches, retrieve the subject start from
1511        the chain. Otherwise, set it NULL. */
1512    
1513        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev >= OP_SBRA)
1514            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        {
1515            *prev == OP_ONCE)        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1516          {        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1517          md->end_match_ptr = eptr;      /* For ONCE */        }
1518          md->end_offset_top = offset_top;      else saved_eptr = NULL;
         RRETURN(MATCH_MATCH);  
         }  
1519    
1520        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group or an atomic group, stop
1521        group number back at the start and if necessary complete handling an      matching and return MATCH_MATCH, but record the current high water mark for
1522        extraction by setting the offsets and bumping the high water mark. */      use by positive assertions. We also need to record the match start in case
1523        it was changed by \K. */
1524    
1525        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1526          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1527          number = *prev - OP_BRA;          *prev == OP_ONCE)
1528          {
1529          md->end_match_ptr = eptr;      /* For ONCE */
1530          md->end_offset_top = offset_top;
1531          md->start_match_ptr = mstart;
1532          MRRETURN(MATCH_MATCH);
1533          }
1534    
1535          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1536          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1537        bumping the high water mark. Note that whole-pattern recursion is coded as
1538        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1539        when the OP_END is reached. Other recursion is handled here. */
1540    
1541          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1542          offset = number << 1;        {
1543          number = GET2(prev, 1+LINK_SIZE);
1544          offset = number << 1;
1545    
1546  #ifdef DEBUG  #ifdef PCRE_DEBUG
1547          printf("end bracket %d", number);        printf("end bracket %d", number);
1548          printf("\n");        printf("\n");
1549  #endif  #endif
1550    
1551          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1552          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
         into group 0, so it won't be picked up here. Instead, we catch it when  
         the OP_END is reached. */  
   
         if (number > 0)  
           {  
           md->capture_last = number;  
           if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
             {  
             md->offset_vector[offset] =  
               md->offset_vector[md->offset_end - number];  
             md->offset_vector[offset+1] = eptr - md->start_subject;  
             if (offset_top <= offset) offset_top = offset + 2;  
             }  
   
           /* Handle a recursively called group. Restore the offsets  
           appropriately and continue from after the call. */  
   
           if (md->recursive != NULL && md->recursive->group_num == number)  
             {  
             recursion_info *rec = md->recursive;  
             DPRINTF(("Recursion (%d) succeeded - continuing\n", number));  
             md->recursive = rec->prevrec;  
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
         }  
   
       /* Reset the value of the ims flags, in case they got changed during  
       the group. */  
   
       ims = original_ims;  
       DPRINTF(("ims reset to %02lx\n", ims));  
   
       /* For a non-repeating ket, just continue at this level. This also  
       happens for a repeating ket if no characters were matched in the group.  
       This is the forcible breaking of infinite loops as implemented in Perl  
       5.005. If there is an options reset, it will get obeyed in the normal  
       course of events. */  
   
       if (*ecode == OP_KET || eptr == saved_eptr)  
1553          {          {
1554          ecode += 1 + LINK_SIZE;          md->offset_vector[offset] =
1555          break;            md->offset_vector[md->offset_end - number];
1556            md->offset_vector[offset+1] = eptr - md->start_subject;
1557            if (offset_top <= offset) offset_top = offset + 2;
1558          }          }
1559    
1560        /* The repeating kets try the rest of the pattern or restart from the        /* Handle a recursively called group. Restore the offsets
1561        preceding bracket, in the appropriate order. */        appropriately and continue from after the call. */
1562    
1563        if (*ecode == OP_KETRMIN)        if (md->recursive != NULL && md->recursive->group_num == number)
         {  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
       else  /* OP_KETRMAX */  
1564          {          {
1565          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);          recursion_info *rec = md->recursive;
1566          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1567          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);          md->recursive = rec->prevrec;
1568          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          memcpy(md->offset_vector, rec->offset_save,
1569              rec->saved_max * sizeof(int));
1570            offset_top = rec->save_offset_top;
1571            ecode = rec->after_call;
1572            ims = original_ims;
1573            break;
1574          }          }
1575        }        }
1576    
1577      RRETURN(MATCH_NOMATCH);      /* For both capturing and non-capturing groups, reset the value of the ims
1578        flags, in case they got changed during the group. */
1579    
1580      /* Start of subject unless notbol, or after internal newline if multiline */      ims = original_ims;
1581        DPRINTF(("ims reset to %02lx\n", ims));
1582    
1583      case OP_CIRC:      /* For a non-repeating ket, just continue at this level. This also
1584      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      happens for a repeating ket if no characters were matched in the group.
1585      if ((ims & PCRE_MULTILINE) != 0)      This is the forcible breaking of infinite loops as implemented in Perl
1586        5.005. If there is an options reset, it will get obeyed in the normal
1587        course of events. */
1588    
1589        if (*ecode == OP_KET || eptr == saved_eptr)
1590        {        {
1591        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        ecode += 1 + LINK_SIZE;
1592          RRETURN(MATCH_NOMATCH);        break;
1593        ecode++;        }
1594    
1595        /* The repeating kets try the rest of the pattern or restart from the
1596        preceding bracket, in the appropriate order. In the second case, we can use
1597        tail recursion to avoid using another stack frame, unless we have an
1598        unlimited repeat of a group that can match an empty string. */
1599    
1600        flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1601    
1602        if (*ecode == OP_KETRMIN)
1603          {
1604          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1605          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1606          if (flags != 0)    /* Could match an empty string */
1607            {
1608            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1609            RRETURN(rrc);
1610            }
1611          ecode = prev;
1612          goto TAIL_RECURSE;
1613          }
1614        else  /* OP_KETRMAX */
1615          {
1616          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1617          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1618          ecode += 1 + LINK_SIZE;
1619          flags = 0;
1620          goto TAIL_RECURSE;
1621          }
1622        /* Control never gets here */
1623    
1624        /* Start of subject unless notbol, or after internal newline if multiline */
1625    
1626        case OP_CIRC:
1627        if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1628        if ((ims & PCRE_MULTILINE) != 0)
1629          {
1630          if (eptr != md->start_subject &&
1631              (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1632            MRRETURN(MATCH_NOMATCH);
1633          ecode++;
1634        break;        break;
1635        }        }
1636      /* ... else fall through */      /* ... else fall through */
# Line 1178  for (;;) Line 1638  for (;;)
1638      /* Start of subject assertion */      /* Start of subject assertion */
1639    
1640      case OP_SOD:      case OP_SOD:
1641      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1642      ecode++;      ecode++;
1643      break;      break;
1644    
1645      /* Start of match assertion */      /* Start of match assertion */
1646    
1647      case OP_SOM:      case OP_SOM:
1648      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1649        ecode++;
1650        break;
1651    
1652        /* Reset the start of match point */
1653    
1654        case OP_SET_SOM:
1655        mstart = eptr;
1656      ecode++;      ecode++;
1657      break;      break;
1658    
# Line 1196  for (;;) Line 1663  for (;;)
1663      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1664        {        {
1665        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1666          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1667        else        else
1668          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1669        ecode++;        ecode++;
1670        break;        break;
1671        }        }
1672      else      else
1673        {        {
1674        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1675        if (!md->endonly)        if (!md->endonly)
1676          {          {
1677          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1678             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1679            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
1680          ecode++;          ecode++;
1681          break;          break;
1682          }          }
1683        }        }
1684      /* ... else fall through */      /* ... else fall through for endonly */
1685    
1686      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1687    
1688      case OP_EOD:      case OP_EOD:
1689      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1690      ecode++;      ecode++;
1691      break;      break;
1692    
1693      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1694    
1695      case OP_EODN:      case OP_EODN:
1696      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1697         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1698          MRRETURN(MATCH_NOMATCH);
1699      ecode++;      ecode++;
1700      break;      break;
1701    
# Line 1239  for (;;) Line 1707  for (;;)
1707    
1708        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1709        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1710        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1711          partial matching. */
1712    
1713  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1714        if (utf8)        if (utf8)
1715          {          {
1716          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1717            {            {
1718            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1719            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1720              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1721            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1722            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1723            }            }
1724          if (eptr >= md->end_subject) cur_is_word = FALSE; else          if (eptr >= md->end_subject)
1725              {
1726              SCHECK_PARTIAL();
1727              cur_is_word = FALSE;
1728              }
1729            else
1730            {            {
1731            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1732            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1260  for (;;) Line 1735  for (;;)
1735        else        else
1736  #endif  #endif
1737    
1738        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode */
1739    
1740          {          {
1741          prev_is_word = (eptr != md->start_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1742            ((md->ctypes[eptr[-1]] & ctype_word) != 0);            {
1743          cur_is_word = (eptr < md->end_subject) &&            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1744            ((md->ctypes[*eptr] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1745              }
1746            if (eptr >= md->end_subject)
1747              {
1748              SCHECK_PARTIAL();
1749              cur_is_word = FALSE;
1750              }
1751            else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1752          }          }
1753    
1754        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
1755    
1756        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
1757             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1758          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1759        }        }
1760      break;      break;
1761    
1762      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1763    
1764      case OP_ANY:      case OP_ANY:
1765      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1766        RRETURN(MATCH_NOMATCH);      /* Fall through */
1767      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);  
1768  #ifdef SUPPORT_UTF8      case OP_ALLANY:
1769      if (utf8)      if (eptr++ >= md->end_subject)
1770        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        {
1771  #endif        SCHECK_PARTIAL();
1772          MRRETURN(MATCH_NOMATCH);
1773          }
1774        if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1775      ecode++;      ecode++;
1776      break;      break;
1777    
# Line 1294  for (;;) Line 1779  for (;;)
1779      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1780    
1781      case OP_ANYBYTE:      case OP_ANYBYTE:
1782      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1783          {
1784          SCHECK_PARTIAL();
1785          MRRETURN(MATCH_NOMATCH);
1786          }
1787      ecode++;      ecode++;
1788      break;      break;
1789    
1790      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1791      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1792          {
1793          SCHECK_PARTIAL();
1794          MRRETURN(MATCH_NOMATCH);
1795          }
1796      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1797      if (      if (
1798  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1307  for (;;) Line 1800  for (;;)
1800  #endif  #endif
1801         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1802         )         )
1803        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1804      ecode++;      ecode++;
1805      break;      break;
1806    
1807      case OP_DIGIT:      case OP_DIGIT:
1808      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1809          {
1810          SCHECK_PARTIAL();
1811          MRRETURN(MATCH_NOMATCH);
1812          }
1813      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1814      if (      if (
1815  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1320  for (;;) Line 1817  for (;;)
1817  #endif  #endif
1818         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1819         )         )
1820        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1821      ecode++;      ecode++;
1822      break;      break;
1823    
1824      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1825      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1826          {
1827          SCHECK_PARTIAL();
1828          MRRETURN(MATCH_NOMATCH);
1829          }
1830      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1831      if (      if (
1832  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1333  for (;;) Line 1834  for (;;)
1834  #endif  #endif
1835         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1836         )         )
1837        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1838      ecode++;      ecode++;
1839      break;      break;
1840    
1841      case OP_WHITESPACE:      case OP_WHITESPACE:
1842      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1843          {
1844          SCHECK_PARTIAL();
1845          MRRETURN(MATCH_NOMATCH);
1846          }
1847      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1848      if (      if (
1849  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1346  for (;;) Line 1851  for (;;)
1851  #endif  #endif
1852         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1853         )         )
1854        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1855      ecode++;      ecode++;
1856      break;      break;
1857    
1858      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1859      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1860          {
1861          SCHECK_PARTIAL();
1862          MRRETURN(MATCH_NOMATCH);
1863          }
1864      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1865      if (      if (
1866  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1359  for (;;) Line 1868  for (;;)
1868  #endif  #endif
1869         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1870         )         )
1871        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1872      ecode++;      ecode++;
1873      break;      break;
1874    
1875      case OP_WORDCHAR:      case OP_WORDCHAR:
1876      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1877          {
1878          SCHECK_PARTIAL();
1879          MRRETURN(MATCH_NOMATCH);
1880          }
1881      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1882      if (      if (
1883  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1372  for (;;) Line 1885  for (;;)
1885  #endif  #endif
1886         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1887         )         )
1888        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1889        ecode++;
1890        break;
1891    
1892        case OP_ANYNL:
1893        if (eptr >= md->end_subject)
1894          {
1895          SCHECK_PARTIAL();
1896          MRRETURN(MATCH_NOMATCH);
1897          }
1898        GETCHARINCTEST(c, eptr);
1899        switch(c)
1900          {
1901          default: MRRETURN(MATCH_NOMATCH);
1902          case 0x000d:
1903          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1904          break;
1905    
1906          case 0x000a:
1907          break;
1908    
1909          case 0x000b:
1910          case 0x000c:
1911          case 0x0085:
1912          case 0x2028:
1913          case 0x2029:
1914          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1915          break;
1916          }
1917        ecode++;
1918        break;
1919    
1920        case OP_NOT_HSPACE:
1921        if (eptr >= md->end_subject)
1922          {
1923          SCHECK_PARTIAL();
1924          MRRETURN(MATCH_NOMATCH);
1925          }
1926        GETCHARINCTEST(c, eptr);
1927        switch(c)
1928          {
1929          default: break;
1930          case 0x09:      /* HT */
1931          case 0x20:      /* SPACE */
1932          case 0xa0:      /* NBSP */
1933          case 0x1680:    /* OGHAM SPACE MARK */
1934          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1935          case 0x2000:    /* EN QUAD */
1936          case 0x2001:    /* EM QUAD */
1937          case 0x2002:    /* EN SPACE */
1938          case 0x2003:    /* EM SPACE */
1939          case 0x2004:    /* THREE-PER-EM SPACE */
1940          case 0x2005:    /* FOUR-PER-EM SPACE */
1941          case 0x2006:    /* SIX-PER-EM SPACE */
1942          case 0x2007:    /* FIGURE SPACE */
1943          case 0x2008:    /* PUNCTUATION SPACE */
1944          case 0x2009:    /* THIN SPACE */
1945          case 0x200A:    /* HAIR SPACE */
1946          case 0x202f:    /* NARROW NO-BREAK SPACE */
1947          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1948          case 0x3000:    /* IDEOGRAPHIC SPACE */
1949          MRRETURN(MATCH_NOMATCH);
1950          }
1951        ecode++;
1952        break;
1953    
1954        case OP_HSPACE:
1955        if (eptr >= md->end_subject)
1956          {
1957          SCHECK_PARTIAL();
1958          MRRETURN(MATCH_NOMATCH);
1959          }
1960        GETCHARINCTEST(c, eptr);
1961        switch(c)
1962          {
1963          default: MRRETURN(MATCH_NOMATCH);
1964          case 0x09:      /* HT */
1965          case 0x20:      /* SPACE */
1966          case 0xa0:      /* NBSP */
1967          case 0x1680:    /* OGHAM SPACE MARK */
1968          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1969          case 0x2000:    /* EN QUAD */
1970          case 0x2001:    /* EM QUAD */
1971          case 0x2002:    /* EN SPACE */
1972          case 0x2003:    /* EM SPACE */
1973          case 0x2004:    /* THREE-PER-EM SPACE */
1974          case 0x2005:    /* FOUR-PER-EM SPACE */
1975          case 0x2006:    /* SIX-PER-EM SPACE */
1976          case 0x2007:    /* FIGURE SPACE */
1977          case 0x2008:    /* PUNCTUATION SPACE */
1978          case 0x2009:    /* THIN SPACE */
1979          case 0x200A:    /* HAIR SPACE */
1980          case 0x202f:    /* NARROW NO-BREAK SPACE */
1981          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1982          case 0x3000:    /* IDEOGRAPHIC SPACE */
1983          break;
1984          }
1985        ecode++;
1986        break;
1987    
1988        case OP_NOT_VSPACE:
1989        if (eptr >= md->end_subject)
1990          {
1991          SCHECK_PARTIAL();
1992          MRRETURN(MATCH_NOMATCH);
1993          }
1994        GETCHARINCTEST(c, eptr);
1995        switch(c)
1996          {
1997          default: break;
1998          case 0x0a:      /* LF */
1999          case 0x0b:      /* VT */
2000          case 0x0c:      /* FF */
2001          case 0x0d:      /* CR */
2002          case 0x85:      /* NEL */
2003          case 0x2028:    /* LINE SEPARATOR */
2004          case 0x2029:    /* PARAGRAPH SEPARATOR */
2005          MRRETURN(MATCH_NOMATCH);
2006          }
2007        ecode++;
2008        break;
2009    
2010        case OP_VSPACE:
2011        if (eptr >= md->end_subject)
2012          {
2013          SCHECK_PARTIAL();
2014          MRRETURN(MATCH_NOMATCH);
2015          }
2016        GETCHARINCTEST(c, eptr);
2017        switch(c)
2018          {
2019          default: MRRETURN(MATCH_NOMATCH);
2020          case 0x0a:      /* LF */
2021          case 0x0b:      /* VT */
2022          case 0x0c:      /* FF */
2023          case 0x0d:      /* CR */
2024          case 0x85:      /* NEL */
2025          case 0x2028:    /* LINE SEPARATOR */
2026          case 0x2029:    /* PARAGRAPH SEPARATOR */
2027          break;
2028          }
2029      ecode++;      ecode++;
2030      break;      break;
2031    
# Line 1382  for (;;) Line 2035  for (;;)
2035    
2036      case OP_PROP:      case OP_PROP:
2037      case OP_NOTPROP:      case OP_NOTPROP:
2038      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2039          {
2040          SCHECK_PARTIAL();
2041          MRRETURN(MATCH_NOMATCH);
2042          }
2043      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2044        {        {
2045        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
2046    
2047        switch(ecode[1])        switch(ecode[1])
2048          {          {
2049          case PT_ANY:          case PT_ANY:
2050          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2051          break;          break;
2052    
2053          case PT_LAMP:          case PT_LAMP:
2054          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2055               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2056               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2057            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2058           break;           break;
2059    
2060          case PT_GC:          case PT_GC:
2061          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2062            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2063          break;          break;
2064    
2065          case PT_PC:          case PT_PC:
2066          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2067            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2068          break;          break;
2069    
2070          case PT_SC:          case PT_SC:
2071          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2072            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2073          break;          break;
2074    
2075          default:          default:
2076          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
         break;  
2077          }          }
2078    
2079        ecode += 3;        ecode += 3;
# Line 1429  for (;;) Line 2084  for (;;)
2084      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2085    
2086      case OP_EXTUNI:      case OP_EXTUNI:
2087      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2088          {
2089          SCHECK_PARTIAL();
2090          MRRETURN(MATCH_NOMATCH);
2091          }
2092      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2093        {        {
2094        int chartype, script;        int category = UCD_CATEGORY(c);
2095        int category = _pcre_ucp_findprop(c, &chartype, &script);        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2096        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2097          {          {
2098          int len = 1;          int len = 1;
# Line 1442  for (;;) Line 2100  for (;;)
2100            {            {
2101            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2102            }            }
2103          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2104          if (category != ucp_M) break;          if (category != ucp_M) break;
2105          eptr += len;          eptr += len;
2106          }          }
# Line 1463  for (;;) Line 2121  for (;;)
2121      case OP_REF:      case OP_REF:
2122        {        {
2123        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2124        ecode += 3;                                 /* Advance past item */        ecode += 3;
2125    
2126          /* If the reference is unset, there are two possibilities:
2127    
2128        /* If the reference is unset, set the length to be longer than the amount        (a) In the default, Perl-compatible state, set the length to be longer
2129        of subject left; this ensures that every attempt at a match fails. We        than the amount of subject left; this ensures that every attempt at a
2130        can't just fail here, because of the possibility of quantifiers with zero        match fails. We can't just fail here, because of the possibility of
2131        minima. */        quantifiers with zero minima.
2132    
2133        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        (b) If the JavaScript compatibility flag is set, set the length to zero
2134          md->end_subject - eptr + 1 :        so that the back reference matches an empty string.
2135          md->offset_vector[offset+1] - md->offset_vector[offset];  
2136          Otherwise, set the length to the length of what was matched by the
2137          referenced subpattern. */
2138    
2139          if (offset >= offset_top || md->offset_vector[offset] < 0)
2140            length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2141          else
2142            length = md->offset_vector[offset+1] - md->offset_vector[offset];
2143    
2144        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2145    
# Line 1501  for (;;) Line 2168  for (;;)
2168          break;          break;
2169    
2170          default:               /* No repeat follows */          default:               /* No repeat follows */
2171          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2172              {
2173              CHECK_PARTIAL();
2174              MRRETURN(MATCH_NOMATCH);
2175              }
2176          eptr += length;          eptr += length;
2177          continue;              /* With the main loop */          continue;              /* With the main loop */
2178          }          }
# Line 1517  for (;;) Line 2188  for (;;)
2188    
2189        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2190          {          {
2191          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2192              {
2193              CHECK_PARTIAL();
2194              MRRETURN(MATCH_NOMATCH);
2195              }
2196          eptr += length;          eptr += length;
2197          }          }
2198    
# Line 1532  for (;;) Line 2207  for (;;)
2207          {          {
2208          for (fi = min;; fi++)          for (fi = min;; fi++)
2209            {            {
2210            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2211            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2212            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2213              RRETURN(MATCH_NOMATCH);            if (!match_ref(offset, eptr, length, md, ims))
2214                {
2215                CHECK_PARTIAL();
2216                MRRETURN(MATCH_NOMATCH);
2217                }
2218            eptr += length;            eptr += length;
2219            }            }
2220          /* Control never gets here */          /* Control never gets here */
# Line 1548  for (;;) Line 2227  for (;;)
2227          pp = eptr;          pp = eptr;
2228          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2229            {            {
2230            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2231                {
2232                CHECK_PARTIAL();
2233                break;
2234                }
2235            eptr += length;            eptr += length;
2236            }            }
2237          while (eptr >= pp)          while (eptr >= pp)
2238            {            {
2239            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2240            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2241            eptr -= length;            eptr -= length;
2242            }            }
2243          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2244          }          }
2245        }        }
2246      /* Control never gets here */      /* Control never gets here */
2247    
   
   
2248      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2249      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2250      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1618  for (;;) Line 2299  for (;;)
2299          {          {
2300          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2301            {            {
2302            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2303                {
2304                SCHECK_PARTIAL();
2305                MRRETURN(MATCH_NOMATCH);
2306                }
2307            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2308            if (c > 255)            if (c > 255)
2309              {              {
2310              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2311              }              }
2312            else            else
2313              {              {
2314              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2315              }              }
2316            }            }
2317          }          }
# Line 1636  for (;;) Line 2321  for (;;)
2321          {          {
2322          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2323            {            {
2324            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2325                {
2326                SCHECK_PARTIAL();
2327                MRRETURN(MATCH_NOMATCH);
2328                }
2329            c = *eptr++;            c = *eptr++;
2330            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2331            }            }
2332          }          }
2333    
# Line 1658  for (;;) Line 2347  for (;;)
2347            {            {
2348            for (fi = min;; fi++)            for (fi = min;; fi++)
2349              {              {
2350              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2351              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2352              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2353                if (eptr >= md->end_subject)
2354                  {
2355                  SCHECK_PARTIAL();
2356                  MRRETURN(MATCH_NOMATCH);
2357                  }
2358              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2359              if (c > 255)              if (c > 255)
2360                {                {
2361                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2362                }                }
2363              else              else
2364                {                {
2365                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2366                }                }
2367              }              }
2368            }            }
# Line 1678  for (;;) Line 2372  for (;;)
2372            {            {
2373            for (fi = min;; fi++)            for (fi = min;; fi++)
2374              {              {
2375              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2376              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2377              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2378                if (eptr >= md->end_subject)
2379                  {
2380                  SCHECK_PARTIAL();
2381                  MRRETURN(MATCH_NOMATCH);
2382                  }
2383              c = *eptr++;              c = *eptr++;
2384              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2385              }              }
2386            }            }
2387          /* Control never gets here */          /* Control never gets here */
# Line 1701  for (;;) Line 2400  for (;;)
2400            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2401              {              {
2402              int len = 1;              int len = 1;
2403              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2404                  {
2405                  SCHECK_PARTIAL();
2406                  break;
2407                  }
2408              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2409              if (c > 255)              if (c > 255)
2410                {                {
# Line 1715  for (;;) Line 2418  for (;;)
2418              }              }
2419            for (;;)            for (;;)
2420              {              {
2421              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2422              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2423              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2424              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1727  for (;;) Line 2430  for (;;)
2430            {            {
2431            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2432              {              {
2433              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2434                  {
2435                  SCHECK_PARTIAL();
2436                  break;
2437                  }
2438              c = *eptr;              c = *eptr;
2439              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2440              eptr++;              eptr++;
2441              }              }
2442            while (eptr >= pp)            while (eptr >= pp)
2443              {              {
2444              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2445              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2446              eptr--;              eptr--;
2447              }              }
2448            }            }
2449    
2450          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2451          }          }
2452        }        }
2453      /* Control never gets here */      /* Control never gets here */
2454    
2455    
2456      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2457      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2458        mode, because Unicode properties are supported in non-UTF-8 mode. */
2459    
2460  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2461      case OP_XCLASS:      case OP_XCLASS:
# Line 1788  for (;;) Line 2496  for (;;)
2496    
2497        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2498          {          {
2499          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2500          GETCHARINC(c, eptr);            {
2501          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2502              MRRETURN(MATCH_NOMATCH);
2503              }
2504            GETCHARINCTEST(c, eptr);
2505            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2506          }          }
2507    
2508        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1805  for (;;) Line 2517  for (;;)
2517          {          {
2518          for (fi = min;; fi++)          for (fi = min;; fi++)
2519            {            {
2520            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2521            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2522            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2523            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2524            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2525                SCHECK_PARTIAL();
2526                MRRETURN(MATCH_NOMATCH);
2527                }
2528              GETCHARINCTEST(c, eptr);
2529              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2530            }            }
2531          /* Control never gets here */          /* Control never gets here */
2532          }          }
# Line 1822  for (;;) Line 2539  for (;;)
2539          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2540            {            {
2541            int len = 1;            int len = 1;
2542            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2543            GETCHARLEN(c, eptr, len);              {
2544                SCHECK_PARTIAL();
2545                break;
2546                }
2547              GETCHARLENTEST(c, eptr, len);
2548            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2549            eptr += len;            eptr += len;
2550            }            }
2551          for(;;)          for(;;)
2552            {            {
2553            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2554            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2555            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2556            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2557            }            }
2558          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2559          }          }
2560    
2561        /* Control never gets here */        /* Control never gets here */
# Line 1850  for (;;) Line 2571  for (;;)
2571        length = 1;        length = 1;
2572        ecode++;        ecode++;
2573        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2574        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2575        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2576            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2577            MRRETURN(MATCH_NOMATCH);
2578            }
2579          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2580        }        }
2581      else      else
2582  #endif  #endif
2583    
2584      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2585        {        {
2586        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2587        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2588            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2589            MRRETURN(MATCH_NOMATCH);
2590            }
2591          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2592        ecode += 2;        ecode += 2;
2593        }        }
2594      break;      break;
# Line 1874  for (;;) Line 2603  for (;;)
2603        ecode++;        ecode++;
2604        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2605    
2606        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2607            {
2608            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2609            MRRETURN(MATCH_NOMATCH);
2610            }
2611    
2612        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2613        can use the fast lookup table. */        can use the fast lookup table. */
2614    
2615        if (fc < 128)        if (fc < 128)
2616          {          {
2617          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2618          }          }
2619    
2620        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
2621    
2622        else        else
2623          {          {
2624          int dc;          unsigned int dc;
2625          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2626          ecode += length;          ecode += length;
2627    
# Line 1898  for (;;) Line 2631  for (;;)
2631          if (fc != dc)          if (fc != dc)
2632            {            {
2633  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2634            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2635  #endif  #endif
2636              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2637            }            }
2638          }          }
2639        }        }
# Line 1909  for (;;) Line 2642  for (;;)
2642    
2643      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2644        {        {
2645        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2646        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2647            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2648            MRRETURN(MATCH_NOMATCH);
2649            }
2650          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2651        ecode += 2;        ecode += 2;
2652        }        }
2653      break;      break;
2654    
2655      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2656    
2657      case OP_EXACT:      case OP_EXACT:
2658      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2659      ecode += 3;      ecode += 3;
2660      goto REPEATCHAR;      goto REPEATCHAR;
2661    
2662        case OP_POSUPTO:
2663        possessive = TRUE;
2664        /* Fall through */
2665    
2666      case OP_UPTO:      case OP_UPTO:
2667      case OP_MINUPTO:      case OP_MINUPTO:
2668      min = 0;      min = 0;
# Line 1930  for (;;) Line 2671  for (;;)
2671      ecode += 3;      ecode += 3;
2672      goto REPEATCHAR;      goto REPEATCHAR;
2673    
2674        case OP_POSSTAR:
2675        possessive = TRUE;
2676        min = 0;
2677        max = INT_MAX;
2678        ecode++;
2679        goto REPEATCHAR;
2680    
2681        case OP_POSPLUS:
2682        possessive = TRUE;
2683        min = 1;
2684        max = INT_MAX;
2685        ecode++;
2686        goto REPEATCHAR;
2687    
2688        case OP_POSQUERY:
2689        possessive = TRUE;
2690        min = 0;
2691        max = 1;
2692        ecode++;
2693        goto REPEATCHAR;
2694    
2695      case OP_STAR:      case OP_STAR:
2696      case OP_MINSTAR:      case OP_MINSTAR:
2697      case OP_PLUS:      case OP_PLUS:
# Line 1938  for (;;) Line 2700  for (;;)
2700      case OP_MINQUERY:      case OP_MINQUERY:
2701      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2702      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2703    
2704      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2705      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2706      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2707    
2708      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2709    
2710      REPEATCHAR:      REPEATCHAR:
2711  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1953  for (;;) Line 2714  for (;;)
2714        length = 1;        length = 1;
2715        charptr = ecode;        charptr = ecode;
2716        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2717        ecode += length;        ecode += length;
2718    
2719        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 1961  for (;;) Line 2721  for (;;)
2721    
2722        if (length > 1)        if (length > 1)
2723          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2724  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2725          int othercase;          unsigned int othercase;
2726          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2727              (othercase = _pcre_ucp_othercase(fc)) >= 0 &&              (othercase = UCD_OTHERCASE(fc)) != fc)
              othercase >= 0)  
2728            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2729            else oclength = 0;
2730  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2731    
2732          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2733            {            {
2734            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2735            /* Need braces because of following else */              memcmp(eptr, charptr, length) == 0) eptr += length;
2736            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2737              else if (oclength > 0 &&
2738                       eptr <= md->end_subject - oclength &&
2739                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2740    #endif  /* SUPPORT_UCP */
2741            else            else
2742              {              {
2743              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2744              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2745              }              }
2746            }            }
2747    
# Line 1990  for (;;) Line 2751  for (;;)
2751            {            {
2752            for (fi = min;; fi++)            for (fi = min;; fi++)
2753              {              {
2754              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2755              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2756              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2757              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2758              /* Need braces because of following else */                memcmp(eptr, charptr, length) == 0) eptr += length;
2759              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2760                else if (oclength > 0 &&
2761                         eptr <= md->end_subject - oclength &&
2762                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2763    #endif  /* SUPPORT_UCP */
2764              else              else
2765                {                {
2766                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2767                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2768                }                }
2769              }              }
2770            /* Control never gets here */            /* Control never gets here */
2771            }            }
2772          else  
2773            else  /* Maximize */
2774            {            {
2775            pp = eptr;            pp = eptr;
2776            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2777              {              {
2778              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2779              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2780              else if (oclength == 0) break;  #ifdef SUPPORT_UCP
2781                else if (oclength > 0 &&
2782                         eptr <= md->end_subject - oclength &&
2783                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2784    #endif  /* SUPPORT_UCP */
2785              else              else
2786                {                {
2787                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2788                eptr += oclength;                break;
2789                }                }
2790              }              }
2791            while (eptr >= pp)  
2792             {            if (possessive) continue;
2793             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);  
2794             if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for(;;)
2795             eptr -= length;              {
2796             }              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2797            RRETURN(MATCH_NOMATCH);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2798                if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2799    #ifdef SUPPORT_UCP
2800                eptr--;
2801                BACKCHAR(eptr);
2802    #else   /* without SUPPORT_UCP */
2803                eptr -= length;
2804    #endif  /* SUPPORT_UCP */
2805                }
2806            }            }
2807          /* Control never gets here */          /* Control never gets here */
2808          }          }
# Line 2037  for (;;) Line 2815  for (;;)
2815  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2816    
2817      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2818        {  
2819        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2820    
2821      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2822      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2058  for (;;) Line 2834  for (;;)
2834        {        {
2835        fc = md->lcc[fc];        fc = md->lcc[fc];
2836        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2837          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2838            if (eptr >= md->end_subject)
2839              {
2840              SCHECK_PARTIAL();
2841              MRRETURN(MATCH_NOMATCH);
2842              }
2843            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2844            }
2845        if (min == max) continue;        if (min == max) continue;
2846        if (minimize)        if (minimize)
2847          {          {
2848          for (fi = min;; fi++)          for (fi = min;; fi++)
2849            {            {
2850            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2851            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2852            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2853                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2854              RRETURN(MATCH_NOMATCH);              {
2855                SCHECK_PARTIAL();
2856                MRRETURN(MATCH_NOMATCH);
2857                }
2858              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2859            }            }
2860          /* Control never gets here */          /* Control never gets here */
2861          }          }
2862        else        else  /* Maximize */
2863          {          {
2864          pp = eptr;          pp = eptr;
2865          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2866            {            {
2867            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2868                {
2869                SCHECK_PARTIAL();
2870                break;
2871                }
2872              if (fc != md->lcc[*eptr]) break;
2873            eptr++;            eptr++;
2874            }            }
2875    
2876            if (possessive) continue;
2877    
2878          while (eptr >= pp)          while (eptr >= pp)
2879            {            {
2880            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2881            eptr--;            eptr--;
2882            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2883            }            }
2884          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2885          }          }
2886        /* Control never gets here */        /* Control never gets here */
2887        }        }
# Line 2095  for (;;) Line 2890  for (;;)
2890    
2891      else      else
2892        {        {
2893        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2894            {
2895            if (eptr >= md->end_subject)
2896              {
2897              SCHECK_PARTIAL();
2898              MRRETURN(MATCH_NOMATCH);
2899              }
2900            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2901            }
2902    
2903        if (min == max) continue;        if (min == max) continue;
2904    
2905        if (minimize)        if (minimize)
2906          {          {
2907          for (fi = min;; fi++)          for (fi = min;; fi++)
2908            {            {
2909            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2910            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2911            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2912              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2913                {
2914                SCHECK_PARTIAL();
2915                MRRETURN(MATCH_NOMATCH);
2916                }
2917              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2918            }            }
2919          /* Control never gets here */          /* Control never gets here */
2920          }          }
2921        else        else  /* Maximize */
2922          {          {
2923          pp = eptr;          pp = eptr;
2924          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2925            {            {
2926            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
2927                {
2928                SCHECK_PARTIAL();
2929                break;
2930                }
2931              if (fc != *eptr) break;
2932            eptr++;            eptr++;
2933            }            }
2934            if (possessive) continue;
2935    
2936          while (eptr >= pp)          while (eptr >= pp)
2937            {            {
2938            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2939            eptr--;            eptr--;
2940            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2941            }            }
2942          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2943          }          }
2944        }        }
2945      /* Control never gets here */      /* Control never gets here */
# Line 2131  for (;;) Line 2948  for (;;)
2948      checking can be multibyte. */      checking can be multibyte. */
2949    
2950      case OP_NOT:      case OP_NOT:
2951      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2952          {
2953          SCHECK_PARTIAL();
2954          MRRETURN(MATCH_NOMATCH);
2955          }
2956      ecode++;      ecode++;
2957      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2958      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2140  for (;;) Line 2961  for (;;)
2961        if (c < 256)        if (c < 256)
2962  #endif  #endif
2963        c = md->lcc[c];        c = md->lcc[c];
2964        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
2965        }        }
2966      else      else
2967        {        {
2968        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
2969        }        }
2970      break;      break;
2971    
# Line 2168  for (;;) Line 2989  for (;;)
2989      ecode += 3;      ecode += 3;
2990      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2991    
2992        case OP_NOTPOSSTAR:
2993        possessive = TRUE;
2994        min = 0;
2995        max = INT_MAX;
2996        ecode++;
2997        goto REPEATNOTCHAR;
2998    
2999        case OP_NOTPOSPLUS:
3000        possessive = TRUE;
3001        min = 1;
3002        max = INT_MAX;
3003        ecode++;
3004        goto REPEATNOTCHAR;
3005    
3006        case OP_NOTPOSQUERY:
3007        possessive = TRUE;
3008        min = 0;
3009        max = 1;
3010        ecode++;
3011        goto REPEATNOTCHAR;
3012    
3013        case OP_NOTPOSUPTO:
3014        possessive = TRUE;
3015        min = 0;
3016        max = GET2(ecode, 1);
3017        ecode += 3;
3018        goto REPEATNOTCHAR;
3019    
3020      case OP_NOTSTAR:      case OP_NOTSTAR:
3021      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
3022      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2180  for (;;) Line 3029  for (;;)
3029      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3030      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3031    
3032      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3033    
3034      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3035      fc = *ecode++;      fc = *ecode++;
3036    
3037      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2207  for (;;) Line 3053  for (;;)
3053        /* UTF-8 mode */        /* UTF-8 mode */
3054        if (utf8)        if (utf8)
3055          {          {
3056          register int d;          register unsigned int d;
3057          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3058            {            {
3059              if (eptr >= md->end_subject)
3060                {
3061                SCHECK_PARTIAL();
3062                MRRETURN(MATCH_NOMATCH);
3063                }
3064            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3065            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3066            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3067            }            }
3068          }          }
3069        else        else
# Line 2221  for (;;) Line 3072  for (;;)
3072        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3073          {          {
3074          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3075            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3076              if (eptr >= md->end_subject)
3077                {
3078                SCHECK_PARTIAL();
3079                MRRETURN(MATCH_NOMATCH);
3080                }
3081              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3082              }
3083          }          }
3084    
3085        if (min == max) continue;        if (min == max) continue;
# Line 2232  for (;;) Line 3090  for (;;)
3090          /* UTF-8 mode */          /* UTF-8 mode */
3091          if (utf8)          if (utf8)
3092            {            {
3093            register int d;            register unsigned int d;
3094            for (fi = min;; fi++)            for (fi = min;; fi++)
3095              {              {
3096              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3097              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3098                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3099                if (eptr >= md->end_subject)
3100                  {
3101                  SCHECK_PARTIAL();
3102                  MRRETURN(MATCH_NOMATCH);
3103                  }
3104              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3105              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3106              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3107              }              }
3108            }            }
3109          else          else
# Line 2249  for (;;) Line 3112  for (;;)
3112            {            {
3113            for (fi = min;; fi++)            for (fi = min;; fi++)
3114              {              {
3115              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3116              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3117              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3118                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3119                  {
3120                  SCHECK_PARTIAL();
3121                  MRRETURN(MATCH_NOMATCH);
3122                  }
3123                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3124              }              }
3125            }            }
3126          /* Control never gets here */          /* Control never gets here */
# Line 2268  for (;;) Line 3136  for (;;)
3136          /* UTF-8 mode */          /* UTF-8 mode */
3137          if (utf8)          if (utf8)
3138            {            {
3139            register int d;            register unsigned int d;
3140            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3141              {              {
3142              int len = 1;              int len = 1;
3143              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3144              GETCHARLEN(d, eptr, len);                {
3145                  SCHECK_PARTIAL();
3146                  break;
3147                  }
3148                GETCHARLEN(d, eptr, len);
3149              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3150              if (fc == d) break;              if (fc == d) break;
3151              eptr += len;              eptr += len;
3152              }              }
3153            for(;;)          if (possessive) continue;
3154            for(;;)
3155              {              {
3156              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3157              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3158              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3159              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2292  for (;;) Line 3165  for (;;)
3165            {            {
3166            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3167              {              {
3168              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3169                  {
3170                  SCHECK_PARTIAL();
3171                  break;
3172                  }
3173                if (fc == md->lcc[*eptr]) break;
3174              eptr++;              eptr++;
3175              }              }
3176              if (possessive) continue;
3177            while (eptr >= pp)            while (eptr >= pp)
3178              {              {
3179              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3180              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3181              eptr--;              eptr--;
3182              }              }
3183            }            }
3184    
3185          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3186          }          }
3187        /* Control never gets here */        /* Control never gets here */
3188        }        }
# Line 2316  for (;;) Line 3195  for (;;)
3195        /* UTF-8 mode */        /* UTF-8 mode */
3196        if (utf8)        if (utf8)
3197          {          {
3198          register int d;          register unsigned int d;
3199          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3200            {            {
3201              if (eptr >= md->end_subject)
3202                {
3203                SCHECK_PARTIAL();
3204                MRRETURN(MATCH_NOMATCH);
3205                }
3206            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3207            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3208            }            }
3209          }          }
3210        else        else
# Line 2328  for (;;) Line 3212  for (;;)
3212        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3213          {          {
3214          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3215            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3216              if (eptr >= md->end_subject)
3217                {
3218                SCHECK_PARTIAL();
3219                MRRETURN(MATCH_NOMATCH);
3220                }
3221              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3222              }
3223          }          }
3224    
3225        if (min == max) continue;        if (min == max) continue;
# Line 2339  for (;;) Line 3230  for (;;)
3230          /* UTF-8 mode */          /* UTF-8 mode */
3231          if (utf8)          if (utf8)
3232            {            {
3233            register int d;            register unsigned int d;
3234            for (fi = min;; fi++)            for (fi = min;; fi++)
3235              {              {
3236              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3237              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3238                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3239                if (eptr >= md->end_subject)
3240                  {
3241                  SCHECK_PARTIAL();
3242                  MRRETURN(MATCH_NOMATCH);
3243                  }
3244              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3245              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3246              }              }
3247            }            }
3248          else          else
# Line 2355  for (;;) Line 3251  for (;;)
3251            {            {
3252            for (fi = min;; fi++)            for (fi = min;; fi++)
3253              {              {
3254              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3255              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3256              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3257                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3258                  {
3259                  SCHECK_PARTIAL();
3260                  MRRETURN(MATCH_NOMATCH);
3261                  }
3262                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3263              }              }
3264            }            }
3265          /* Control never gets here */          /* Control never gets here */
# Line 2374  for (;;) Line 3275  for (;;)
3275          /* UTF-8 mode */          /* UTF-8 mode */
3276          if (utf8)          if (utf8)
3277            {            {
3278            register int d;            register unsigned int d;
3279            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3280              {              {
3281              int len = 1;              int len = 1;
3282              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3283                  {
3284                  SCHECK_PARTIAL();
3285                  break;
3286                  }
3287              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3288              if (fc == d) break;              if (fc == d) break;
3289              eptr += len;              eptr += len;
3290              }              }
3291              if (possessive) continue;
3292            for(;;)            for(;;)
3293              {              {
3294              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3295              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3296              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3297              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2397  for (;;) Line 3303  for (;;)
3303            {            {
3304            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3305              {              {
3306              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3307                  {
3308                  SCHECK_PARTIAL();
3309                  break;
3310                  }
3311                if (fc == *eptr) break;
3312              eptr++;              eptr++;
3313              }              }
3314              if (possessive) continue;
3315            while (eptr >= pp)            while (eptr >= pp)
3316              {              {
3317              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3318              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3319              eptr--;              eptr--;
3320              }              }
3321            }            }
3322    
3323          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3324          }          }
3325        }        }
3326      /* Control never gets here */      /* Control never gets here */
# Line 2431  for (;;) Line 3343  for (;;)
3343      ecode += 3;      ecode += 3;
3344      goto REPEATTYPE;      goto REPEATTYPE;
3345    
3346        case OP_TYPEPOSSTAR:
3347        possessive = TRUE;
3348        min = 0;
3349        max = INT_MAX;
3350        ecode++;
3351        goto REPEATTYPE;
3352    
3353        case OP_TYPEPOSPLUS:
3354        possessive = TRUE;
3355        min = 1;
3356        max = INT_MAX;
3357        ecode++;
3358        goto REPEATTYPE;
3359    
3360        case OP_TYPEPOSQUERY:
3361        possessive = TRUE;
3362        min = 0;
3363        max = 1;
3364        ecode++;
3365        goto REPEATTYPE;
3366    
3367        case OP_TYPEPOSUPTO:
3368        possessive = TRUE;
3369        min = 0;
3370        max = GET2(ecode, 1);
3371        ecode += 3;
3372        goto REPEATTYPE;
3373    
3374      case OP_TYPESTAR:      case OP_TYPESTAR:
3375      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
3376      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2462  for (;;) Line 3402  for (;;)
3402    
3403      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3404      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3405      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3406      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3407      and single-bytes. */      and single-bytes. */
3408    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3409      if (min > 0)      if (min > 0)
3410        {        {
3411  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2477  for (;;) Line 3414  for (;;)
3414          switch(prop_type)          switch(prop_type)
3415            {            {
3416            case PT_ANY:            case PT_ANY:
3417            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3418            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3419              {              {
3420              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3421              GETCHARINC(c, eptr);                {
3422                  SCHECK_PARTIAL();
3423                  MRRETURN(MATCH_NOMATCH);
3424                  }
3425                GETCHARINCTEST(c, eptr);
3426              }              }
3427            break;            break;
3428    
3429            case PT_LAMP:            case PT_LAMP:
3430            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3431              {              {
3432              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3433              GETCHARINC(c, eptr);                {
3434              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3435                  MRRETURN(MATCH_NOMATCH);
3436                  }
3437                GETCHARINCTEST(c, eptr);
3438                prop_chartype = UCD_CHARTYPE(c);
3439              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3440                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3441                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3442                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3443              }              }
3444            break;            break;
3445    
3446            case PT_GC:            case PT_GC:
3447            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3448              {              {
3449              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3450              GETCHARINC(c, eptr);                {
3451              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3452                  MRRETURN(MATCH_NOMATCH);
3453                  }
3454                GETCHARINCTEST(c, eptr);
3455                prop_category = UCD_CATEGORY(c);
3456              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3457                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3458              }              }
3459            break;            break;
3460    
3461            case PT_PC:            case PT_PC:
3462            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3463              {              {
3464              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3465              GETCHARINC(c, eptr);                {
3466              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3467                  MRRETURN(MATCH_NOMATCH);
3468                  }
3469                GETCHARINCTEST(c, eptr);
3470                prop_chartype = UCD_CHARTYPE(c);
3471              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3472                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3473              }              }
3474            break;            break;
3475    
3476            case PT_SC:            case PT_SC:
3477            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3478              {              {
3479              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3480              GETCHARINC(c, eptr);                {
3481              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3482                  MRRETURN(MATCH_NOMATCH);
3483                  }
3484                GETCHARINCTEST(c, eptr);
3485                prop_script = UCD_SCRIPT(c);
3486              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3487                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3488              }              }
3489            break;            break;
3490    
3491            default:            default:
3492            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
3493            }            }
3494          }          }
3495    
# Line 2544  for (;;) Line 3500  for (;;)
3500          {          {
3501          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3502            {            {
3503              if (eptr >= md->end_subject)
3504                {
3505                SCHECK_PARTIAL();
3506                MRRETURN(MATCH_NOMATCH);
3507                }
3508            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3509            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3510            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3511            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3512              {              {
3513              int len = 1;              int len = 1;
3514              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3515                {                else { GETCHARLEN(c, eptr, len); }
3516                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3517              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3518              eptr += len;              eptr += len;
3519              }              }
# Line 2572  for (;;) Line 3531  for (;;)
3531          case OP_ANY:          case OP_ANY:
3532          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3533            {            {
3534            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3535               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))              {
3536              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3537                MRRETURN(MATCH_NOMATCH);
3538                }
3539              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3540              eptr++;
3541              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3542              }
3543            break;
3544    
3545            case OP_ALLANY:
3546            for (i = 1; i <= min; i++)
3547              {
3548              if (eptr >= md->end_subject)
3549                {
3550                SCHECK_PARTIAL();
3551                MRRETURN(MATCH_NOMATCH);
3552                }
3553              eptr++;
3554            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3555            }            }
3556          break;          break;
3557    
3558          case OP_ANYBYTE:          case OP_ANYBYTE:
3559            if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3560          eptr += min;          eptr += min;
3561          break;          break;
3562    
3563            case OP_ANYNL:
3564            for (i = 1; i <= min; i++)
3565              {
3566              if (eptr >= md->end_subject)
3567                {
3568                SCHECK_PARTIAL();
3569                MRRETURN(MATCH_NOMATCH);
3570                }
3571              GETCHARINC(c, eptr);
3572              switch(c)
3573                {
3574                default: MRRETURN(MATCH_NOMATCH);
3575                case 0x000d:
3576                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3577                break;
3578    
3579                case 0x000a:
3580                break;
3581    
3582                case 0x000b:
3583                case 0x000c:
3584                case 0x0085:
3585                case 0x2028:
3586                case 0x2029:
3587                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3588                break;
3589                }
3590              }
3591            break;
3592    
3593            case OP_NOT_HSPACE:
3594            for (i = 1; i <= min; i++)
3595              {
3596              if (eptr >= md->end_subject)
3597                {
3598                SCHECK_PARTIAL();
3599                MRRETURN(MATCH_NOMATCH);
3600                }
3601              GETCHARINC(c, eptr);
3602              switch(c)
3603                {
3604                default: break;
3605                case 0x09:      /* HT */
3606                case 0x20:      /* SPACE */
3607                case 0xa0:      /* NBSP */
3608                case 0x1680:    /* OGHAM SPACE MARK */
3609                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3610                case 0x2000:    /* EN QUAD */
3611                case 0x2001:    /* EM QUAD */
3612                case 0x2002:    /* EN SPACE */
3613                case 0x2003:    /* EM SPACE */
3614                case 0x2004:    /* THREE-PER-EM SPACE */
3615                case 0x2005:    /* FOUR-PER-EM SPACE */
3616                case 0x2006:    /* SIX-PER-EM SPACE */
3617                case 0x2007:    /* FIGURE SPACE */
3618                case 0x2008:    /* PUNCTUATION SPACE */
3619                case 0x2009:    /* THIN SPACE */
3620                case 0x200A:    /* HAIR SPACE */
3621                case 0x202f:    /* NARROW NO-BREAK SPACE */
3622                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3623                case 0x3000:    /* IDEOGRAPHIC SPACE */
3624                MRRETURN(MATCH_NOMATCH);
3625                }
3626              }
3627            break;
3628    
3629            case OP_HSPACE:
3630            for (i = 1; i <= min; i++)
3631              {
3632              if (eptr >= md->end_subject)
3633                {
3634                SCHECK_PARTIAL();
3635                MRRETURN(MATCH_NOMATCH);
3636                }
3637              GETCHARINC(c, eptr);
3638              switch(c)
3639                {
3640                default: MRRETURN(MATCH_NOMATCH);
3641                case 0x09:      /* HT */
3642                case 0x20:      /* SPACE */
3643                case 0xa0:      /* NBSP */
3644                case 0x1680:    /* OGHAM SPACE MARK */
3645                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3646                case 0x2000:    /* EN QUAD */
3647                case 0x2001:    /* EM QUAD */
3648                case 0x2002:    /* EN SPACE */
3649                case 0x2003:    /* EM SPACE */
3650                case 0x2004:    /* THREE-PER-EM SPACE */
3651                case 0x2005:    /* FOUR-PER-EM SPACE */
3652                case 0x2006:    /* SIX-PER-EM SPACE */
3653                case 0x2007:    /* FIGURE SPACE */
3654                case 0x2008:    /* PUNCTUATION SPACE */
3655                case 0x2009:    /* THIN SPACE */
3656                case 0x200A:    /* HAIR SPACE */
3657                case 0x202f:    /* NARROW NO-BREAK SPACE */
3658                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3659                case 0x3000:    /* IDEOGRAPHIC SPACE */
3660                break;
3661                }
3662              }
3663            break;
3664    
3665            case OP_NOT_VSPACE:
3666            for (i = 1; i <= min; i++)
3667              {
3668              if (eptr >= md->end_subject)
3669                {
3670                SCHECK_PARTIAL();
3671                MRRETURN(MATCH_NOMATCH);
3672                }
3673              GETCHARINC(c, eptr);
3674              switch(c)
3675                {
3676                default: break;
3677                case 0x0a:      /* LF */
3678                case 0x0b:      /* VT */
3679                case 0x0c:      /* FF */
3680                case 0x0d:      /* CR */
3681                case 0x85:      /* NEL */
3682                case 0x2028:    /* LINE SEPARATOR */
3683                case 0x2029:    /* PARAGRAPH SEPARATOR */
3684                MRRETURN(MATCH_NOMATCH);
3685                }
3686              }
3687            break;
3688    
3689            case OP_VSPACE:
3690            for (i = 1; i <= min; i++)
3691              {
3692              if (eptr >= md->end_subject)
3693                {
3694                SCHECK_PARTIAL();
3695                MRRETURN(MATCH_NOMATCH);
3696                }
3697              GETCHARINC(c, eptr);
3698              switch(c)
3699                {
3700                default: MRRETURN(MATCH_NOMATCH);
3701                case 0x0a:      /* LF */
3702                case 0x0b:      /* VT */
3703                case 0x0c:      /* FF */
3704                case 0x0d:      /* CR */
3705                case 0x85:      /* NEL */
3706                case 0x2028:    /* LINE SEPARATOR */
3707                case 0x2029:    /* PARAGRAPH SEPARATOR */
3708                break;
3709                }
3710              }
3711            break;
3712    
3713          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3714          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3715            {            {
3716            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3717                {
3718                SCHECK_PARTIAL();
3719                MRRETURN(MATCH_NOMATCH);
3720                }
3721            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3722            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3723              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
3724            }            }
3725          break;          break;
3726    
3727          case OP_DIGIT:          case OP_DIGIT:
3728          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3729            {            {
3730            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3731               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3732              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3733                MRRETURN(MATCH_NOMATCH);
3734                }
3735              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3736                MRRETURN(MATCH_NOMATCH);
3737            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3738            }            }
3739          break;          break;
# Line 2606  for (;;) Line 3741  for (;;)
3741          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3742          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3743            {            {
3744            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3745               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3746              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3747            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3748                }
3749              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3750                MRRETURN(MATCH_NOMATCH);
3751              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3752            }            }
3753          break;          break;
3754    
3755          case OP_WHITESPACE:          case OP_WHITESPACE:
3756          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3757            {            {
3758            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3759               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3760              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3761                MRRETURN(MATCH_NOMATCH);
3762                }
3763              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3764                MRRETURN(MATCH_NOMATCH);
3765            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3766            }            }
3767          break;          break;
# Line 2626  for (;;) Line 3769  for (;;)
3769          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3770          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3771            {            {
3772            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3773               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))              {
3774              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3775            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3776                }
3777              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3778                MRRETURN(MATCH_NOMATCH);
3779              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3780            }            }
3781          break;          break;
3782    
3783          case OP_WORDCHAR:          case OP_WORDCHAR:
3784          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)