/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC revision 550 by ph10, Sun Oct 10 16:24:11 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49    #define NLBLOCK md             /* Block containing newline information */
50    #define PSSTART start_subject  /* Field containing processed string start */
51    #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    /* Undefine some potentially clashing cpp symbols */
56    
57  /* Structure for building a chain of data that actually lives on the  #undef min
58  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   const uschar *epb_saved_eptr;  
 } eptrblock;  
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_ACCEPT       (-999)
75    #define MATCH_COMMIT       (-998)
76    #define MATCH_PRUNE        (-997)
77    #define MATCH_SKIP         (-996)
78    #define MATCH_SKIP_ARG     (-995)
79    #define MATCH_THEN         (-994)
80    
81    /* This is a convenience macro for code that occurs many times. */
82    
83    #define MRRETURN(ra) \
84      { \
85      md->mark = markptr; \
86      RRETURN(ra); \
87      }
88    
89  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
90  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 81  static const char rep_max[] = { 0, 0, 0, Line 99  static const char rep_max[] = { 0, 0, 0,
99    
100    
101    
102  #ifdef DEBUG  #ifdef PCRE_DEBUG
103  /*************************************************  /*************************************************
104  *        Debugging function to print chars       *  *        Debugging function to print chars       *
105  *************************************************/  *************************************************/
# Line 101  Returns:     nothing Line 119  Returns:     nothing
119  static void  static void
120  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121  {  {
122  int c;  unsigned int c;
123  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124  while (length-- > 0)  while (length-- > 0)
125    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 128  Returns:      TRUE if matched Line 146  Returns:      TRUE if matched
146  */  */
147    
148  static BOOL  static BOOL
149  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
150    unsigned long int ims)    unsigned long int ims)
151  {  {
152  const uschar *p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
153    
154  #ifdef DEBUG  #ifdef PCRE_DEBUG
155  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
156    printf("matching subject <null>");    printf("matching subject <null>");
157  else  else
# Line 150  printf("\n"); Line 168  printf("\n");
168    
169  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
170    
171  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172    properly if Unicode properties are supported. Otherwise, we can check only
173    ASCII characters. */
174    
175  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
176    {    {
177    #ifdef SUPPORT_UTF8
178    #ifdef SUPPORT_UCP
179      if (md->utf8)
180        {
181        USPTR endptr = eptr + length;
182        while (eptr < endptr)
183          {
184          int c, d;
185          GETCHARINC(c, eptr);
186          GETCHARINC(d, p);
187          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188          }
189        }
190      else
191    #endif
192    #endif
193    
194      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195      is no UCP support. */
196    
197    while (length-- > 0)    while (length-- > 0)
198      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199    }    }
200    
201    /* In the caseful case, we can just compare the bytes, whether or not we
202    are in UTF-8 mode. */
203    
204  else  else
205    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
# Line 169  return TRUE; Line 213  return TRUE;
213  ****************************************************************************  ****************************************************************************
214                     RECURSION IN THE match() FUNCTION                     RECURSION IN THE match() FUNCTION
215    
216  The match() function is highly recursive. Some regular expressions can cause  The match() function is highly recursive, though not every recursive call
217  it to recurse thousands of times. I was writing for Unix, so I just let it  increases the recursive depth. Nevertheless, some regular expressions can cause
218  call itself recursively. This uses the stack for saving everything that has  it to recurse to a great depth. I was writing for Unix, so I just let it call
219  to be saved for a recursive call. On Unix, the stack can be large, and this  itself recursively. This uses the stack for saving everything that has to be
220  works fine.  saved for a recursive call. On Unix, the stack can be large, and this works
221    fine.
222  It turns out that on non-Unix systems there are problems with programs that  
223  use a lot of stack. (This despite the fact that every last chip has oodles  It turns out that on some non-Unix-like systems there are problems with
224  of memory these days, and techniques for extending the stack have been known  programs that use a lot of stack. (This despite the fact that every last chip
225  for decades.) So....  has oodles of memory these days, and techniques for extending the stack have
226    been known for decades.) So....
227    
228  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
229  calls by keeping local variables that need to be preserved in blocks of memory  calls by keeping local variables that need to be preserved in blocks of memory
230  obtained from malloc instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
231  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
232  always used to.  always used to.
233    
234    The original heap-recursive code used longjmp(). However, it seems that this
235    can be very slow on some operating systems. Following a suggestion from Stan
236    Switzer, the use of longjmp() has been abolished, at the cost of having to
237    provide a unique number for each call to RMATCH. There is no way of generating
238    a sequence of numbers at compile time in C. I have given them names, to make
239    them stand out more clearly.
240    
241    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243    tests. Furthermore, not using longjmp() means that local dynamic variables
244    don't have indeterminate values; this has meant that the frame size can be
245    reduced because the result can be "passed back" by straight setting of the
246    variable instead of being passed in the frame.
247  ****************************************************************************  ****************************************************************************
248  ***************************************************************************/  ***************************************************************************/
249    
250    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251    below must be updated in sync.  */
252    
253  /* These versions of the macros use the stack, as normal */  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
254           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
255           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258           RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
259           RM61,  RM62 };
260    
261    /* These versions of the macros use the stack, as normal. There are debugging
262    versions and production versions. Note that the "rw" argument of RMATCH isn't
263    actually used in this definition. */
264    
265  #ifndef NO_RECURSE  #ifndef NO_RECURSE
266  #define REGISTER register  #define REGISTER register
267  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)  
268    #ifdef PCRE_DEBUG
269    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
270      { \
271      printf("match() called in line %d\n", __LINE__); \
272      rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
273      printf("to line %d\n", __LINE__); \
274      }
275    #define RRETURN(ra) \
276      { \
277      printf("match() returned %d from line %d ", ra, __LINE__); \
278      return ra; \
279      }
280    #else
281    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
282      rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
283  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
284    #endif
285    
286  #else  #else
287    
288    
289  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
290  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
291  match(), which never changes. */  argument of match(), which never changes. */
292    
293  #define REGISTER  #define REGISTER
294    
295  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
296    {\    {\
297    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
298    if (setjmp(frame->Xwhere) == 0)\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
299      {\    frame->Xwhere = rw; \
300      newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
301      newframe->Xecode = rb;\    newframe->Xecode = rb;\
302      newframe->Xoffset_top = rc;\    newframe->Xmstart = mstart;\
303      newframe->Xims = re;\    newframe->Xmarkptr = markptr;\
304      newframe->Xeptrb = rf;\    newframe->Xoffset_top = rc;\
305      newframe->Xflags = rg;\    newframe->Xims = re;\
306      newframe->Xprevframe = frame;\    newframe->Xeptrb = rf;\
307      frame = newframe;\    newframe->Xflags = rg;\
308      DPRINTF(("restarting from line %d\n", __LINE__));\    newframe->Xrdepth = frame->Xrdepth + 1;\
309      goto HEAP_RECURSE;\    newframe->Xprevframe = frame;\
310      }\    frame = newframe;\
311    else\    DPRINTF(("restarting from line %d\n", __LINE__));\
312      {\    goto HEAP_RECURSE;\
313      DPRINTF(("longjumped back to line %d\n", __LINE__));\    L_##rw:\
314      frame = md->thisframe;\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     rx = frame->Xresult;\  
     }\  
315    }    }
316    
317  #define RRETURN(ra)\  #define RRETURN(ra)\
318    {\    {\
319    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
320    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
321    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
322    if (frame != NULL)\    if (frame != NULL)\
323      {\      {\
324      frame->Xresult = ra;\      rrc = ra;\
325      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
326      }\      }\
327    return ra;\    return ra;\
328    }    }
# Line 250  typedef struct heapframe { Line 335  typedef struct heapframe {
335    
336    /* Function arguments that may change */    /* Function arguments that may change */
337    
338    const uschar *Xeptr;    USPTR Xeptr;
339    const uschar *Xecode;    const uschar *Xecode;
340      USPTR Xmstart;
341      USPTR Xmarkptr;
342    int Xoffset_top;    int Xoffset_top;
343    long int Xims;    long int Xims;
344    eptrblock *Xeptrb;    eptrblock *Xeptrb;
345    int Xflags;    int Xflags;
346      unsigned int Xrdepth;
347    
348    /* Function local variables */    /* Function local variables */
349    
350    const uschar *Xcallpat;    USPTR Xcallpat;
351    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
352    const uschar *Xdata;    USPTR Xcharptr;
353    const uschar *Xnext;  #endif
354    const uschar *Xpp;    USPTR Xdata;
355    const uschar *Xprev;    USPTR Xnext;
356    const uschar *Xsaved_eptr;    USPTR Xpp;
357      USPTR Xprev;
358      USPTR Xsaved_eptr;
359    
360    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
361    
362    BOOL Xcur_is_word;    BOOL Xcur_is_word;
363    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
364    BOOL Xprev_is_word;    BOOL Xprev_is_word;
365    
366    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
367    
368  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
369    int Xprop_type;    int Xprop_type;
370      int Xprop_value;
371    int Xprop_fail_result;    int Xprop_fail_result;
372    int Xprop_category;    int Xprop_category;
373    int Xprop_chartype;    int Xprop_chartype;
374    int Xprop_othercase;    int Xprop_script;
375    int Xprop_test_against;    int Xoclength;
376    int *Xprop_test_variable;    uschar Xocchars[8];
377  #endif  #endif
378    
379      int Xcodelink;
380    int Xctype;    int Xctype;
381    int Xfc;    unsigned int Xfc;
382    int Xfi;    int Xfi;
383    int Xlength;    int Xlength;
384    int Xmax;    int Xmax;
# Line 301  typedef struct heapframe { Line 392  typedef struct heapframe {
392    
393    eptrblock Xnewptrb;    eptrblock Xnewptrb;
394    
395    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
396    
397    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
398    
399  } heapframe;  } heapframe;
400    
# Line 320  typedef struct heapframe { Line 410  typedef struct heapframe {
410  *         Match from current position            *  *         Match from current position            *
411  *************************************************/  *************************************************/
412    
413  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
414  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
415  same response.  same response. */
416    
417    /* These macros pack up tests that are used for partial matching, and which
418    appear several times in the code. We set the "hit end" flag if the pointer is
419    at the end of the subject and also past the start of the subject (i.e.
420    something has been matched). For hard partial matching, we then return
421    immediately. The second one is used when we already know we are past the end of
422    the subject. */
423    
424    #define CHECK_PARTIAL()\
425      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
426        {\
427        md->hitend = TRUE;\
428        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
429        }
430    
431    #define SCHECK_PARTIAL()\
432      if (md->partial != 0 && eptr > mstart)\
433        {\
434        md->hitend = TRUE;\
435        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
436        }
437    
438  Performance note: It might be tempting to extract commonly used fields from the  
439  md structure (e.g. utf8, end_subject) into individual variables to improve  /* Performance note: It might be tempting to extract commonly used fields from
440    the md structure (e.g. utf8, end_subject) into individual variables to improve
441  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
442  made performance worse.  made performance worse.
443    
444  Arguments:  Arguments:
445     eptr        pointer in subject     eptr        pointer to current character in subject
446     ecode       position in code     ecode       pointer to current position in compiled code
447       mstart      pointer to the current match start position (can be modified
448                     by encountering \K)
449       markptr     pointer to the most recent MARK name, or NULL
450     offset_top  current top pointer     offset_top  current top pointer
451     md          pointer to "static" info for the match     md          pointer to "static" info for the match
452     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 342  Arguments: Line 454  Arguments:
454                   brackets - for testing for empty matches                   brackets - for testing for empty matches
455     flags       can contain     flags       can contain
456                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
457                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
458                       group that can match an empty string
459       rdepth      the recursion depth
460    
461  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
462                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
463                   a negative MATCH_xxx value for PRUNE, SKIP, etc
464                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
465                   (e.g. stopped by recursion limit)                   (e.g. stopped by repeated call or recursion limit)
466  */  */
467    
468  static int  static int
469  match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
470    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
471    int flags)    eptrblock *eptrb, int flags, unsigned int rdepth)
472  {  {
473  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
474  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
475  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
476    
477    register int  rrc;         /* Returns from recursive calls */
478    register int  i;           /* Used for loops not involving calls to RMATCH() */
479    register unsigned int c;   /* Character values not kept over RMATCH() calls */
480    register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
481    
482  register int  rrc;    /* Returns from recursive calls */  BOOL minimize, possessive; /* Quantifier options */
483  register int  i;      /* Used for loops not involving calls to RMATCH() */  int condcode;
 register int  c;      /* Character values not kept over RMATCH() calls */  
 register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  
484    
485  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
486  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 371  heap whenever RMATCH() does a "recursion Line 489  heap whenever RMATCH() does a "recursion
489    
490  #ifdef NO_RECURSE  #ifdef NO_RECURSE
491  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
492    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
493  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
494    
495  /* Copy in the original argument variables */  /* Copy in the original argument variables */
496    
497  frame->Xeptr = eptr;  frame->Xeptr = eptr;
498  frame->Xecode = ecode;  frame->Xecode = ecode;
499    frame->Xmstart = mstart;
500    frame->Xmarkptr = markptr;
501  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
502  frame->Xims = ims;  frame->Xims = ims;
503  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
504  frame->Xflags = flags;  frame->Xflags = flags;
505    frame->Xrdepth = rdepth;
506    
507  /* This is where control jumps back to to effect "recursion" */  /* This is where control jumps back to to effect "recursion" */
508    
# Line 390  HEAP_RECURSE: Line 512  HEAP_RECURSE:
512    
513  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
514  #define ecode              frame->Xecode  #define ecode              frame->Xecode
515    #define mstart             frame->Xmstart
516    #define markptr            frame->Xmarkptr
517  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
518  #define ims                frame->Xims  #define ims                frame->Xims
519  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
520  #define flags              frame->Xflags  #define flags              frame->Xflags
521    #define rdepth             frame->Xrdepth
522    
523  /* Ditto for the local variables */  /* Ditto for the local variables */
524    
# Line 401  HEAP_RECURSE: Line 526  HEAP_RECURSE:
526  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
527  #endif  #endif
528  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
529    #define codelink           frame->Xcodelink
530  #define data               frame->Xdata  #define data               frame->Xdata
531  #define next               frame->Xnext  #define next               frame->Xnext
532  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 411  HEAP_RECURSE: Line 537  HEAP_RECURSE:
537    
538  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
539  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
540  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
541    
542  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
543    
544  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
545  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
546    #define prop_value         frame->Xprop_value
547  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
548  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
549  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
550  #define prop_othercase     frame->Xprop_othercase  #define prop_script        frame->Xprop_script
551  #define prop_test_against  frame->Xprop_test_against  #define oclength           frame->Xoclength
552  #define prop_test_variable frame->Xprop_test_variable  #define occhars            frame->Xocchars
553  #endif  #endif
554    
555  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 447  HEAP_RECURSE: Line 573  HEAP_RECURSE:
573  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
574  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
575    
576  #else  #else         /* NO_RECURSE not defined */
577  #define fi i  #define fi i
578  #define fc c  #define fc c
579    
580    
581  #ifdef SUPPORT_UTF8                /* Many of these variables are used ony */  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
582  const uschar *charptr;             /* small blocks of the code. My normal  */  const uschar *charptr;             /* in small blocks of the code. My normal */
583  #endif                             /* style of coding would have declared  */  #endif                             /* style of coding would have declared    */
584  const uschar *callpat;             /* them within each of those blocks.    */  const uschar *callpat;             /* them within each of those blocks.      */
585  const uschar *data;                /* However, in order to accommodate the */  const uschar *data;                /* However, in order to accommodate the   */
586  const uschar *next;                /* version of this code that uses an    */  const uschar *next;                /* version of this code that uses an      */
587  const uschar *pp;                  /* external "stack" implemented on the  */  USPTR         pp;                  /* external "stack" implemented on the    */
588  const uschar *prev;                /* heap, it is easier to declare them   */  const uschar *prev;                /* heap, it is easier to declare them all */
589  const uschar *saved_eptr;          /* all here, so the declarations can    */  USPTR         saved_eptr;          /* here, so the declarations can be cut   */
590                                     /* be cut out in a block. The only      */                                     /* out in a block. The only declarations  */
591  recursion_info new_recursive;      /* declarations within blocks below are */  recursion_info new_recursive;      /* within blocks below are for variables  */
592                                     /* for variables that do not have to    */                                     /* that do not have to be preserved over  */
593  BOOL cur_is_word;                  /* be preserved over a recursive call   */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
594  BOOL condition;                    /* to RMATCH().                         */  BOOL condition;
 BOOL minimize;  
595  BOOL prev_is_word;  BOOL prev_is_word;
596    
597  unsigned long int original_ims;  unsigned long int original_ims;
598    
599  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
600  int prop_type;  int prop_type;
601    int prop_value;
602  int prop_fail_result;  int prop_fail_result;
603  int prop_category;  int prop_category;
604  int prop_chartype;  int prop_chartype;
605  int prop_othercase;  int prop_script;
606  int prop_test_against;  int oclength;
607  int *prop_test_variable;  uschar occhars[8];
608  #endif  #endif
609    
610    int codelink;
611  int ctype;  int ctype;
612  int length;  int length;
613  int max;  int max;
# Line 493  int save_offset1, save_offset2, save_off Line 620  int save_offset1, save_offset2, save_off
620  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
621    
622  eptrblock newptrb;  eptrblock newptrb;
623  #endif  #endif     /* NO_RECURSE */
624    
625  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
626  variables. */  variables. */
627    
628  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
629    prop_value = 0;
630  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_against = 0;  
 prop_test_variable = NULL;  
631  #endif  #endif
632    
633  /* OK, now we can get on with the real code of the function. Recursion is  
634  specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,  /* This label is used for tail recursion, which is used in a few cases even
635  these just turn into a recursive call to match() and a "return", respectively.  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
636  However, RMATCH isn't like a function call because it's quite a complicated  used. Thanks to Ian Taylor for noticing this possibility and sending the
637  macro. It has to be used in one particular way. This shouldn't, however, impact  original patch. */
638  performance when true recursion is being used. */  
639    TAIL_RECURSE:
640    
641    /* OK, now we can get on with the real code of the function. Recursive calls
642    are specified by the macro RMATCH and RRETURN is used to return. When
643    NO_RECURSE is *not* defined, these just turn into a recursive call to match()
644    and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
645    defined). However, RMATCH isn't like a function call because it's quite a
646    complicated macro. It has to be used in one particular way. This shouldn't,
647    however, impact performance when true recursion is being used. */
648    
649    #ifdef SUPPORT_UTF8
650    utf8 = md->utf8;       /* Local copy of the flag */
651    #else
652    utf8 = FALSE;
653    #endif
654    
655    /* First check that we haven't called match() too many times, or that we
656    haven't exceeded the recursive call limit. */
657    
658  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
659    if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
660    
661  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
 utf8 = md->utf8;       /* Local copy of the flag */  
662    
663  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
664  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
665  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
666  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
667    When match() is called in other circumstances, don't add to the chain. The
668    match_cbegroup flag must NOT be used with tail recursion, because the memory
669    block that is used is on the stack, so a new one may be required for each
670    match(). */
671    
672  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
673    {    {
   newptrb.epb_prev = eptrb;  
674    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
675      newptrb.epb_prev = eptrb;
676    eptrb = &newptrb;    eptrb = &newptrb;
677    }    }
678    
679  /* Now start processing the operations. */  /* Now start processing the opcodes. */
680    
681  for (;;)  for (;;)
682    {    {
683      minimize = possessive = FALSE;
684    op = *ecode;    op = *ecode;
   minimize = FALSE;  
   
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
   /* Opening capturing bracket. If there is space in the offset vector, save  
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
685    
686    if (op > OP_BRA)    switch(op)
687      {      {
688      number = op - OP_BRA;      case OP_MARK:
689        markptr = ecode + 2;
690      /* For extended extraction brackets (large number), we have to fish out the      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
691      number from a dummy opcode at the start. */        ims, eptrb, flags, RM55);
692    
693      if (number > EXTRACT_BASIC_MAX)      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
694        number = GET2(ecode, 2+LINK_SIZE);      argument, and we must check whether that argument matches this MARK's
695        argument. It is passed back in md->start_match_ptr (an overloading of that
696        variable). If it does match, we reset that variable to the current subject
697        position and return MATCH_SKIP. Otherwise, pass back the return code
698        unaltered. */
699    
700        if (rrc == MATCH_SKIP_ARG &&
701            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
702          {
703          md->start_match_ptr = eptr;
704          RRETURN(MATCH_SKIP);
705          }
706    
707        if (md->mark == NULL) md->mark = markptr;
708        RRETURN(rrc);
709    
710        case OP_FAIL:
711        MRRETURN(MATCH_NOMATCH);
712    
713        case OP_COMMIT:
714        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
715          ims, eptrb, flags, RM52);
716        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
717        MRRETURN(MATCH_COMMIT);
718    
719        case OP_PRUNE:
720        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
721          ims, eptrb, flags, RM51);
722        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
723        MRRETURN(MATCH_PRUNE);
724    
725        case OP_PRUNE_ARG:
726        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
727          ims, eptrb, flags, RM56);
728        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
729        md->mark = ecode + 2;
730        RRETURN(MATCH_PRUNE);
731    
732        case OP_SKIP:
733        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
734          ims, eptrb, flags, RM53);
735        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
736        md->start_match_ptr = eptr;   /* Pass back current position */
737        MRRETURN(MATCH_SKIP);
738    
739        case OP_SKIP_ARG:
740        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
741          ims, eptrb, flags, RM57);
742        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
743    
744        /* Pass back the current skip name by overloading md->start_match_ptr and
745        returning the special MATCH_SKIP_ARG return code. This will either be
746        caught by a matching MARK, or get to the top, where it is treated the same
747        as PRUNE. */
748    
749        md->start_match_ptr = ecode + 2;
750        RRETURN(MATCH_SKIP_ARG);
751    
752        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
753        the alt that is at the start of the current branch. This makes it possible
754        to skip back past alternatives that precede the THEN within the current
755        branch. */
756    
757        case OP_THEN:
758        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
759          ims, eptrb, flags, RM54);
760        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
761        md->start_match_ptr = ecode - GET(ecode, 1);
762        MRRETURN(MATCH_THEN);
763    
764        case OP_THEN_ARG:
765        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
766          offset_top, md, ims, eptrb, flags, RM58);
767        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
768        md->start_match_ptr = ecode - GET(ecode, 1);
769        md->mark = ecode + LINK_SIZE + 2;
770        RRETURN(MATCH_THEN);
771    
772        /* Handle a capturing bracket. If there is space in the offset vector, save
773        the current subject position in the working slot at the top of the vector.
774        We mustn't change the current values of the data slot, because they may be
775        set from a previous iteration of this group, and be referred to by a
776        reference inside the group.
777    
778        If the bracket fails to match, we need to restore this value and also the
779        values of the final offsets, in case they were set by a previous iteration
780        of the same bracket.
781    
782        If there isn't enough space in the offset vector, treat this as if it were
783        a non-capturing bracket. Don't worry about setting the flag for the error
784        case here; that is handled in the code for KET. */
785    
786        case OP_CBRA:
787        case OP_SCBRA:
788        number = GET2(ecode, 1+LINK_SIZE);
789      offset = number << 1;      offset = number << 1;
790    
791  #ifdef DEBUG  #ifdef PCRE_DEBUG
792      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
793        printf("subject=");
794      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
795      printf("\n");      printf("\n");
796  #endif  #endif
# Line 582  for (;;) Line 803  for (;;)
803        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
804    
805        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
806        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
807            (int)(eptr - md->start_subject);
808    
809          flags = (op == OP_SCBRA)? match_cbegroup : 0;
810        do        do
811          {          {
812          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
813            match_isgroup);            ims, eptrb, flags, RM1);
814          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
815                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
816              RRETURN(rrc);
817          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
818          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
819          }          }
# Line 600  for (;;) Line 825  for (;;)
825        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
826        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
827    
828          if (rrc != MATCH_THEN) md->mark = markptr;
829        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
830        }        }
831    
832      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
833        as a non-capturing bracket. */
834    
835      else op = OP_BRA;      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
836      }      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
837    
838    /* Other types of node can be handled by a switch */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
839    
840    switch(op)      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
841      {      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
842      case OP_BRA:     /* Non-capturing bracket: optimized */  
843      DPRINTF(("start bracket 0\n"));      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
844      do      final alternative within the brackets, we would return the result of a
845        recursive call to match() whatever happened. We can reduce stack usage by
846        turning this into a tail recursion, except in the case when match_cbegroup
847        is set.*/
848    
849        case OP_BRA:
850        case OP_SBRA:
851        DPRINTF(("start non-capturing bracket\n"));
852        flags = (op >= OP_SBRA)? match_cbegroup : 0;
853        for (;;)
854        {        {
855        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
856          match_isgroup);          {
857        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (flags == 0)    /* Not a possibly empty group */
858              {
859              ecode += _pcre_OP_lengths[*ecode];
860              DPRINTF(("bracket 0 tail recursion\n"));
861              goto TAIL_RECURSE;
862              }
863    
864            /* Possibly empty group; can't use tail recursion. */
865    
866            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
867              eptrb, flags, RM48);
868            if (rrc == MATCH_NOMATCH) md->mark = markptr;
869            RRETURN(rrc);
870            }
871    
872          /* For non-final alternatives, continue the loop for a NOMATCH result;
873          otherwise return. */
874    
875          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
876            eptrb, flags, RM2);
877          if (rrc != MATCH_NOMATCH &&
878              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
879            RRETURN(rrc);
880        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
881        }        }
882      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
883    
884      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
885      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
886      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
887      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
888        obeyed, we can use tail recursion to avoid using another stack frame. */
889    
890      case OP_COND:      case OP_COND:
891      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
892        codelink= GET(ecode, 1);
893    
894        /* Because of the way auto-callout works during compile, a callout item is
895        inserted between OP_COND and an assertion condition. */
896    
897        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
898          {
899          if (pcre_callout != NULL)
900            {
901            pcre_callout_block cb;
902            cb.version          = 1;   /* Version 1 of the callout block */
903            cb.callout_number   = ecode[LINK_SIZE+2];
904            cb.offset_vector    = md->offset_vector;
905            cb.subject          = (PCRE_SPTR)md->start_subject;
906            cb.subject_length   = (int)(md->end_subject - md->start_subject);
907            cb.start_match      = (int)(mstart - md->start_subject);
908            cb.current_position = (int)(eptr - md->start_subject);
909            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
910            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
911            cb.capture_top      = offset_top/2;
912            cb.capture_last     = md->capture_last;
913            cb.callout_data     = md->callout_data;
914            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
915            if (rrc < 0) RRETURN(rrc);
916            }
917          ecode += _pcre_OP_lengths[OP_CALLOUT];
918          }
919    
920        condcode = ecode[LINK_SIZE+1];
921    
922        /* Now see what the actual condition is */
923    
924        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
925          {
926          if (md->recursive == NULL)                /* Not recursing => FALSE */
927            {
928            condition = FALSE;
929            ecode += GET(ecode, 1);
930            }
931          else
932            {
933            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
934            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
935    
936            /* If the test is for recursion into a specific subpattern, and it is
937            false, but the test was set up by name, scan the table to see if the
938            name refers to any other numbers, and test them. The condition is true
939            if any one is set. */
940    
941            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
942              {
943              uschar *slotA = md->name_table;
944              for (i = 0; i < md->name_count; i++)
945                {
946                if (GET2(slotA, 0) == recno) break;
947                slotA += md->name_entry_size;
948                }
949    
950              /* Found a name for the number - there can be only one; duplicate
951              names for different numbers are allowed, but not vice versa. First
952              scan down for duplicates. */
953    
954              if (i < md->name_count)
955                {
956                uschar *slotB = slotA;
957                while (slotB > md->name_table)
958                  {
959                  slotB -= md->name_entry_size;
960                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
961                    {
962                    condition = GET2(slotB, 0) == md->recursive->group_num;
963                    if (condition) break;
964                    }
965                  else break;
966                  }
967    
968                /* Scan up for duplicates */
969    
970                if (!condition)
971                  {
972                  slotB = slotA;
973                  for (i++; i < md->name_count; i++)
974                    {
975                    slotB += md->name_entry_size;
976                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
977                      {
978                      condition = GET2(slotB, 0) == md->recursive->group_num;
979                      if (condition) break;
980                      }
981                    else break;
982                    }
983                  }
984                }
985              }
986    
987            /* Choose branch according to the condition */
988    
989            ecode += condition? 3 : GET(ecode, 1);
990            }
991          }
992    
993        else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
994        {        {
995        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
996        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
997          (md->recursive != NULL) :  
998          (offset < offset_top && md->offset_vector[offset] >= 0);        /* If the numbered capture is unset, but the reference was by name,
999        RMATCH(rrc, eptr, ecode + (condition?        scan the table to see if the name refers to any other numbers, and test
1000          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),        them. The condition is true if any one is set. This is tediously similar
1001          offset_top, md, ims, eptrb, match_isgroup);        to the code above, but not close enough to try to amalgamate. */
1002        RRETURN(rrc);  
1003          if (!condition && condcode == OP_NCREF)
1004            {
1005            int refno = offset >> 1;
1006            uschar *slotA = md->name_table;
1007    
1008            for (i = 0; i < md->name_count; i++)
1009              {
1010              if (GET2(slotA, 0) == refno) break;
1011              slotA += md->name_entry_size;
1012              }
1013    
1014            /* Found a name for the number - there can be only one; duplicate names
1015            for different numbers are allowed, but not vice versa. First scan down
1016            for duplicates. */
1017    
1018            if (i < md->name_count)
1019              {
1020              uschar *slotB = slotA;
1021              while (slotB > md->name_table)
1022                {
1023                slotB -= md->name_entry_size;
1024                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1025                  {
1026                  offset = GET2(slotB, 0) << 1;
1027                  condition = offset < offset_top &&
1028                    md->offset_vector[offset] >= 0;
1029                  if (condition) break;
1030                  }
1031                else break;
1032                }
1033    
1034              /* Scan up for duplicates */
1035    
1036              if (!condition)
1037                {
1038                slotB = slotA;
1039                for (i++; i < md->name_count; i++)
1040                  {
1041                  slotB += md->name_entry_size;
1042                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1043                    {
1044                    offset = GET2(slotB, 0) << 1;
1045                    condition = offset < offset_top &&
1046                      md->offset_vector[offset] >= 0;
1047                    if (condition) break;
1048                    }
1049                  else break;
1050                  }
1051                }
1052              }
1053            }
1054    
1055          /* Choose branch according to the condition */
1056    
1057          ecode += condition? 3 : GET(ecode, 1);
1058          }
1059    
1060        else if (condcode == OP_DEF)     /* DEFINE - always false */
1061          {
1062          condition = FALSE;
1063          ecode += GET(ecode, 1);
1064        }        }
1065    
1066      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
1067      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
1068        assertion. */
1069    
1070      else      else
1071        {        {
1072        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1073            match_condassert | match_isgroup);            match_condassert, RM3);
1074        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1075          {          {
1076          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
1077            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1078          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1079          }          }
1080        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH &&
1081                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1082          {          {
1083          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1084          }          }
1085        else ecode += GET(ecode, 1);        else
1086        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
1087          match_isgroup);          condition = FALSE;
1088        RRETURN(rrc);          ecode += codelink;
1089            }
1090        }        }
     /* Control never reaches here */  
1091    
1092      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
1093      encountered. */      we can use tail recursion to avoid using another stack frame, except when
1094        match_cbegroup is required for an unlimited repeat of a possibly empty
1095        group. If the second alternative doesn't exist, we can just plough on. */
1096    
1097        if (condition || *ecode == OP_ALT)
1098          {
1099          ecode += 1 + LINK_SIZE;
1100          if (op == OP_SCOND)        /* Possibly empty group */
1101            {
1102            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1103            RRETURN(rrc);
1104            }
1105          else                       /* Group must match something */
1106            {
1107            flags = 0;
1108            goto TAIL_RECURSE;
1109            }
1110          }
1111        else                         /* Condition false & no alternative */
1112          {
1113          ecode += 1 + LINK_SIZE;
1114          }
1115        break;
1116    
1117    
1118        /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1119        to close any currently open capturing brackets. */
1120    
1121        case OP_CLOSE:
1122        number = GET2(ecode, 1);
1123        offset = number << 1;
1124    
1125    #ifdef PCRE_DEBUG
1126          printf("end bracket %d at *ACCEPT", number);
1127          printf("\n");
1128    #endif
1129    
1130      case OP_CREF:      md->capture_last = number;
1131      case OP_BRANUMBER:      if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1132          {
1133          md->offset_vector[offset] =
1134            md->offset_vector[md->offset_end - number];
1135          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1136          if (offset_top <= offset) offset_top = offset + 2;
1137          }
1138      ecode += 3;      ecode += 3;
1139      break;      break;
1140    
     /* End of the pattern. If we are in a recursion, we should restore the  
     offsets appropriately and continue from after the call. */  
1141    
1142        /* End of the pattern, either real or forced. If we are in a top-level
1143        recursion, we should restore the offsets appropriately and continue from
1144        after the call. */
1145    
1146        case OP_ACCEPT:
1147      case OP_END:      case OP_END:
1148      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1149        {        {
1150        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
1151        DPRINTF(("Hit the end in a (?0) recursion\n"));        DPRINTF(("End of pattern in a (?0) recursion\n"));
1152        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1153        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1154          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1155        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1156        ims = original_ims;        ims = original_ims;
1157        ecode = rec->after_call;        ecode = rec->after_call;
1158        break;        break;
1159        }        }
1160    
1161      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1162      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1163        the subject. In both cases, backtracking will then try other alternatives,
1164        if any. */
1165    
1166        if (eptr == mstart &&
1167            (md->notempty ||
1168              (md->notempty_atstart &&
1169                mstart == md->start_subject + md->start_offset)))
1170          MRRETURN(MATCH_NOMATCH);
1171    
1172        /* Otherwise, we have a match. */
1173    
1174        md->end_match_ptr = eptr;           /* Record where we ended */
1175        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1176        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1177    
1178        /* For some reason, the macros don't work properly if an expression is
1179        given as the argument to MRRETURN when the heap is in use. */
1180    
1181      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1182      md->end_match_ptr = eptr;          /* Record where we ended */      MRRETURN(rrc);
     md->end_offset_top = offset_top;   /* and how many extracts were taken */  
     RRETURN(MATCH_MATCH);  
1183    
1184      /* Change option settings */      /* Change option settings */
1185    
# Line 717  for (;;) Line 1199  for (;;)
1199      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1200      do      do
1201        {        {
1202        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1203          match_isgroup);          RM4);
1204        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1205        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          {
1206            mstart = md->start_match_ptr;   /* In case \K reset it */
1207            break;
1208            }
1209          if (rrc != MATCH_NOMATCH &&
1210              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1211            RRETURN(rrc);
1212        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1213        }        }
1214      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1215      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1216    
1217      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1218    
# Line 738  for (;;) Line 1226  for (;;)
1226      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1227      continue;      continue;
1228    
1229      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1230        PRUNE, or COMMIT means we must assume failure without checking subsequent
1231        branches. */
1232    
1233      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1234      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1235      do      do
1236        {        {
1237        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1238          match_isgroup);          RM5);
1239        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1240        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1241            {
1242            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1243            break;
1244            }
1245          if (rrc != MATCH_NOMATCH &&
1246              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1247            RRETURN(rrc);
1248        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1249        }        }
1250      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 766  for (;;) Line 1263  for (;;)
1263  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1264      if (utf8)      if (utf8)
1265        {        {
1266        c = GET(ecode,1);        i = GET(ecode, 1);
1267        for (i = 0; i < c; i++)        while (i-- > 0)
1268          {          {
1269          eptr--;          eptr--;
1270          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1271          BACKCHAR(eptr)          BACKCHAR(eptr);
1272          }          }
1273        }        }
1274      else      else
# Line 780  for (;;) Line 1277  for (;;)
1277      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1278    
1279        {        {
1280        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
1281        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1282        }        }
1283    
1284      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1285    
1286        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1287      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1288      break;      break;
1289    
# Line 800  for (;;) Line 1298  for (;;)
1298        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 1;   /* Version 1 of the callout block */
1299        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1300        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1301        cb.subject          = (const char *)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1302        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1303        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1304        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1305        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1306        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1307        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1308        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1309        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1310        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1311        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1312        }        }
1313      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 837  for (;;) Line 1335  for (;;)
1335      case OP_RECURSE:      case OP_RECURSE:
1336        {        {
1337        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1338        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
1339            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
1340    
1341        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1342    
# Line 869  for (;;) Line 1362  for (;;)
1362    
1363        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1364              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1365        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1366    
1367        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1368        restore the offset and recursion data. */        restore the offset and recursion data. */
1369    
1370        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1371          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1372        do        do
1373          {          {
1374          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1375              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
1376          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1377            {            {
1378              DPRINTF(("Recursion matched\n"));
1379            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1380            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1381              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1382            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1383              }
1384            else if (rrc != MATCH_NOMATCH &&
1385                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1386              {
1387              DPRINTF(("Recursion gave error %d\n", rrc));
1388              if (new_recursive.offset_save != stacksave)
1389                (pcre_free)(new_recursive.offset_save);
1390              RRETURN(rrc);
1391            }            }
         else if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
1392    
1393          md->recursive = &new_recursive;          md->recursive = &new_recursive;
1394          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
# Line 900  for (;;) Line 1401  for (;;)
1401        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1402        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1403          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1404        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1405        }        }
1406      /* Control never reaches here */      /* Control never reaches here */
1407    
# Line 909  for (;;) Line 1410  for (;;)
1410      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1411      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1412      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1413      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1414        the start-of-match value in case it was changed by \K. */
1415    
1416      case OP_ONCE:      case OP_ONCE:
1417        {      prev = ecode;
1418        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1419    
1420        do      do
1421          {
1422          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1423          if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1424          {          {
1425          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,          mstart = md->start_match_ptr;
1426            eptrb, match_isgroup);          break;
         if (rrc == MATCH_MATCH) break;  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         ecode += GET(ecode,1);  
1427          }          }
1428        while (*ecode == OP_ALT);        if (rrc != MATCH_NOMATCH &&
1429              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1430            RRETURN(rrc);
1431          ecode += GET(ecode,1);
1432          }
1433        while (*ecode == OP_ALT);
1434    
1435        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1436    
1437        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1438    
1439        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1440        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1441    
1442        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1443    
1444        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1445        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1446    
1447        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1448        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1449        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1450        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1451        course of events. */      course of events. */
1452    
1453        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1454          {        {
1455          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1456          break;        break;
1457          }        }
1458    
1459        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1460        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1461        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1462        opcode. */      any options that changed within the bracket before re-running it, so
1463        check the next opcode. */
1464    
1465        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1466          {        {
1467          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1468          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1469          }        }
1470    
1471        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1472          {        {
1473          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1474          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1475          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1476          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = 0;
1477          }        goto TAIL_RECURSE;
1478        else  /* OP_KETRMAX */        }
1479          {      else  /* OP_KETRMAX */
1480          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        {
1481          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1482          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1483          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += 1 + LINK_SIZE;
1484          }        flags = 0;
1485          goto TAIL_RECURSE;
1486        }        }
1487      RRETURN(MATCH_NOMATCH);      /* Control never gets here */
1488    
1489      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1490      bracketed group and go to there. */      bracketed group and go to there. */
# Line 985  for (;;) Line 1493  for (;;)
1493      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1494      break;      break;
1495    
1496      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1497      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1498      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1499      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1500      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1501    
1502      case OP_BRAZERO:      case OP_BRAZERO:
1503        {        {
1504        next = ecode+1;        next = ecode+1;
1505        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1506        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1507        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1508        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1509        }        }
1510      break;      break;
1511    
1512      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1513        {        {
1514        next = ecode+1;        next = ecode+1;
1515        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1516        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1517        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1518        ecode++;        ecode++;
1519        }        }
1520      break;      break;
1521    
1522      /* End of a group, repeated or non-repeating. If we are at the end of      case OP_SKIPZERO:
1523      an assertion "group", stop matching and return MATCH_MATCH, but record the        {
1524      current high water mark for use by positive assertions. Do this also        next = ecode+1;
1525      for the "once" (not-backup up) groups. */        do next += GET(next,1); while (*next == OP_ALT);
1526          ecode = next + 1 + LINK_SIZE;
1527          }
1528        break;
1529    
1530        /* End of a group, repeated or non-repeating. */
1531    
1532      case OP_KET:      case OP_KET:
1533      case OP_KETRMIN:      case OP_KETRMIN:
1534      case OP_KETRMAX:      case OP_KETRMAX:
1535        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
1536    
1537        /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1538        infinite repeats of empty string matches, retrieve the subject start from
1539        the chain. Otherwise, set it NULL. */
1540    
1541        eptrb = eptrb->epb_prev;      if (*prev >= OP_SBRA)
1542          {
1543        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1544            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1545            *prev == OP_ONCE)        }
1546          {      else saved_eptr = NULL;
         md->end_match_ptr = eptr;      /* For ONCE */  
         md->end_offset_top = offset_top;  
         RRETURN(MATCH_MATCH);  
         }  
1547    
1548        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group or an atomic group, stop
1549        group number back at the start and if necessary complete handling an      matching and return MATCH_MATCH, but record the current high water mark for
1550        extraction by setting the offsets and bumping the high water mark. */      use by positive assertions. We also need to record the match start in case
1551        it was changed by \K. */
1552    
1553        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1554          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1555          number = *prev - OP_BRA;          *prev == OP_ONCE)
1556          {
1557          md->end_match_ptr = eptr;      /* For ONCE */
1558          md->end_offset_top = offset_top;
1559          md->start_match_ptr = mstart;
1560          MRRETURN(MATCH_MATCH);
1561          }
1562    
1563          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1564          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1565        bumping the high water mark. Note that whole-pattern recursion is coded as
1566        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1567        when the OP_END is reached. Other recursion is handled here. */
1568    
1569          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1570          offset = number << 1;        {
1571          number = GET2(prev, 1+LINK_SIZE);
1572          offset = number << 1;
1573    
1574  #ifdef DEBUG  #ifdef PCRE_DEBUG
1575          printf("end bracket %d", number);        printf("end bracket %d", number);
1576          printf("\n");        printf("\n");
1577  #endif  #endif
1578    
1579          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1580          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1581          into group 0, so it won't be picked up here. Instead, we catch it when          {
1582          the OP_END is reached. */          md->offset_vector[offset] =
1583              md->offset_vector[md->offset_end - number];
1584            md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1585            if (offset_top <= offset) offset_top = offset + 2;
1586            }
1587    
1588          /* Handle a recursively called group. Restore the offsets
1589          appropriately and continue from after the call. */
1590    
1591          if (md->recursive != NULL && md->recursive->group_num == number)
1592            {
1593            recursion_info *rec = md->recursive;
1594            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1595            md->recursive = rec->prevrec;
1596            memcpy(md->offset_vector, rec->offset_save,
1597              rec->saved_max * sizeof(int));
1598            offset_top = rec->save_offset_top;
1599            ecode = rec->after_call;
1600            ims = original_ims;
1601            break;
1602            }
1603          }
1604    
1605          if (number > 0)      /* For both capturing and non-capturing groups, reset the value of the ims
1606            {      flags, in case they got changed during the group. */
           md->capture_last = number;  
           if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
             {  
             md->offset_vector[offset] =  
               md->offset_vector[md->offset_end - number];  
             md->offset_vector[offset+1] = eptr - md->start_subject;  
             if (offset_top <= offset) offset_top = offset + 2;  
             }  
1607    
1608            /* Handle a recursively called group. Restore the offsets      ims = original_ims;
1609            appropriately and continue from after the call. */      DPRINTF(("ims reset to %02lx\n", ims));
1610    
1611            if (md->recursive != NULL && md->recursive->group_num == number)      /* For a non-repeating ket, just continue at this level. This also
1612              {      happens for a repeating ket if no characters were matched in the group.
1613              recursion_info *rec = md->recursive;      This is the forcible breaking of infinite loops as implemented in Perl
1614              DPRINTF(("Recursion (%d) succeeded - continuing\n", number));      5.005. If there is an options reset, it will get obeyed in the normal
1615              md->recursive = rec->prevrec;      course of events. */
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
         }  
1616    
1617        /* Reset the value of the ims flags, in case they got changed during      if (*ecode == OP_KET || eptr == saved_eptr)
1618        the group. */        {
1619          ecode += 1 + LINK_SIZE;
1620          break;
1621          }
1622    
1623        ims = original_ims;      /* The repeating kets try the rest of the pattern or restart from the
1624        DPRINTF(("ims reset to %02lx\n", ims));      preceding bracket, in the appropriate order. In the second case, we can use
1625        tail recursion to avoid using another stack frame, unless we have an
1626        unlimited repeat of a group that can match an empty string. */
1627    
1628        /* For a non-repeating ket, just continue at this level. This also      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
       happens for a repeating ket if no characters were matched in the group.  
       This is the forcible breaking of infinite loops as implemented in Perl  
       5.005. If there is an options reset, it will get obeyed in the normal  
       course of events. */  
1629    
1630        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KETRMIN)
1631          {
1632          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1633          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1634          if (flags != 0)    /* Could match an empty string */
1635          {          {
1636          ecode += 1 + LINK_SIZE;          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1637          break;          RRETURN(rrc);
         }  
   
       /* The repeating kets try the rest of the pattern or restart from the  
       preceding bracket, in the appropriate order. */  
   
       if (*ecode == OP_KETRMIN)  
         {  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
1638          }          }
1639          ecode = prev;
1640          goto TAIL_RECURSE;
1641        }        }
1642        else  /* OP_KETRMAX */
1643      RRETURN(MATCH_NOMATCH);        {
1644          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1645          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1646          ecode += 1 + LINK_SIZE;
1647          flags = 0;
1648          goto TAIL_RECURSE;
1649          }
1650        /* Control never gets here */
1651    
1652      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1653    
1654      case OP_CIRC:      case OP_CIRC:
1655      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1656      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1657        {        {
1658        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1659          RRETURN(MATCH_NOMATCH);            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1660            MRRETURN(MATCH_NOMATCH);
1661        ecode++;        ecode++;
1662        break;        break;
1663        }        }
# Line 1145  for (;;) Line 1666  for (;;)
1666      /* Start of subject assertion */      /* Start of subject assertion */
1667    
1668      case OP_SOD:      case OP_SOD:
1669      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1670      ecode++;      ecode++;
1671      break;      break;
1672    
1673      /* Start of match assertion */      /* Start of match assertion */
1674    
1675      case OP_SOM:      case OP_SOM:
1676      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1677        ecode++;
1678        break;
1679    
1680        /* Reset the start of match point */
1681    
1682        case OP_SET_SOM:
1683        mstart = eptr;
1684      ecode++;      ecode++;
1685      break;      break;
1686    
# Line 1163  for (;;) Line 1691  for (;;)
1691      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1692        {        {
1693        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1694          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1695        else        else
1696          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1697        ecode++;        ecode++;
1698        break;        break;
1699        }        }
1700      else      else
1701        {        {
1702        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1703        if (!md->endonly)        if (!md->endonly)
1704          {          {
1705          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1706             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1707            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
1708          ecode++;          ecode++;
1709          break;          break;
1710          }          }
1711        }        }
1712      /* ... else fall through */      /* ... else fall through for endonly */
1713    
1714      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1715    
1716      case OP_EOD:      case OP_EOD:
1717      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1718      ecode++;      ecode++;
1719      break;      break;
1720    
1721      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1722    
1723      case OP_EODN:      case OP_EODN:
1724      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1725         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1726          MRRETURN(MATCH_NOMATCH);
1727      ecode++;      ecode++;
1728      break;      break;
1729    
# Line 1206  for (;;) Line 1735  for (;;)
1735    
1736        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1737        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1738        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1739          partial matching. */
1740    
1741  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1742        if (utf8)        if (utf8)
1743          {          {
1744            /* Get status of previous character */
1745    
1746          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1747            {            {
1748            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1749            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1750              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1751            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1752    #ifdef SUPPORT_UCP
1753              if (md->use_ucp)
1754                {
1755                if (c == '_') prev_is_word = TRUE; else
1756                  {
1757                  int cat = UCD_CATEGORY(c);
1758                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1759                  }
1760                }
1761              else
1762    #endif
1763            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1764            }            }
1765          if (eptr >= md->end_subject) cur_is_word = FALSE; else  
1766            /* Get status of next character */
1767    
1768            if (eptr >= md->end_subject)
1769              {
1770              SCHECK_PARTIAL();
1771              cur_is_word = FALSE;
1772              }
1773            else
1774            {            {
1775            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1776    #ifdef SUPPORT_UCP
1777              if (md->use_ucp)
1778                {
1779                if (c == '_') cur_is_word = TRUE; else
1780                  {
1781                  int cat = UCD_CATEGORY(c);
1782                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1783                  }
1784                }
1785              else
1786    #endif
1787            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1788            }            }
1789          }          }
1790        else        else
1791  #endif  #endif
1792    
1793        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1794          consistency with the behaviour of \w we do use it in this case. */
1795    
1796          {          {
1797          prev_is_word = (eptr != md->start_subject) &&          /* Get status of previous character */
1798            ((md->ctypes[eptr[-1]] & ctype_word) != 0);  
1799          cur_is_word = (eptr < md->end_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1800            ((md->ctypes[*eptr] & ctype_word) != 0);            {
1801              if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1802    #ifdef SUPPORT_UCP
1803              if (md->use_ucp)
1804                {
1805                c = eptr[-1];
1806                if (c == '_') prev_is_word = TRUE; else
1807                  {
1808                  int cat = UCD_CATEGORY(c);
1809                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1810                  }
1811                }
1812              else
1813    #endif
1814              prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1815              }
1816    
1817            /* Get status of next character */
1818    
1819            if (eptr >= md->end_subject)
1820              {
1821              SCHECK_PARTIAL();
1822              cur_is_word = FALSE;
1823              }
1824            else
1825    #ifdef SUPPORT_UCP
1826            if (md->use_ucp)
1827              {
1828              c = *eptr;
1829              if (c == '_') cur_is_word = TRUE; else
1830                {
1831                int cat = UCD_CATEGORY(c);
1832                cur_is_word = (cat == ucp_L || cat == ucp_N);
1833                }
1834              }
1835            else
1836    #endif
1837            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1838          }          }
1839    
1840        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
1841    
1842        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
1843             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1844          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1845        }        }
1846      break;      break;
1847    
1848      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1849    
1850      case OP_ANY:      case OP_ANY:
1851      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1852        RRETURN(MATCH_NOMATCH);      /* Fall through */
1853      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);  
1854  #ifdef SUPPORT_UTF8      case OP_ALLANY:
1855      if (utf8)      if (eptr++ >= md->end_subject)
1856        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        {
1857  #endif        SCHECK_PARTIAL();
1858          MRRETURN(MATCH_NOMATCH);
1859          }
1860        if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1861      ecode++;      ecode++;
1862      break;      break;
1863    
# Line 1261  for (;;) Line 1865  for (;;)
1865      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1866    
1867      case OP_ANYBYTE:      case OP_ANYBYTE:
1868      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1869          {
1870          SCHECK_PARTIAL();
1871          MRRETURN(MATCH_NOMATCH);
1872          }
1873      ecode++;      ecode++;
1874      break;      break;
1875    
1876      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1877      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1878          {
1879          SCHECK_PARTIAL();
1880          MRRETURN(MATCH_NOMATCH);
1881          }
1882      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1883      if (      if (
1884  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1274  for (;;) Line 1886  for (;;)
1886  #endif  #endif
1887         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1888         )         )
1889        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1890      ecode++;      ecode++;
1891      break;      break;
1892    
1893      case OP_DIGIT:      case OP_DIGIT:
1894      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1895          {
1896          SCHECK_PARTIAL();
1897          MRRETURN(MATCH_NOMATCH);
1898          }
1899      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1900      if (      if (
1901  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1287  for (;;) Line 1903  for (;;)
1903  #endif  #endif
1904         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1905         )         )
1906        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1907      ecode++;      ecode++;
1908      break;      break;
1909    
1910      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1911      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1912          {
1913          SCHECK_PARTIAL();
1914          MRRETURN(MATCH_NOMATCH);
1915          }
1916      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1917      if (      if (
1918  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1300  for (;;) Line 1920  for (;;)
1920  #endif  #endif
1921         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1922         )         )
1923        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1924      ecode++;      ecode++;
1925      break;      break;
1926    
1927      case OP_WHITESPACE:      case OP_WHITESPACE:
1928      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1929          {
1930          SCHECK_PARTIAL();
1931          MRRETURN(MATCH_NOMATCH);
1932          }
1933      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1934      if (      if (
1935  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1313  for (;;) Line 1937  for (;;)
1937  #endif  #endif
1938         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1939         )         )
1940        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1941      ecode++;      ecode++;
1942      break;      break;
1943    
1944      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1945      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1946          {
1947          SCHECK_PARTIAL();
1948          MRRETURN(MATCH_NOMATCH);
1949          }
1950      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1951      if (      if (
1952  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1326  for (;;) Line 1954  for (;;)
1954  #endif  #endif
1955         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1956         )         )
1957        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1958      ecode++;      ecode++;
1959      break;      break;
1960    
1961      case OP_WORDCHAR:      case OP_WORDCHAR:
1962      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1963          {
1964          SCHECK_PARTIAL();
1965          MRRETURN(MATCH_NOMATCH);
1966          }
1967      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1968      if (      if (
1969  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1339  for (;;) Line 1971  for (;;)
1971  #endif  #endif
1972         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1973         )         )
1974        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1975        ecode++;
1976        break;
1977    
1978        case OP_ANYNL:
1979        if (eptr >= md->end_subject)
1980          {
1981          SCHECK_PARTIAL();
1982          MRRETURN(MATCH_NOMATCH);
1983          }
1984        GETCHARINCTEST(c, eptr);
1985        switch(c)
1986          {
1987          default: MRRETURN(MATCH_NOMATCH);
1988          case 0x000d:
1989          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1990          break;
1991    
1992          case 0x000a:
1993          break;
1994    
1995          case 0x000b:
1996          case 0x000c:
1997          case 0x0085:
1998          case 0x2028:
1999          case 0x2029:
2000          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2001          break;
2002          }
2003        ecode++;
2004        break;
2005    
2006        case OP_NOT_HSPACE:
2007        if (eptr >= md->end_subject)
2008          {
2009          SCHECK_PARTIAL();
2010          MRRETURN(MATCH_NOMATCH);
2011          }
2012        GETCHARINCTEST(c, eptr);
2013        switch(c)
2014          {
2015          default: break;
2016          case 0x09:      /* HT */
2017          case 0x20:      /* SPACE */
2018          case 0xa0:      /* NBSP */
2019          case 0x1680:    /* OGHAM SPACE MARK */
2020          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2021          case 0x2000:    /* EN QUAD */
2022          case 0x2001:    /* EM QUAD */
2023          case 0x2002:    /* EN SPACE */
2024          case 0x2003:    /* EM SPACE */
2025          case 0x2004:    /* THREE-PER-EM SPACE */
2026          case 0x2005:    /* FOUR-PER-EM SPACE */
2027          case 0x2006:    /* SIX-PER-EM SPACE */
2028          case 0x2007:    /* FIGURE SPACE */
2029          case 0x2008:    /* PUNCTUATION SPACE */
2030          case 0x2009:    /* THIN SPACE */
2031          case 0x200A:    /* HAIR SPACE */
2032          case 0x202f:    /* NARROW NO-BREAK SPACE */
2033          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2034          case 0x3000:    /* IDEOGRAPHIC SPACE */
2035          MRRETURN(MATCH_NOMATCH);
2036          }
2037        ecode++;
2038        break;
2039    
2040        case OP_HSPACE:
2041        if (eptr >= md->end_subject)
2042          {
2043          SCHECK_PARTIAL();
2044          MRRETURN(MATCH_NOMATCH);
2045          }
2046        GETCHARINCTEST(c, eptr);
2047        switch(c)
2048          {
2049          default: MRRETURN(MATCH_NOMATCH);
2050          case 0x09:      /* HT */
2051          case 0x20:      /* SPACE */
2052          case 0xa0:      /* NBSP */
2053          case 0x1680:    /* OGHAM SPACE MARK */
2054          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2055          case 0x2000:    /* EN QUAD */
2056          case 0x2001:    /* EM QUAD */
2057          case 0x2002:    /* EN SPACE */
2058          case 0x2003:    /* EM SPACE */
2059          case 0x2004:    /* THREE-PER-EM SPACE */
2060          case 0x2005:    /* FOUR-PER-EM SPACE */
2061          case 0x2006:    /* SIX-PER-EM SPACE */
2062          case 0x2007:    /* FIGURE SPACE */
2063          case 0x2008:    /* PUNCTUATION SPACE */
2064          case 0x2009:    /* THIN SPACE */
2065          case 0x200A:    /* HAIR SPACE */
2066          case 0x202f:    /* NARROW NO-BREAK SPACE */
2067          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2068          case 0x3000:    /* IDEOGRAPHIC SPACE */
2069          break;
2070          }
2071        ecode++;
2072        break;
2073    
2074        case OP_NOT_VSPACE:
2075        if (eptr >= md->end_subject)
2076          {
2077          SCHECK_PARTIAL();
2078          MRRETURN(MATCH_NOMATCH);
2079          }
2080        GETCHARINCTEST(c, eptr);
2081        switch(c)
2082          {
2083          default: break;
2084          case 0x0a:      /* LF */
2085          case 0x0b:      /* VT */
2086          case 0x0c:      /* FF */
2087          case 0x0d:      /* CR */
2088          case 0x85:      /* NEL */
2089          case 0x2028:    /* LINE SEPARATOR */
2090          case 0x2029:    /* PARAGRAPH SEPARATOR */
2091          MRRETURN(MATCH_NOMATCH);
2092          }
2093        ecode++;
2094        break;
2095    
2096        case OP_VSPACE:
2097        if (eptr >= md->end_subject)
2098          {
2099          SCHECK_PARTIAL();
2100          MRRETURN(MATCH_NOMATCH);
2101          }
2102        GETCHARINCTEST(c, eptr);
2103        switch(c)
2104          {
2105          default: MRRETURN(MATCH_NOMATCH);
2106          case 0x0a:      /* LF */
2107          case 0x0b:      /* VT */
2108          case 0x0c:      /* FF */
2109          case 0x0d:      /* CR */
2110          case 0x85:      /* NEL */
2111          case 0x2028:    /* LINE SEPARATOR */
2112          case 0x2029:    /* PARAGRAPH SEPARATOR */
2113          break;
2114          }
2115      ecode++;      ecode++;
2116      break;      break;
2117    
# Line 1349  for (;;) Line 2121  for (;;)
2121    
2122      case OP_PROP:      case OP_PROP:
2123      case OP_NOTPROP:      case OP_NOTPROP:
2124      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2125          {
2126          SCHECK_PARTIAL();
2127          MRRETURN(MATCH_NOMATCH);
2128          }
2129      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2130        {        {
2131        int chartype, rqdtype;        const ucd_record *prop = GET_UCD(c);
       int othercase;  
       int category = _pcre_ucp_findchar(c, &chartype, &othercase);  
   
       rqdtype = *(++ecode);  
       ecode++;  
2132    
2133        if (rqdtype >= 128)        switch(ecode[1])
         {  
         if ((rqdtype - 128 != category) == (op == OP_PROP))  
           RRETURN(MATCH_NOMATCH);  
         }  
       else  
2134          {          {
2135          if ((rqdtype != chartype) == (op == OP_PROP))          case PT_ANY:
2136            RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2137            break;
2138    
2139            case PT_LAMP:
2140            if ((prop->chartype == ucp_Lu ||
2141                 prop->chartype == ucp_Ll ||
2142                 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2143              MRRETURN(MATCH_NOMATCH);
2144            break;
2145    
2146            case PT_GC:
2147            if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2148              MRRETURN(MATCH_NOMATCH);
2149            break;
2150    
2151            case PT_PC:
2152            if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2153              MRRETURN(MATCH_NOMATCH);
2154            break;
2155    
2156            case PT_SC:
2157            if ((ecode[2] != prop->script) == (op == OP_PROP))
2158              MRRETURN(MATCH_NOMATCH);
2159            break;
2160    
2161            /* These are specials */
2162    
2163            case PT_ALNUM:
2164            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2165                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2166              MRRETURN(MATCH_NOMATCH);
2167            break;
2168    
2169            case PT_SPACE:    /* Perl space */
2170            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2171                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2172                   == (op == OP_NOTPROP))
2173              MRRETURN(MATCH_NOMATCH);
2174            break;
2175    
2176            case PT_PXSPACE:  /* POSIX space */
2177            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2178                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2179                 c == CHAR_FF || c == CHAR_CR)
2180                   == (op == OP_NOTPROP))
2181              MRRETURN(MATCH_NOMATCH);
2182            break;
2183    
2184            case PT_WORD:
2185            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2186                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2187                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2188              MRRETURN(MATCH_NOMATCH);
2189            break;
2190    
2191            /* This should never occur */
2192    
2193            default:
2194            RRETURN(PCRE_ERROR_INTERNAL);
2195          }          }
2196    
2197          ecode += 3;
2198        }        }
2199      break;      break;
2200    
# Line 1376  for (;;) Line 2202  for (;;)
2202      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2203    
2204      case OP_EXTUNI:      case OP_EXTUNI:
2205      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2206          {
2207          SCHECK_PARTIAL();
2208          MRRETURN(MATCH_NOMATCH);
2209          }
2210      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2211        {        {
2212        int chartype;        int category = UCD_CATEGORY(c);
2213        int othercase;        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       int category = _pcre_ucp_findchar(c, &chartype, &othercase);  
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2214        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2215          {          {
2216          int len = 1;          int len = 1;
# Line 1390  for (;;) Line 2218  for (;;)
2218            {            {
2219            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2220            }            }
2221          category = _pcre_ucp_findchar(c, &chartype, &othercase);          category = UCD_CATEGORY(c);
2222          if (category != ucp_M) break;          if (category != ucp_M) break;
2223          eptr += len;          eptr += len;
2224          }          }
# Line 1411  for (;;) Line 2239  for (;;)
2239      case OP_REF:      case OP_REF:
2240        {        {
2241        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2242        ecode += 3;                                 /* Advance past item */        ecode += 3;
2243    
2244          /* If the reference is unset, there are two possibilities:
2245    
2246        /* If the reference is unset, set the length to be longer than the amount        (a) In the default, Perl-compatible state, set the length to be longer
2247        of subject left; this ensures that every attempt at a match fails. We        than the amount of subject left; this ensures that every attempt at a
2248        can't just fail here, because of the possibility of quantifiers with zero        match fails. We can't just fail here, because of the possibility of
2249        minima. */        quantifiers with zero minima.
2250    
2251        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        (b) If the JavaScript compatibility flag is set, set the length to zero
2252          md->end_subject - eptr + 1 :        so that the back reference matches an empty string.
2253          md->offset_vector[offset+1] - md->offset_vector[offset];  
2254          Otherwise, set the length to the length of what was matched by the
2255          referenced subpattern. */
2256    
2257          if (offset >= offset_top || md->offset_vector[offset] < 0)
2258            length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2259          else
2260            length = md->offset_vector[offset+1] - md->offset_vector[offset];
2261    
2262        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2263    
# Line 1449  for (;;) Line 2286  for (;;)
2286          break;          break;
2287    
2288          default:               /* No repeat follows */          default:               /* No repeat follows */
2289          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2290              {
2291              CHECK_PARTIAL();
2292              MRRETURN(MATCH_NOMATCH);
2293              }
2294          eptr += length;          eptr += length;
2295          continue;              /* With the main loop */          continue;              /* With the main loop */
2296          }          }
# Line 1465  for (;;) Line 2306  for (;;)
2306    
2307        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2308          {          {
2309          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2310              {
2311              CHECK_PARTIAL();
2312              MRRETURN(MATCH_NOMATCH);
2313              }
2314          eptr += length;          eptr += length;
2315          }          }
2316    
# Line 1480  for (;;) Line 2325  for (;;)
2325          {          {
2326          for (fi = min;; fi++)          for (fi = min;; fi++)
2327            {            {
2328            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2329            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2330            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2331              RRETURN(MATCH_NOMATCH);            if (!match_ref(offset, eptr, length, md, ims))
2332                {
2333                CHECK_PARTIAL();
2334                MRRETURN(MATCH_NOMATCH);
2335                }
2336            eptr += length;            eptr += length;
2337            }            }
2338          /* Control never gets here */          /* Control never gets here */
# Line 1496  for (;;) Line 2345  for (;;)
2345          pp = eptr;          pp = eptr;
2346          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2347            {            {
2348            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2349                {
2350                CHECK_PARTIAL();
2351                break;
2352                }
2353            eptr += length;            eptr += length;
2354            }            }
2355          while (eptr >= pp)          while (eptr >= pp)
2356            {            {
2357            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2358            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2359            eptr -= length;            eptr -= length;
2360            }            }
2361          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2362          }          }
2363        }        }
2364      /* Control never gets here */      /* Control never gets here */
2365    
   
   
2366      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2367      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2368      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1566  for (;;) Line 2417  for (;;)
2417          {          {
2418          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2419            {            {
2420            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2421                {
2422                SCHECK_PARTIAL();
2423                MRRETURN(MATCH_NOMATCH);
2424                }
2425            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2426            if (c > 255)            if (c > 255)
2427              {              {
2428              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2429              }              }
2430            else            else
2431              {              {
2432              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2433              }              }
2434            }            }
2435          }          }
# Line 1584  for (;;) Line 2439  for (;;)
2439          {          {
2440          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2441            {            {
2442            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2443                {
2444                SCHECK_PARTIAL();
2445                MRRETURN(MATCH_NOMATCH);
2446                }
2447            c = *eptr++;            c = *eptr++;
2448            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2449            }            }
2450          }          }
2451    
# Line 1606  for (;;) Line 2465  for (;;)
2465            {            {
2466            for (fi = min;; fi++)            for (fi = min;; fi++)
2467              {              {
2468              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2469              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2470              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2471                if (eptr >= md->end_subject)
2472                  {
2473                  SCHECK_PARTIAL();
2474                  MRRETURN(MATCH_NOMATCH);
2475                  }
2476              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2477              if (c > 255)              if (c > 255)
2478                {                {
2479                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2480                }                }
2481              else              else
2482                {                {
2483                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2484                }                }
2485              }              }
2486            }            }
# Line 1626  for (;;) Line 2490  for (;;)
2490            {            {
2491            for (fi = min;; fi++)            for (fi = min;; fi++)
2492              {              {
2493              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2494              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2495              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2496                if (eptr >= md->end_subject)
2497                  {
2498                  SCHECK_PARTIAL();
2499                  MRRETURN(MATCH_NOMATCH);
2500                  }
2501              c = *eptr++;              c = *eptr++;
2502              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2503              }              }
2504            }            }
2505          /* Control never gets here */          /* Control never gets here */
# Line 1649  for (;;) Line 2518  for (;;)
2518            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2519              {              {
2520              int len = 1;              int len = 1;
2521              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2522                  {
2523                  SCHECK_PARTIAL();
2524                  break;
2525                  }
2526              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2527              if (c > 255)              if (c > 255)
2528                {                {
# Line 1663  for (;;) Line 2536  for (;;)
2536              }              }
2537            for (;;)            for (;;)
2538              {              {
2539              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2540              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2541              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2542              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1675  for (;;) Line 2548  for (;;)
2548            {            {
2549            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2550              {              {
2551              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2552                  {
2553                  SCHECK_PARTIAL();
2554                  break;
2555                  }
2556              c = *eptr;              c = *eptr;
2557              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2558              eptr++;              eptr++;
2559              }              }
2560            while (eptr >= pp)            while (eptr >= pp)
2561              {              {
2562              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
             eptr--;  
2563              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2564                eptr--;
2565              }              }
2566            }            }
2567    
2568          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2569          }          }
2570        }        }
2571      /* Control never gets here */      /* Control never gets here */
2572    
2573    
2574      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2575      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2576        mode, because Unicode properties are supported in non-UTF-8 mode. */
2577    
2578  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2579      case OP_XCLASS:      case OP_XCLASS:
# Line 1736  for (;;) Line 2614  for (;;)
2614    
2615        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2616          {          {
2617          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2618          GETCHARINC(c, eptr);            {
2619          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2620              MRRETURN(MATCH_NOMATCH);
2621              }
2622            GETCHARINCTEST(c, eptr);
2623            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2624          }          }
2625    
2626        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1753  for (;;) Line 2635  for (;;)
2635          {          {
2636          for (fi = min;; fi++)          for (fi = min;; fi++)
2637            {            {
2638            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2639            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2640            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2641            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2642            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2643                SCHECK_PARTIAL();
2644                MRRETURN(MATCH_NOMATCH);
2645                }
2646              GETCHARINCTEST(c, eptr);
2647              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2648            }            }
2649          /* Control never gets here */          /* Control never gets here */
2650          }          }
# Line 1770  for (;;) Line 2657  for (;;)
2657          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2658            {            {
2659            int len = 1;            int len = 1;
2660            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2661            GETCHARLEN(c, eptr, len);              {
2662                SCHECK_PARTIAL();
2663                break;
2664                }
2665              GETCHARLENTEST(c, eptr, len);
2666            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2667            eptr += len;            eptr += len;
2668            }            }
2669          for(;;)          for(;;)
2670            {            {
2671            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2672            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2673            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2674            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2675            }            }
2676          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2677          }          }
2678    
2679        /* Control never gets here */        /* Control never gets here */
# Line 1798  for (;;) Line 2689  for (;;)
2689        length = 1;        length = 1;
2690        ecode++;        ecode++;
2691        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2692        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2693        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2694            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2695            MRRETURN(MATCH_NOMATCH);
2696            }
2697          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2698        }        }
2699      else      else
2700  #endif  #endif
2701    
2702      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2703        {        {
2704        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2705        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2706            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2707            MRRETURN(MATCH_NOMATCH);
2708            }
2709          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2710        ecode += 2;        ecode += 2;
2711        }        }
2712      break;      break;
# Line 1822  for (;;) Line 2721  for (;;)
2721        ecode++;        ecode++;
2722        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2723    
2724        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2725            {
2726            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2727            MRRETURN(MATCH_NOMATCH);
2728            }
2729    
2730        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2731        can use the fast lookup table. */        can use the fast lookup table. */
2732    
2733        if (fc < 128)        if (fc < 128)
2734          {          {
2735          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2736          }          }
2737    
2738        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
2739    
2740        else        else
2741          {          {
2742          int dc;          unsigned int dc;
2743          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2744          ecode += length;          ecode += length;
2745    
2746          /* If we have Unicode property support, we can use it to test the other          /* If we have Unicode property support, we can use it to test the other
2747          case of the character, if there is one. The result of _pcre_ucp_findchar() is          case of the character, if there is one. */
         < 0 if the char isn't found, and othercase is returned as zero if there  
         isn't one. */  
2748    
2749          if (fc != dc)          if (fc != dc)
2750            {            {
2751  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2752            int chartype;            if (dc != UCD_OTHERCASE(fc))
           int othercase;  
           if (_pcre_ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)  
2753  #endif  #endif
2754              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2755            }            }
2756          }          }
2757        }        }
# Line 1861  for (;;) Line 2760  for (;;)
2760    
2761      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2762        {        {
2763        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2764        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2765            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2766            MRRETURN(MATCH_NOMATCH);
2767            }
2768          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2769        ecode += 2;        ecode += 2;
2770        }        }
2771      break;      break;
2772    
2773      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2774    
2775      case OP_EXACT:      case OP_EXACT:
2776      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2777      ecode += 3;      ecode += 3;
2778      goto REPEATCHAR;      goto REPEATCHAR;
2779    
2780        case OP_POSUPTO:
2781        possessive = TRUE;
2782        /* Fall through */
2783    
2784      case OP_UPTO:      case OP_UPTO:
2785      case OP_MINUPTO:      case OP_MINUPTO:
2786      min = 0;      min = 0;
# Line 1882  for (;;) Line 2789  for (;;)
2789      ecode += 3;      ecode += 3;
2790      goto REPEATCHAR;      goto REPEATCHAR;
2791    
2792        case OP_POSSTAR:
2793        possessive = TRUE;
2794        min = 0;
2795        max = INT_MAX;
2796        ecode++;
2797        goto REPEATCHAR;
2798    
2799        case OP_POSPLUS:
2800        possessive = TRUE;
2801        min = 1;
2802        max = INT_MAX;
2803        ecode++;
2804        goto REPEATCHAR;
2805    
2806        case OP_POSQUERY:
2807        possessive = TRUE;
2808        min = 0;
2809        max = 1;
2810        ecode++;
2811        goto REPEATCHAR;
2812    
2813      case OP_STAR:      case OP_STAR:
2814      case OP_MINSTAR:      case OP_MINSTAR:
2815      case OP_PLUS:      case OP_PLUS:
# Line 1890  for (;;) Line 2818  for (;;)
2818      case OP_MINQUERY:      case OP_MINQUERY:
2819      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2820      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2821    
2822      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2823      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2824      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2825    
2826      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2827    
2828      REPEATCHAR:      REPEATCHAR:
2829  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1905  for (;;) Line 2832  for (;;)
2832        length = 1;        length = 1;
2833        charptr = ecode;        charptr = ecode;
2834        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2835        ecode += length;        ecode += length;
2836    
2837        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 1913  for (;;) Line 2839  for (;;)
2839    
2840        if (length > 1)        if (length > 1)
2841          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2842  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2843          int othercase;          unsigned int othercase;
         int chartype;  
2844          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2845               _pcre_ucp_findchar(fc, &chartype, &othercase) >= 0 &&              (othercase = UCD_OTHERCASE(fc)) != fc)
              othercase > 0)  
2846            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2847            else oclength = 0;
2848  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2849    
2850          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2851            {            {
2852            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2853            /* Need braces because of following else */              memcmp(eptr, charptr, length) == 0) eptr += length;
2854            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2855              else if (oclength > 0 &&
2856                       eptr <= md->end_subject - oclength &&
2857                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2858    #endif  /* SUPPORT_UCP */
2859            else            else
2860              {              {
2861              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2862              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2863              }              }
2864            }            }
2865    
# Line 1943  for (;;) Line 2869  for (;;)
2869            {            {
2870            for (fi = min;; fi++)            for (fi = min;; fi++)
2871              {              {
2872              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2873              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2874              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2875              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2876              /* Need braces because of following else */                memcmp(eptr, charptr, length) == 0) eptr += length;
2877              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2878                else if (oclength > 0 &&
2879                         eptr <= md->end_subject - oclength &&
2880                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2881    #endif  /* SUPPORT_UCP */
2882              else              else
2883                {                {
2884                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2885                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2886                }                }
2887              }              }
2888            /* Control never gets here */            /* Control never gets here */
2889            }            }
2890          else  
2891            else  /* Maximize */
2892            {            {
2893            pp = eptr;            pp = eptr;
2894            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2895              {              {
2896              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2897              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2898              else if (oclength == 0) break;  #ifdef SUPPORT_UCP
2899                else if (oclength > 0 &&
2900                         eptr <= md->end_subject - oclength &&
2901                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2902    #endif  /* SUPPORT_UCP */
2903              else              else
2904                {                {
2905                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2906                eptr += oclength;                break;
2907                }                }
2908              }              }
2909            while (eptr >= pp)  
2910             {            if (possessive) continue;
2911             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);  
2912             if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for(;;)
2913             eptr -= length;              {
2914             }              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2915            RRETURN(MATCH_NOMATCH);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2916                if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2917    #ifdef SUPPORT_UCP
2918                eptr--;
2919                BACKCHAR(eptr);
2920    #else   /* without SUPPORT_UCP */
2921                eptr -= length;
2922    #endif  /* SUPPORT_UCP */
2923                }
2924            }            }
2925          /* Control never gets here */          /* Control never gets here */
2926          }          }
# Line 1990  for (;;) Line 2933  for (;;)
2933  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2934    
2935      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2936        {  
2937        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2938    
2939      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2940      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2011  for (;;) Line 2952  for (;;)
2952        {        {
2953        fc = md->lcc[fc];        fc = md->lcc[fc];
2954        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2955          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2956            if (eptr >= md->end_subject)
2957              {
2958              SCHECK_PARTIAL();
2959              MRRETURN(MATCH_NOMATCH);
2960              }
2961            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2962            }
2963        if (min == max) continue;        if (min == max) continue;
2964        if (minimize)        if (minimize)
2965          {          {
2966          for (fi = min;; fi++)          for (fi = min;; fi++)
2967            {            {
2968            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2969            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2970            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2971                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2972              RRETURN(MATCH_NOMATCH);              {
2973            }              SCHECK_PARTIAL();
2974                MRRETURN(MATCH_NOMATCH);
2975                }
2976              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2977              }
2978          /* Control never gets here */          /* Control never gets here */
2979          }          }
2980        else        else  /* Maximize */
2981          {          {
2982          pp = eptr;          pp = eptr;
2983          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2984            {            {
2985            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2986                {
2987                SCHECK_PARTIAL();
2988                break;
2989                }
2990              if (fc != md->lcc[*eptr]) break;
2991            eptr++;            eptr++;
2992            }            }
2993    
2994            if (possessive) continue;
2995    
2996          while (eptr >= pp)          while (eptr >= pp)
2997            {            {
2998            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2999            eptr--;            eptr--;
3000            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3001            }            }
3002          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3003          }          }
3004        /* Control never gets here */        /* Control never gets here */
3005        }        }
# Line 2048  for (;;) Line 3008  for (;;)
3008    
3009      else      else
3010        {        {
3011        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
3012            {
3013            if (eptr >= md->end_subject)
3014              {
3015              SCHECK_PARTIAL();
3016              MRRETURN(MATCH_NOMATCH);
3017              }
3018            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3019            }
3020    
3021        if (min == max) continue;        if (min == max) continue;
3022    
3023        if (minimize)        if (minimize)
3024          {          {
3025          for (fi = min;; fi++)          for (fi = min;; fi++)
3026            {            {
3027            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3028            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3029            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3030              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3031                {
3032                SCHECK_PARTIAL();
3033                MRRETURN(MATCH_NOMATCH);
3034                }
3035              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3036            }            }
3037          /* Control never gets here */          /* Control never gets here */
3038          }          }
3039        else        else  /* Maximize */
3040          {          {
3041          pp = eptr;          pp = eptr;
3042          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3043            {            {
3044            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
3045                {
3046                SCHECK_PARTIAL();
3047                break;
3048                }
3049              if (fc != *eptr) break;
3050            eptr++;            eptr++;
3051            }            }
3052            if (possessive) continue;
3053    
3054          while (eptr >= pp)          while (eptr >= pp)
3055            {            {
3056            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3057            eptr--;            eptr--;
3058            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3059            }            }
3060          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3061          }          }
3062        }        }
3063      /* Control never gets here */      /* Control never gets here */
# Line 2084  for (;;) Line 3066  for (;;)
3066      checking can be multibyte. */      checking can be multibyte. */
3067    
3068      case OP_NOT:      case OP_NOT:
3069      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
3070          {
3071          SCHECK_PARTIAL();
3072          MRRETURN(MATCH_NOMATCH);
3073          }
3074      ecode++;      ecode++;
3075      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3076      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2093  for (;;) Line 3079  for (;;)
3079        if (c < 256)        if (c < 256)
3080  #endif  #endif
3081        c = md->lcc[c];        c = md->lcc[c];
3082        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3083        }        }
3084      else      else
3085        {        {
3086        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3087        }        }
3088      break;      break;
3089    
# Line 2121  for (;;) Line 3107  for (;;)
3107      ecode += 3;      ecode += 3;
3108      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3109    
3110        case OP_NOTPOSSTAR:
3111        possessive = TRUE;
3112        min = 0;
3113        max = INT_MAX;
3114        ecode++;
3115        goto REPEATNOTCHAR;
3116    
3117        case OP_NOTPOSPLUS:
3118        possessive = TRUE;
3119        min = 1;
3120        max = INT_MAX;
3121        ecode++;
3122        goto REPEATNOTCHAR;
3123    
3124        case OP_NOTPOSQUERY:
3125        possessive = TRUE;
3126        min = 0;
3127        max = 1;
3128        ecode++;
3129        goto REPEATNOTCHAR;
3130    
3131        case OP_NOTPOSUPTO:
3132        possessive = TRUE;
3133        min = 0;
3134        max = GET2(ecode, 1);
3135        ecode += 3;
3136        goto REPEATNOTCHAR;
3137    
3138      case OP_NOTSTAR:      case OP_NOTSTAR:
3139      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
3140      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2133  for (;;) Line 3147  for (;;)
3147      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3148      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3149    
3150      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3151    
3152      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3153      fc = *ecode++;      fc = *ecode++;
3154    
3155      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2160  for (;;) Line 3171  for (;;)
3171        /* UTF-8 mode */        /* UTF-8 mode */
3172        if (utf8)        if (utf8)
3173          {          {
3174          register int d;          register unsigned int d;
3175          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3176            {            {
3177              if (eptr >= md->end_subject)
3178                {
3179                SCHECK_PARTIAL();
3180                MRRETURN(MATCH_NOMATCH);
3181                }
3182            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3183            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3184            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3185            }            }
3186          }          }
3187        else        else
# Line 2174  for (;;) Line 3190  for (;;)
3190        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3191          {          {
3192          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3193            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3194              if (eptr >= md->end_subject)
3195                {
3196                SCHECK_PARTIAL();
3197                MRRETURN(MATCH_NOMATCH);
3198                }
3199              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3200              }
3201          }          }
3202    
3203        if (min == max) continue;        if (min == max) continue;
# Line 2185  for (;;) Line 3208  for (;;)
3208          /* UTF-8 mode */          /* UTF-8 mode */
3209          if (utf8)          if (utf8)
3210            {            {
3211            register int d;            register unsigned int d;
3212            for (fi = min;; fi++)            for (fi = min;; fi++)
3213              {              {
3214              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3215              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3216                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3217                if (eptr >= md->end_subject)
3218                  {
3219                  SCHECK_PARTIAL();
3220                  MRRETURN(MATCH_NOMATCH);
3221                  }
3222              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3223              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3224              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3225              }              }
3226            }            }
3227          else          else
# Line 2202  for (;;) Line 3230  for (;;)
3230            {            {
3231            for (fi = min;; fi++)            for (fi = min;; fi++)
3232              {              {
3233              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3234              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3235              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3236                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3237                  {
3238                  SCHECK_PARTIAL();
3239                  MRRETURN(MATCH_NOMATCH);
3240                  }
3241                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3242              }              }
3243            }            }
3244          /* Control never gets here */          /* Control never gets here */
# Line 2221  for (;;) Line 3254  for (;;)
3254          /* UTF-8 mode */          /* UTF-8 mode */
3255          if (utf8)          if (utf8)
3256            {            {
3257            register int d;            register unsigned int d;
3258            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3259              {              {
3260              int len = 1;              int len = 1;
3261              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3262                  {
3263                  SCHECK_PARTIAL();
3264                  break;
3265                  }
3266              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3267              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3268              if (fc == d) break;              if (fc == d) break;
3269              eptr += len;              eptr += len;
3270              }              }
3271            for(;;)          if (possessive) continue;
3272            for(;;)
3273              {              {
3274              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3275              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3276              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3277              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2245  for (;;) Line 3283  for (;;)
3283            {            {
3284            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3285              {              {
3286              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3287                  {
3288                  SCHECK_PARTIAL();
3289                  break;
3290                  }
3291                if (fc == md->lcc[*eptr]) break;
3292              eptr++;              eptr++;
3293              }              }
3294              if (possessive) continue;
3295            while (eptr >= pp)            while (eptr >= pp)
3296              {              {
3297              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3298              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3299              eptr--;              eptr--;
3300              }              }
3301            }            }
3302    
3303          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3304          }          }
3305        /* Control never gets here */        /* Control never gets here */
3306        }        }
# Line 2269  for (;;) Line 3313  for (;;)
3313        /* UTF-8 mode */        /* UTF-8 mode */
3314        if (utf8)        if (utf8)
3315          {          {
3316          register int d;          register unsigned int d;
3317          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3318            {            {
3319              if (eptr >= md->end_subject)
3320                {
3321                SCHECK_PARTIAL();
3322                MRRETURN(MATCH_NOMATCH);
3323                }
3324            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3325            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3326            }            }
3327          }          }
3328        else        else
# Line 2281  for (;;) Line 3330  for (;;)
3330        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3331          {          {
3332          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3333            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3334              if (eptr >= md->end_subject)
3335                {
3336                SCHECK_PARTIAL();
3337                MRRETURN(MATCH_NOMATCH);
3338                }
3339              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3340              }
3341          }          }
3342    
3343        if (min == max) continue;        if (min == max) continue;
# Line 2292  for (;;) Line 3348  for (;;)
3348          /* UTF-8 mode */          /* UTF-8 mode */
3349          if (utf8)          if (utf8)
3350            {            {
3351            register int d;            register unsigned int d;
3352            for (fi = min;; fi++)            for (fi = min;; fi++)
3353              {              {
3354              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3355              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3356                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3357                if (eptr >= md->end_subject)
3358                  {
3359                  SCHECK_PARTIAL();
3360                  MRRETURN(MATCH_NOMATCH);
3361                  }
3362              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3363              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3364              }              }
3365            }            }
3366          else          else
# Line 2308  for (;;) Line 3369  for (;;)
3369            {            {
3370            for (fi = min;; fi++)            for (fi = min;; fi++)
3371              {              {
3372              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3373              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3374              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3375                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3376                  {
3377                  SCHECK_PARTIAL();
3378                  MRRETURN(MATCH_NOMATCH);
3379                  }
3380                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3381              }              }
3382            }            }
3383          /* Control never gets here */          /* Control never gets here */
# Line 2327  for (;;) Line 3393  for (;;)
3393          /* UTF-8 mode */          /* UTF-8 mode */
3394          if (utf8)          if (utf8)
3395            {            {
3396            register int d;            register unsigned int d;
3397            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3398              {              {
3399              int len = 1;              int len = 1;
3400              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3401                  {
3402                  SCHECK_PARTIAL();
3403                  break;
3404                  }
3405              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3406              if (fc == d) break;              if (fc == d) break;
3407              eptr += len;              eptr += len;
3408              }              }
3409              if (possessive) continue;
3410            for(;;)            for(;;)
3411              {              {
3412              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3413              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3414              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3415              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2350  for (;;) Line 3421  for (;;)
3421            {            {
3422            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3423              {              {
3424              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3425                  {
3426                  SCHECK_PARTIAL();
3427                  break;
3428                  }
3429                if (fc == *eptr) break;
3430              eptr++;              eptr++;
3431              }              }
3432              if (possessive) continue;
3433            while (eptr >= pp)            while (eptr >= pp)
3434              {              {
3435              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3436              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3437              eptr--;              eptr--;
3438              }              }
3439            }            }
3440    
3441          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3442          }          }
3443        }        }
3444      /* Control never gets here */      /* Control never gets here */
# Line 2384  for (;;) Line 3461  for (;;)
3461      ecode += 3;      ecode += 3;
3462      goto REPEATTYPE;      goto REPEATTYPE;
3463    
3464        case OP_TYPEPOSSTAR:
3465        possessive = TRUE;
3466        min = 0;
3467        max = INT_MAX;
3468        ecode++;
3469        goto REPEATTYPE;
3470    
3471        case OP_TYPEPOSPLUS:
3472        possessive = TRUE;
3473        min = 1;
3474        max = INT_MAX;
3475        ecode++;
3476        goto REPEATTYPE;
3477    
3478        case OP_TYPEPOSQUERY:
3479        possessive = TRUE;
3480        min = 0;
3481        max = 1;
3482        ecode++;
3483        goto REPEATTYPE;
3484    
3485        case OP_TYPEPOSUPTO:
3486        possessive = TRUE;
3487        min = 0;
3488        max = GET2(ecode, 1);
3489        ecode += 3;
3490        goto REPEATTYPE;
3491    
3492      case OP_TYPESTAR:      case OP_TYPESTAR:
3493      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
3494      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2408  for (;;) Line 3513  for (;;)
3513        {        {
3514        prop_fail_result = ctype == OP_NOTPROP;        prop_fail_result = ctype == OP_NOTPROP;
3515        prop_type = *ecode++;        prop_type = *ecode++;
3516        if (prop_type >= 128)        prop_value = *ecode++;
         {  
         prop_test_against = prop_type - 128;  
         prop_test_variable = &prop_category;  
         }  
       else  
         {  
         prop_test_against = prop_type;  
         prop_test_variable = &prop_chartype;  
         }  
3517        }        }
3518      else prop_type = -1;      else prop_type = -1;
3519  #endif  #endif
3520    
3521      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3522      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3523      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3524      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3525      and single-bytes. */      and single-bytes. */
3526    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3527      if (min > 0)      if (min > 0)
3528        {        {
3529  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3530        if (prop_type > 0)        if (prop_type >= 0)
3531          {          {
3532          for (i = 1; i <= min; i++)          switch(prop_type)
3533            {            {
3534            GETCHARINC(c, eptr);            case PT_ANY:
3535            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3536            if ((*prop_test_variable == prop_test_against) == prop_fail_result)            for (i = 1; i <= min; i++)
3537              RRETURN(MATCH_NOMATCH);              {
3538                if (eptr >= md->end_subject)
3539                  {
3540                  SCHECK_PARTIAL();
3541                  MRRETURN(MATCH_NOMATCH);
3542                  }
3543                GETCHARINCTEST(c, eptr);
3544                }
3545              break;
3546    
3547              case PT_LAMP:
3548              for (i = 1; i <= min; i++)
3549                {
3550                if (eptr >= md->end_subject)
3551                  {
3552                  SCHECK_PARTIAL();
3553                  MRRETURN(MATCH_NOMATCH);
3554                  }
3555                GETCHARINCTEST(c, eptr);
3556                prop_chartype = UCD_CHARTYPE(c);
3557                if ((prop_chartype == ucp_Lu ||
3558                     prop_chartype == ucp_Ll ||
3559                     prop_chartype == ucp_Lt) == prop_fail_result)
3560                  MRRETURN(MATCH_NOMATCH);
3561                }
3562              break;
3563    
3564              case PT_GC:
3565              for (i = 1; i <= min; i++)
3566                {
3567                if (eptr >= md->end_subject)
3568                  {
3569                  SCHECK_PARTIAL();
3570                  MRRETURN(MATCH_NOMATCH);
3571                  }
3572                GETCHARINCTEST(c, eptr);
3573                prop_category = UCD_CATEGORY(c);
3574                if ((prop_category == prop_value) == prop_fail_result)
3575                  MRRETURN(MATCH_NOMATCH);
3576                }
3577              break;
3578    
3579              case PT_PC:
3580              for (i = 1; i <= min; i++)
3581                {
3582                if (eptr >= md->end_subject)
3583                  {
3584                  SCHECK_PARTIAL();
3585                  MRRETURN(MATCH_NOMATCH);
3586                  }
3587                GETCHARINCTEST(c, eptr);
3588                prop_chartype = UCD_CHARTYPE(c);
3589                if ((prop_chartype == prop_value) == prop_fail_result)
3590                  MRRETURN(MATCH_NOMATCH);
3591                }
3592              break;
3593    
3594              case PT_SC:
3595              for (i = 1; i <= min; i++)
3596                {
3597                if (eptr >= md->end_subject)
3598                  {
3599                  SCHECK_PARTIAL();
3600                  MRRETURN(MATCH_NOMATCH);
3601                  }
3602                GETCHARINCTEST(c, eptr);
3603                prop_script = UCD_SCRIPT(c);
3604                if ((prop_script == prop_value) == prop_fail_result)
3605                  MRRETURN(MATCH_NOMATCH);
3606                }
3607              break;
3608    
3609              case PT_ALNUM:
3610              for (i = 1; i <= min; i++)
3611                {
3612                if (eptr >= md->end_subject)
3613                  {
3614                  SCHECK_PARTIAL();
3615                  MRRETURN(MATCH_NOMATCH);
3616                  }
3617                GETCHARINCTEST(c, eptr);
3618                prop_category = UCD_CATEGORY(c);
3619                if ((prop_category == ucp_L || prop_category == ucp_N)
3620                       == prop_fail_result)
3621                  MRRETURN(MATCH_NOMATCH);
3622                }
3623              break;
3624    
3625              case PT_SPACE:    /* Perl space */
3626              for (i = 1; i <= min; i++)
3627                {
3628                if (eptr >= md->end_subject)
3629                  {
3630                  SCHECK_PARTIAL();
3631                  MRRETURN(MATCH_NOMATCH);
3632                  }
3633                GETCHARINCTEST(c, eptr);
3634                prop_category = UCD_CATEGORY(c);
3635                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3636                     c == CHAR_FF || c == CHAR_CR)
3637                       == prop_fail_result)
3638                  MRRETURN(MATCH_NOMATCH);
3639                }
3640              break;
3641    
3642              case PT_PXSPACE:  /* POSIX space */
3643              for (i = 1; i <= min; i++)
3644                {
3645                if (eptr >= md->end_subject)
3646                  {
3647                  SCHECK_PARTIAL();
3648                  MRRETURN(MATCH_NOMATCH);
3649                  }
3650                GETCHARINCTEST(c, eptr);
3651                prop_category = UCD_CATEGORY(c);
3652                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3653                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3654                       == prop_fail_result)
3655                  MRRETURN(MATCH_NOMATCH);
3656                }
3657              break;
3658    
3659              case PT_WORD:
3660              for (i = 1; i <= min; i++)
3661                {
3662                if (eptr >= md->end_subject)
3663                  {
3664                  SCHECK_PARTIAL();
3665                  MRRETURN(MATCH_NOMATCH);
3666                  }
3667                GETCHARINCTEST(c, eptr);
3668                prop_category = UCD_CATEGORY(c);
3669                if ((prop_category == ucp_L || prop_category == ucp_N ||
3670                     c == CHAR_UNDERSCORE)
3671                       == prop_fail_result)
3672                  MRRETURN(MATCH_NOMATCH);
3673                }
3674              break;
3675    
3676              /* This should not occur */
3677    
3678              default:
3679              RRETURN(PCRE_ERROR_INTERNAL);
3680            }            }
3681          }          }
3682    
# Line 2452  for (;;) Line 3687  for (;;)
3687          {          {
3688          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3689            {            {
3690              if (eptr >= md->end_subject)
3691                {
3692                SCHECK_PARTIAL();
3693                MRRETURN(MATCH_NOMATCH);
3694                }
3695            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3696            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = UCD_CATEGORY(c);
3697            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3698            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3699              {              {
3700              int len = 1;              int len = 1;
3701              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3702                {                else { GETCHARLEN(c, eptr, len); }
3703                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);  
3704              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3705              eptr += len;              eptr += len;
3706              }              }
# Line 2480  for (;;) Line 3718  for (;;)
3718          case OP_ANY:          case OP_ANY:
3719          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3720            {            {
3721            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3722               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))              {
3723              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3724                MRRETURN(MATCH_NOMATCH);
3725                }
3726              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3727              eptr++;
3728              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3729              }
3730            break;
3731    
3732            case OP_ALLANY:
3733            for (i = 1; i <= min; i++)
3734              {
3735              if (eptr >= md->end_subject)
3736                {
3737                SCHECK_PARTIAL();
3738                MRRETURN(MATCH_NOMATCH);
3739                }
3740              eptr++;
3741            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3742            }            }
3743          break;          break;
3744    
3745          case OP_ANYBYTE:          case OP_ANYBYTE:
3746            if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3747          eptr += min;          eptr += min;
3748          break;          break;
3749    
3750            case OP_ANYNL:
3751            for (i = 1; i <= min; i++)
3752              {
3753              if (eptr >= md->end_subject)
3754                {
3755                SCHECK_PARTIAL();
3756                MRRETURN(MATCH_NOMATCH);
3757                }
3758              GETCHARINC(c, eptr);
3759              switch(c)
3760                {
3761                default: MRRETURN(MATCH_NOMATCH);
3762                case 0x000d:
3763                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3764                break;
3765    
3766                case 0x000a:
3767                break;
3768    
3769                case 0x000b:
3770                case 0x000c:
3771                case 0x0085:
3772                case 0x2028:
3773                case 0x2029:
3774                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3775                break;
3776                }
3777              }
3778            break;
3779    
3780            case OP_NOT_HSPACE:
3781            for (i = 1; i <= min; i++)
3782              {
3783              if (eptr >= md->end_subject)
3784                {
3785                SCHECK_PARTIAL();
3786                MRRETURN(MATCH_NOMATCH);
3787                }
3788              GETCHARINC(c, eptr);
3789              switch(c)
3790                {
3791                default: break;
3792                case 0x09:      /* HT */
3793                case 0x20:      /* SPACE */
3794                case 0xa0:      /* NBSP */
3795                case 0x1680:    /* OGHAM SPACE MARK */
3796                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3797                case 0x2000:    /* EN QUAD */
3798                case 0x2001:    /* EM QUAD */
3799                case 0x2002:    /* EN SPACE */
3800                case 0x2003:    /* EM SPACE */
3801                case 0x2004:    /* THREE-PER-EM SPACE */
3802                case 0x2005:    /* FOUR-PER-EM SPACE */
3803                case 0x2006:    /* SIX-PER-EM SPACE */
3804                case 0x2007:    /* FIGURE SPACE */
3805                case 0x2008:    /* PUNCTUATION SPACE */
3806                case 0x2009:    /* THIN SPACE */
3807                case 0x200A:    /* HAIR SPACE */
3808                case 0x202f:    /* NARROW NO-BREAK SPACE */
3809                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3810                case 0x3000:    /* IDEOGRAPHIC SPACE */
3811                MRRETURN(MATCH_NOMATCH);
3812                }
3813              }
3814            break;
3815    
3816            case OP_HSPACE:
3817            for (i = 1; i <= min; i++)
3818              {
3819              if (eptr >= md->end_subject)
3820                {
3821                SCHECK_PARTIAL();
3822                MRRETURN(MATCH_NOMATCH);
3823                }
3824              GETCHARINC(c, eptr);
3825              switch(c)
3826                {
3827                default: MRRETURN(MATCH_NOMATCH);
3828                case 0x09:      /* HT */
3829                case 0x20:      /* SPACE */
3830                case 0xa0:      /* NBSP */
3831                case 0x1680:    /* OGHAM SPACE MARK */
3832                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3833                case 0x2000:    /* EN QUAD */
3834                case 0x2001:    /* EM QUAD */
3835                case 0x2002:    /* EN SPACE */
3836                case 0x2003:    /* EM SPACE */
3837                case 0x2004:    /* THREE-PER-EM SPACE */
3838                case 0x2005:    /* FOUR-PER-EM SPACE */
3839                case 0x2006:    /* SIX-PER-EM SPACE */
3840                case 0x2007:    /* FIGURE SPACE */
3841                case 0x2008:    /* PUNCTUATION SPACE */
3842                case 0x2009:    /* THIN SPACE */
3843                case 0x200A:    /* HAIR SPACE */
3844                case 0x202f:    /* NARROW NO-BREAK SPACE */
3845                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3846                case 0x3000:    /* IDEOGRAPHIC SPACE */
3847                break;
3848                }
3849              }
3850            break;
3851    
3852            case OP_NOT_VSPACE:
3853            for (i = 1; i <= min; i++)
3854              {
3855              if (eptr >= md->end_subject)
3856                {
3857                SCHECK_PARTIAL();
3858                MRRETURN(MATCH_NOMATCH);
3859                }
3860              GETCHARINC(c, eptr);
3861              switch(c)
3862                {
3863                default: break;
3864                case 0x0a:      /* LF */
3865                case 0x0b:      /* VT */
3866                case 0x0c:      /* FF */
3867                case 0x0d:      /* CR */
3868                case 0x85:      /* NEL */
3869                case 0x2028:    /* LINE SEPARATOR */
3870                case 0x2029:    /* PARAGRAPH SEPARATOR */
3871                MRRETURN(MATCH_NOMATCH);
3872                }
3873              }
3874            break;
3875    
3876            case OP_VSPACE:
3877            for (i = 1; i <= min; i++)
3878              {
3879              if (eptr >= md->end_subject)
3880                {
3881                SCHECK_PARTIAL();
3882                MRRETURN(MATCH_NOMATCH);
3883                }
3884              GETCHARINC(c, eptr);
3885              switch(c)
3886                {