/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 510 by ph10, Sat Mar 27 17:45:29 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49    #define NLBLOCK md             /* Block containing newline information */
50    #define PSSTART start_subject  /* Field containing processed string start */
51    #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    /* Undefine some potentially clashing cpp symbols */
56    
57  /* Structure for building a chain of data that actually lives on the  #undef min
58  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   const uschar *epb_saved_eptr;  
 } eptrblock;  
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_SKIP_ARG     (-996)
78    #define MATCH_THEN         (-995)
79    
80    /* This is a convenience macro for code that occurs many times. */
81    
82    #define MRRETURN(ra) \
83      { \
84      md->mark = markptr; \
85      RRETURN(ra); \
86      }
87    
88  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
89  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
90  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 81  static const char rep_max[] = { 0, 0, 0, Line 98  static const char rep_max[] = { 0, 0, 0,
98    
99    
100    
101  #ifdef DEBUG  #ifdef PCRE_DEBUG
102  /*************************************************  /*************************************************
103  *        Debugging function to print chars       *  *        Debugging function to print chars       *
104  *************************************************/  *************************************************/
# Line 101  Returns:     nothing Line 118  Returns:     nothing
118  static void  static void
119  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
120  {  {
121  int c;  unsigned int c;
122  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
123  while (length-- > 0)  while (length-- > 0)
124    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 128  Returns:      TRUE if matched Line 145  Returns:      TRUE if matched
145  */  */
146    
147  static BOOL  static BOOL
148  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
149    unsigned long int ims)    unsigned long int ims)
150  {  {
151  const uschar *p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
152    
153  #ifdef DEBUG  #ifdef PCRE_DEBUG
154  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
155    printf("matching subject <null>");    printf("matching subject <null>");
156  else  else
# Line 150  printf("\n"); Line 167  printf("\n");
167    
168  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
169    
170  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
171    properly if Unicode properties are supported. Otherwise, we can check only
172    ASCII characters. */
173    
174  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
175    {    {
176    #ifdef SUPPORT_UTF8
177    #ifdef SUPPORT_UCP
178      if (md->utf8)
179        {
180        USPTR endptr = eptr + length;
181        while (eptr < endptr)
182          {
183          int c, d;
184          GETCHARINC(c, eptr);
185          GETCHARINC(d, p);
186          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
187          }
188        }
189      else
190    #endif
191    #endif
192    
193      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
194      is no UCP support. */
195    
196    while (length-- > 0)    while (length-- > 0)
197      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
198    }    }
199    
200    /* In the caseful case, we can just compare the bytes, whether or not we
201    are in UTF-8 mode. */
202    
203  else  else
204    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
205    
# Line 169  return TRUE; Line 212  return TRUE;
212  ****************************************************************************  ****************************************************************************
213                     RECURSION IN THE match() FUNCTION                     RECURSION IN THE match() FUNCTION
214    
215  The match() function is highly recursive. Some regular expressions can cause  The match() function is highly recursive, though not every recursive call
216  it to recurse thousands of times. I was writing for Unix, so I just let it  increases the recursive depth. Nevertheless, some regular expressions can cause
217  call itself recursively. This uses the stack for saving everything that has  it to recurse to a great depth. I was writing for Unix, so I just let it call
218  to be saved for a recursive call. On Unix, the stack can be large, and this  itself recursively. This uses the stack for saving everything that has to be
219  works fine.  saved for a recursive call. On Unix, the stack can be large, and this works
220    fine.
221  It turns out that on non-Unix systems there are problems with programs that  
222  use a lot of stack. (This despite the fact that every last chip has oodles  It turns out that on some non-Unix-like systems there are problems with
223  of memory these days, and techniques for extending the stack have been known  programs that use a lot of stack. (This despite the fact that every last chip
224  for decades.) So....  has oodles of memory these days, and techniques for extending the stack have
225    been known for decades.) So....
226    
227  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
228  calls by keeping local variables that need to be preserved in blocks of memory  calls by keeping local variables that need to be preserved in blocks of memory
229  obtained from malloc instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
230  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
231  always used to.  always used to.
232    
233    The original heap-recursive code used longjmp(). However, it seems that this
234    can be very slow on some operating systems. Following a suggestion from Stan
235    Switzer, the use of longjmp() has been abolished, at the cost of having to
236    provide a unique number for each call to RMATCH. There is no way of generating
237    a sequence of numbers at compile time in C. I have given them names, to make
238    them stand out more clearly.
239    
240    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
241    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
242    tests. Furthermore, not using longjmp() means that local dynamic variables
243    don't have indeterminate values; this has meant that the frame size can be
244    reduced because the result can be "passed back" by straight setting of the
245    variable instead of being passed in the frame.
246  ****************************************************************************  ****************************************************************************
247  ***************************************************************************/  ***************************************************************************/
248    
249    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
250    below must be updated in sync.  */
251    
252  /* These versions of the macros use the stack, as normal */  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
253           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
254           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
255           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
256           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
257           RM51,  RM52, RM53, RM54 };
258    
259    /* These versions of the macros use the stack, as normal. There are debugging
260    versions and production versions. Note that the "rw" argument of RMATCH isn't
261    actually used in this definition. */
262    
263  #ifndef NO_RECURSE  #ifndef NO_RECURSE
264  #define REGISTER register  #define REGISTER register
265  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)  
266    #ifdef PCRE_DEBUG
267    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
268      { \
269      printf("match() called in line %d\n", __LINE__); \
270      rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
271      printf("to line %d\n", __LINE__); \
272      }
273    #define RRETURN(ra) \
274      { \
275      printf("match() returned %d from line %d ", ra, __LINE__); \
276      return ra; \
277      }
278    #else
279    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
280      rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
281  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
282    #endif
283    
284  #else  #else
285    
286    
287  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
288  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
289  match(), which never changes. */  argument of match(), which never changes. */
290    
291  #define REGISTER  #define REGISTER
292    
293  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
294    {\    {\
295    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
296    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
297      {\    newframe->Xeptr = ra;\
298      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
299      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
300      newframe->Xoffset_top = rc;\    newframe->Xmarkptr = markptr;\
301      newframe->Xims = re;\    newframe->Xoffset_top = rc;\
302      newframe->Xeptrb = rf;\    newframe->Xims = re;\
303      newframe->Xflags = rg;\    newframe->Xeptrb = rf;\
304      newframe->Xprevframe = frame;\    newframe->Xflags = rg;\
305      frame = newframe;\    newframe->Xrdepth = frame->Xrdepth + 1;\
306      DPRINTF(("restarting from line %d\n", __LINE__));\    newframe->Xprevframe = frame;\
307      goto HEAP_RECURSE;\    frame = newframe;\
308      }\    DPRINTF(("restarting from line %d\n", __LINE__));\
309    else\    goto HEAP_RECURSE;\
310      {\    L_##rw:\
311      DPRINTF(("longjumped back to line %d\n", __LINE__));\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
312    }    }
313    
314  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 235  match(), which never changes. */ Line 318  match(), which never changes. */
318    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
319    if (frame != NULL)\    if (frame != NULL)\
320      {\      {\
321      frame->Xresult = ra;\      rrc = ra;\
322      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
323      }\      }\
324    return ra;\    return ra;\
325    }    }
# Line 250  typedef struct heapframe { Line 332  typedef struct heapframe {
332    
333    /* Function arguments that may change */    /* Function arguments that may change */
334    
335    const uschar *Xeptr;    USPTR Xeptr;
336    const uschar *Xecode;    const uschar *Xecode;
337      USPTR Xmstart;
338      USPTR Xmarkptr;
339    int Xoffset_top;    int Xoffset_top;
340    long int Xims;    long int Xims;
341    eptrblock *Xeptrb;    eptrblock *Xeptrb;
342    int Xflags;    int Xflags;
343      unsigned int Xrdepth;
344    
345    /* Function local variables */    /* Function local variables */
346    
347    const uschar *Xcallpat;    USPTR Xcallpat;
348    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
349    const uschar *Xdata;    USPTR Xcharptr;
350    const uschar *Xnext;  #endif
351    const uschar *Xpp;    USPTR Xdata;
352    const uschar *Xprev;    USPTR Xnext;
353    const uschar *Xsaved_eptr;    USPTR Xpp;
354      USPTR Xprev;
355      USPTR Xsaved_eptr;
356    
357    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
358    
359    BOOL Xcur_is_word;    BOOL Xcur_is_word;
360    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
361    BOOL Xprev_is_word;    BOOL Xprev_is_word;
362    
363    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
364    
365  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
366    int Xprop_type;    int Xprop_type;
367      int Xprop_value;
368    int Xprop_fail_result;    int Xprop_fail_result;
369    int Xprop_category;    int Xprop_category;
370    int Xprop_chartype;    int Xprop_chartype;
371    int Xprop_othercase;    int Xprop_script;
372    int Xprop_test_against;    int Xoclength;
373    int *Xprop_test_variable;    uschar Xocchars[8];
374  #endif  #endif
375    
376      int Xcodelink;
377    int Xctype;    int Xctype;
378    int Xfc;    unsigned int Xfc;
379    int Xfi;    int Xfi;
380    int Xlength;    int Xlength;
381    int Xmax;    int Xmax;
# Line 301  typedef struct heapframe { Line 389  typedef struct heapframe {
389    
390    eptrblock Xnewptrb;    eptrblock Xnewptrb;
391    
392    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
393    
394    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
395    
396  } heapframe;  } heapframe;
397    
# Line 320  typedef struct heapframe { Line 407  typedef struct heapframe {
407  *         Match from current position            *  *         Match from current position            *
408  *************************************************/  *************************************************/
409    
410  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
411  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
412  same response.  same response. */
413    
414    /* These macros pack up tests that are used for partial matching, and which
415    appear several times in the code. We set the "hit end" flag if the pointer is
416    at the end of the subject and also past the start of the subject (i.e.
417    something has been matched). For hard partial matching, we then return
418    immediately. The second one is used when we already know we are past the end of
419    the subject. */
420    
421    #define CHECK_PARTIAL()\
422      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
423        {\
424        md->hitend = TRUE;\
425        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
426        }
427    
428    #define SCHECK_PARTIAL()\
429      if (md->partial != 0 && eptr > mstart)\
430        {\
431        md->hitend = TRUE;\
432        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
433        }
434    
435  Performance note: It might be tempting to extract commonly used fields from the  
436  md structure (e.g. utf8, end_subject) into individual variables to improve  /* Performance note: It might be tempting to extract commonly used fields from
437    the md structure (e.g. utf8, end_subject) into individual variables to improve
438  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
439  made performance worse.  made performance worse.
440    
441  Arguments:  Arguments:
442     eptr        pointer in subject     eptr        pointer to current character in subject
443     ecode       position in code     ecode       pointer to current position in compiled code
444       mstart      pointer to the current match start position (can be modified
445                     by encountering \K)
446       markptr     pointer to the most recent MARK name, or NULL
447     offset_top  current top pointer     offset_top  current top pointer
448     md          pointer to "static" info for the match     md          pointer to "static" info for the match
449     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 342  Arguments: Line 451  Arguments:
451                   brackets - for testing for empty matches                   brackets - for testing for empty matches
452     flags       can contain     flags       can contain
453                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
454                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
455                       group that can match an empty string
456       rdepth      the recursion depth
457    
458  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
459                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
460                   a negative MATCH_xxx value for PRUNE, SKIP, etc
461                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
462                   (e.g. stopped by recursion limit)                   (e.g. stopped by repeated call or recursion limit)
463  */  */
464    
465  static int  static int
466  match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
467    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
468    int flags)    eptrblock *eptrb, int flags, unsigned int rdepth)
469  {  {
470  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
471  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
472  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
473    
474    register int  rrc;         /* Returns from recursive calls */
475    register int  i;           /* Used for loops not involving calls to RMATCH() */
476    register unsigned int c;   /* Character values not kept over RMATCH() calls */
477    register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
478    
479  register int  rrc;    /* Returns from recursive calls */  BOOL minimize, possessive; /* Quantifier options */
480  register int  i;      /* Used for loops not involving calls to RMATCH() */  int condcode;
 register int  c;      /* Character values not kept over RMATCH() calls */  
 register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  
481    
482  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
483  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 377  frame->Xprevframe = NULL;            /* Line 492  frame->Xprevframe = NULL;            /*
492    
493  frame->Xeptr = eptr;  frame->Xeptr = eptr;
494  frame->Xecode = ecode;  frame->Xecode = ecode;
495    frame->Xmstart = mstart;
496    frame->Xmarkptr = markptr;
497  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
498  frame->Xims = ims;  frame->Xims = ims;
499  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
500  frame->Xflags = flags;  frame->Xflags = flags;
501    frame->Xrdepth = rdepth;
502    
503  /* This is where control jumps back to in order to effect "recursion" */  /* This is where control jumps back to in order to effect "recursion" */
504    
# Line 390  HEAP_RECURSE: Line 508  HEAP_RECURSE:
508    
509  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
510  #define ecode              frame->Xecode  #define ecode              frame->Xecode
511    #define mstart             frame->Xmstart
512    #define markptr            frame->Xmarkptr
513  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
514  #define ims                frame->Xims  #define ims                frame->Xims
515  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
516  #define flags              frame->Xflags  #define flags              frame->Xflags
517    #define rdepth             frame->Xrdepth
518    
519  /* Ditto for the local variables */  /* Ditto for the local variables */
520    
# Line 401  HEAP_RECURSE: Line 522  HEAP_RECURSE:
522  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
523  #endif  #endif
524  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
525    #define codelink           frame->Xcodelink
526  #define data               frame->Xdata  #define data               frame->Xdata
527  #define next               frame->Xnext  #define next               frame->Xnext
528  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 411  HEAP_RECURSE: Line 533  HEAP_RECURSE:
533    
534  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
535  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
536  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
537    
538  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
539    
540  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
541  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
542    #define prop_value         frame->Xprop_value
543  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
544  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
545  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
546  #define prop_othercase     frame->Xprop_othercase  #define prop_script        frame->Xprop_script
547  #define prop_test_against  frame->Xprop_test_against  #define oclength           frame->Xoclength
548  #define prop_test_variable frame->Xprop_test_variable  #define occhars            frame->Xocchars
549  #endif  #endif
550    
551  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 447  HEAP_RECURSE: Line 569  HEAP_RECURSE:
569  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
570  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
571    
572  #else  #else         /* NO_RECURSE not defined */
573  #define fi i  #define fi i
574  #define fc c  #define fc c
575    
576    
577  #ifdef SUPPORT_UTF8                /* Many of these variables are used ony */  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
578  const uschar *charptr;             /* small blocks of the code. My normal  */  const uschar *charptr;             /* in small blocks of the code. My normal */
579  #endif                             /* style of coding would have declared  */  #endif                             /* style of coding would have declared    */
580  const uschar *callpat;             /* them within each of those blocks.    */  const uschar *callpat;             /* them within each of those blocks.      */
581  const uschar *data;                /* However, in order to accommodate the */  const uschar *data;                /* However, in order to accommodate the   */
582  const uschar *next;                /* version of this code that uses an    */  const uschar *next;                /* version of this code that uses an      */
583  const uschar *pp;                  /* external "stack" implemented on the  */  USPTR         pp;                  /* external "stack" implemented on the    */
584  const uschar *prev;                /* heap, it is easier to declare them   */  const uschar *prev;                /* heap, it is easier to declare them all */
585  const uschar *saved_eptr;          /* all here, so the declarations can    */  USPTR         saved_eptr;          /* here, so the declarations can be cut   */
586                                     /* be cut out in a block. The only      */                                     /* out in a block. The only declarations  */
587  recursion_info new_recursive;      /* declarations within blocks below are */  recursion_info new_recursive;      /* within blocks below are for variables  */
588                                     /* for variables that do not have to    */                                     /* that do not have to be preserved over  */
589  BOOL cur_is_word;                  /* be preserved over a recursive call   */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
590  BOOL condition;                    /* to RMATCH().                         */  BOOL condition;
 BOOL minimize;  
591  BOOL prev_is_word;  BOOL prev_is_word;
592    
593  unsigned long int original_ims;  unsigned long int original_ims;
594    
595  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
596  int prop_type;  int prop_type;
597    int prop_value;
598  int prop_fail_result;  int prop_fail_result;
599  int prop_category;  int prop_category;
600  int prop_chartype;  int prop_chartype;
601  int prop_othercase;  int prop_script;
602  int prop_test_against;  int oclength;
603  int *prop_test_variable;  uschar occhars[8];
604  #endif  #endif
605    
606    int codelink;
607  int ctype;  int ctype;
608  int length;  int length;
609  int max;  int max;
# Line 493  int save_offset1, save_offset2, save_off Line 616  int save_offset1, save_offset2, save_off
616  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
617    
618  eptrblock newptrb;  eptrblock newptrb;
619  #endif  #endif     /* NO_RECURSE */
620    
621  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
622  variables. */  variables. */
623    
624  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
625    prop_value = 0;
626  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_against = 0;  
 prop_test_variable = NULL;  
627  #endif  #endif
628    
629  /* OK, now we can get on with the real code of the function. Recursion is  
630  specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,  /* This label is used for tail recursion, which is used in a few cases even
631  these just turn into a recursive call to match() and a "return", respectively.  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
632  However, RMATCH isn't like a function call because it's quite a complicated  used. Thanks to Ian Taylor for noticing this possibility and sending the
633  macro. It has to be used in one particular way. This shouldn't, however, impact  original patch. */
634  performance when true recursion is being used. */  
635    TAIL_RECURSE:
636    
637    /* OK, now we can get on with the real code of the function. Recursive calls
638    are specified by the macro RMATCH and RRETURN is used to return. When
639    NO_RECURSE is *not* defined, these just turn into a recursive call to match()
640    and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
641    defined). However, RMATCH isn't like a function call because it's quite a
642    complicated macro. It has to be used in one particular way. This shouldn't,
643    however, impact performance when true recursion is being used. */
644    
645    #ifdef SUPPORT_UTF8
646    utf8 = md->utf8;       /* Local copy of the flag */
647    #else
648    utf8 = FALSE;
649    #endif
650    
651    /* First check that we haven't called match() too many times, or that we
652    haven't exceeded the recursive call limit. */
653    
654  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
655    if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
656    
657  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
 utf8 = md->utf8;       /* Local copy of the flag */  
658    
659  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
660  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
661  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
662  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
663    When match() is called in other circumstances, don't add to the chain. The
664    match_cbegroup flag must NOT be used with tail recursion, because the memory
665    block that is used is on the stack, so a new one may be required for each
666    match(). */
667    
668  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
669    {    {
   newptrb.epb_prev = eptrb;  
670    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
671      newptrb.epb_prev = eptrb;
672    eptrb = &newptrb;    eptrb = &newptrb;
673    }    }
674    
675  /* Now start processing the operations. */  /* Now start processing the opcodes. */
676    
677  for (;;)  for (;;)
678    {    {
679      minimize = possessive = FALSE;
680    op = *ecode;    op = *ecode;
   minimize = FALSE;  
   
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
   /* Opening capturing bracket. If there is space in the offset vector, save  
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
681    
682    if (op > OP_BRA)    switch(op)
683      {      {
684      number = op - OP_BRA;      case OP_MARK:
685        markptr = ecode + 2;
686      /* For extended extraction brackets (large number), we have to fish out the      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
687      number from a dummy opcode at the start. */        ims, eptrb, flags, RM51);
688    
689      if (number > EXTRACT_BASIC_MAX)      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
690        number = GET2(ecode, 2+LINK_SIZE);      argument, and we must check whether that argument matches this MARK's
691        argument. It is passed back in md->start_match_ptr (an overloading of that
692        variable). If it does match, we reset that variable to the current subject
693        position and return MATCH_SKIP. Otherwise, pass back the return code
694        unaltered. */
695    
696        if (rrc == MATCH_SKIP_ARG &&
697            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
698          {
699          md->start_match_ptr = eptr;
700          RRETURN(MATCH_SKIP);
701          }
702    
703        if (md->mark == NULL) md->mark = markptr;
704        RRETURN(rrc);
705    
706        case OP_FAIL:
707        MRRETURN(MATCH_NOMATCH);
708    
709        case OP_COMMIT:
710        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
711          ims, eptrb, flags, RM52);
712        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
713        MRRETURN(MATCH_COMMIT);
714    
715        case OP_PRUNE:
716        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
717          ims, eptrb, flags, RM51);
718        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
719        MRRETURN(MATCH_PRUNE);
720    
721        case OP_PRUNE_ARG:
722        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
723          ims, eptrb, flags, RM51);
724        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
725        md->mark = ecode + 2;
726        RRETURN(MATCH_PRUNE);
727    
728        case OP_SKIP:
729        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
730          ims, eptrb, flags, RM53);
731        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
732        md->start_match_ptr = eptr;   /* Pass back current position */
733        MRRETURN(MATCH_SKIP);
734    
735        case OP_SKIP_ARG:
736        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
737          ims, eptrb, flags, RM53);
738        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
739    
740        /* Pass back the current skip name by overloading md->start_match_ptr and
741        returning the special MATCH_SKIP_ARG return code. This will either be
742        caught by a matching MARK, or get to the top, where it is treated the same
743        as PRUNE. */
744    
745        md->start_match_ptr = ecode + 2;
746        RRETURN(MATCH_SKIP_ARG);
747    
748        case OP_THEN:
749        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
750          ims, eptrb, flags, RM54);
751        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
752        MRRETURN(MATCH_THEN);
753    
754        case OP_THEN_ARG:
755        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
756          ims, eptrb, flags, RM54);
757        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
758        md->mark = ecode + 2;
759        RRETURN(MATCH_THEN);
760    
761        /* Handle a capturing bracket. If there is space in the offset vector, save
762        the current subject position in the working slot at the top of the vector.
763        We mustn't change the current values of the data slot, because they may be
764        set from a previous iteration of this group, and be referred to by a
765        reference inside the group.
766    
767        If the bracket fails to match, we need to restore this value and also the
768        values of the final offsets, in case they were set by a previous iteration
769        of the same bracket.
770    
771        If there isn't enough space in the offset vector, treat this as if it were
772        a non-capturing bracket. Don't worry about setting the flag for the error
773        case here; that is handled in the code for KET. */
774    
775        case OP_CBRA:
776        case OP_SCBRA:
777        number = GET2(ecode, 1+LINK_SIZE);
778      offset = number << 1;      offset = number << 1;
779    
780  #ifdef DEBUG  #ifdef PCRE_DEBUG
781      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
782        printf("subject=");
783      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
784      printf("\n");      printf("\n");
785  #endif  #endif
# Line 584  for (;;) Line 794  for (;;)
794        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
795        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
796    
797          flags = (op == OP_SCBRA)? match_cbegroup : 0;
798        do        do
799          {          {
800          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
801            match_isgroup);            ims, eptrb, flags, RM1);
802          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
803          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
804          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
805          }          }
# Line 600  for (;;) Line 811  for (;;)
811        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
812        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
813    
814          if (rrc != MATCH_THEN) md->mark = markptr;
815        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
816        }        }
817    
818      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
819        as a non-capturing bracket. */
820    
821      else op = OP_BRA;      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
822      }      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
823    
824    /* Other types of node can be handled by a switch */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
825    
826    switch(op)      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
827      {      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
828      case OP_BRA:     /* Non-capturing bracket: optimized */  
829      DPRINTF(("start bracket 0\n"));      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
830      do      final alternative within the brackets, we would return the result of a
831        recursive call to match() whatever happened. We can reduce stack usage by
832        turning this into a tail recursion, except in the case when match_cbegroup
833        is set. */
834    
835        case OP_BRA:
836        case OP_SBRA:
837        DPRINTF(("start non-capturing bracket\n"));
838        flags = (op >= OP_SBRA)? match_cbegroup : 0;
839        for (;;)
840        {        {
841        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
842          match_isgroup);          {
843        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (flags == 0)    /* Not a possibly empty group */
844              {
845              ecode += _pcre_OP_lengths[*ecode];
846              DPRINTF(("bracket 0 tail recursion\n"));
847              goto TAIL_RECURSE;
848              }
849    
850            /* Possibly empty group; can't use tail recursion. */
851    
852            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
853              eptrb, flags, RM48);
854            if (rrc == MATCH_NOMATCH) md->mark = markptr;
855            RRETURN(rrc);
856            }
857    
858          /* For non-final alternatives, continue the loop for a NOMATCH result;
859          otherwise return. */
860    
861          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
862            eptrb, flags, RM2);
863          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
864        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
865        }        }
866      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
867    
868      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
869      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
870      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
871      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
872        obeyed, we can use tail recursion to avoid using another stack frame. */
873    
874      case OP_COND:      case OP_COND:
875      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
876        codelink= GET(ecode, 1);
877    
878        /* Because of the way auto-callout works during compile, a callout item is
879        inserted between OP_COND and an assertion condition. */
880    
881        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
882          {
883          if (pcre_callout != NULL)
884            {
885            pcre_callout_block cb;
886            cb.version          = 1;   /* Version 1 of the callout block */
887            cb.callout_number   = ecode[LINK_SIZE+2];
888            cb.offset_vector    = md->offset_vector;
889            cb.subject          = (PCRE_SPTR)md->start_subject;
890            cb.subject_length   = md->end_subject - md->start_subject;
891            cb.start_match      = mstart - md->start_subject;
892            cb.current_position = eptr - md->start_subject;
893            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
894            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
895            cb.capture_top      = offset_top/2;
896            cb.capture_last     = md->capture_last;
897            cb.callout_data     = md->callout_data;
898            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
899            if (rrc < 0) RRETURN(rrc);
900            }
901          ecode += _pcre_OP_lengths[OP_CALLOUT];
902          }
903    
904        condcode = ecode[LINK_SIZE+1];
905    
906        /* Now see what the actual condition is */
907    
908        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
909          {
910          if (md->recursive == NULL)                /* Not recursing => FALSE */
911            {
912            condition = FALSE;
913            ecode += GET(ecode, 1);
914            }
915          else
916            {
917            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
918            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
919    
920            /* If the test is for recursion into a specific subpattern, and it is
921            false, but the test was set up by name, scan the table to see if the
922            name refers to any other numbers, and test them. The condition is true
923            if any one is set. */
924    
925            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
926              {
927              uschar *slotA = md->name_table;
928              for (i = 0; i < md->name_count; i++)
929                {
930                if (GET2(slotA, 0) == recno) break;
931                slotA += md->name_entry_size;
932                }
933    
934              /* Found a name for the number - there can be only one; duplicate
935              names for different numbers are allowed, but not vice versa. First
936              scan down for duplicates. */
937    
938              if (i < md->name_count)
939                {
940                uschar *slotB = slotA;
941                while (slotB > md->name_table)
942                  {
943                  slotB -= md->name_entry_size;
944                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
945                    {
946                    condition = GET2(slotB, 0) == md->recursive->group_num;
947                    if (condition) break;
948                    }
949                  else break;
950                  }
951    
952                /* Scan up for duplicates */
953    
954                if (!condition)
955                  {
956                  slotB = slotA;
957                  for (i++; i < md->name_count; i++)
958                    {
959                    slotB += md->name_entry_size;
960                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
961                      {
962                      condition = GET2(slotB, 0) == md->recursive->group_num;
963                      if (condition) break;
964                      }
965                    else break;
966                    }
967                  }
968                }
969              }
970    
971            /* Choose branch according to the condition */
972    
973            ecode += condition? 3 : GET(ecode, 1);
974            }
975          }
976    
977        else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
978        {        {
979        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
980        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
981          (md->recursive != NULL) :  
982          (offset < offset_top && md->offset_vector[offset] >= 0);        /* If the numbered capture is unset, but the reference was by name,
983        RMATCH(rrc, eptr, ecode + (condition?        scan the table to see if the name refers to any other numbers, and test
984          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),        them. The condition is true if any one is set. This is tediously similar
985          offset_top, md, ims, eptrb, match_isgroup);        to the code above, but not close enough to try to amalgamate. */
986        RRETURN(rrc);  
987          if (!condition && condcode == OP_NCREF)
988            {
989            int refno = offset >> 1;
990            uschar *slotA = md->name_table;
991    
992            for (i = 0; i < md->name_count; i++)
993              {
994              if (GET2(slotA, 0) == refno) break;
995              slotA += md->name_entry_size;
996              }
997    
998            /* Found a name for the number - there can be only one; duplicate names
999            for different numbers are allowed, but not vice versa. First scan down
1000            for duplicates. */
1001    
1002            if (i < md->name_count)
1003              {
1004              uschar *slotB = slotA;
1005              while (slotB > md->name_table)
1006                {
1007                slotB -= md->name_entry_size;
1008                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1009                  {
1010                  offset = GET2(slotB, 0) << 1;
1011                  condition = offset < offset_top &&
1012                    md->offset_vector[offset] >= 0;
1013                  if (condition) break;
1014                  }
1015                else break;
1016                }
1017    
1018              /* Scan up for duplicates */
1019    
1020              if (!condition)
1021                {
1022                slotB = slotA;
1023                for (i++; i < md->name_count; i++)
1024                  {
1025                  slotB += md->name_entry_size;
1026                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1027                    {
1028                    offset = GET2(slotB, 0) << 1;
1029                    condition = offset < offset_top &&
1030                      md->offset_vector[offset] >= 0;
1031                    if (condition) break;
1032                    }
1033                  else break;
1034                  }
1035                }
1036              }
1037            }
1038    
1039          /* Choose branch according to the condition */
1040    
1041          ecode += condition? 3 : GET(ecode, 1);
1042          }
1043    
1044        else if (condcode == OP_DEF)     /* DEFINE - always false */
1045          {
1046          condition = FALSE;
1047          ecode += GET(ecode, 1);
1048        }        }
1049    
1050      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
1051      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
1052        assertion. */
1053    
1054      else      else
1055        {        {
1056        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1057            match_condassert | match_isgroup);            match_condassert, RM3);
1058        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1059          {          {
1060          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
1061            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1062          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1063          }          }
1064        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1065          {          {
1066          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1067          }          }
1068        else ecode += GET(ecode, 1);        else
1069        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
1070          match_isgroup);          condition = FALSE;
1071        RRETURN(rrc);          ecode += codelink;
1072            }
1073        }        }
     /* Control never reaches here */  
1074    
1075      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
1076      encountered. */      we can use tail recursion to avoid using another stack frame, except when
1077        match_cbegroup is required for an unlimited repeat of a possibly empty
1078        group. If the second alternative doesn't exist, we can just plough on. */
1079    
1080        if (condition || *ecode == OP_ALT)
1081          {
1082          ecode += 1 + LINK_SIZE;
1083          if (op == OP_SCOND)        /* Possibly empty group */
1084            {
1085            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1086            RRETURN(rrc);
1087            }
1088          else                       /* Group must match something */
1089            {
1090            flags = 0;
1091            goto TAIL_RECURSE;
1092            }
1093          }
1094        else                         /* Condition false & no alternative */
1095          {
1096          ecode += 1 + LINK_SIZE;
1097          }
1098        break;
1099    
1100    
1101        /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1102        to close any currently open capturing brackets. */
1103    
1104        case OP_CLOSE:
1105        number = GET2(ecode, 1);
1106        offset = number << 1;
1107    
1108    #ifdef PCRE_DEBUG
1109          printf("end bracket %d at *ACCEPT", number);
1110          printf("\n");
1111    #endif
1112    
1113      case OP_CREF:      md->capture_last = number;
1114      case OP_BRANUMBER:      if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1115          {
1116          md->offset_vector[offset] =
1117            md->offset_vector[md->offset_end - number];
1118          md->offset_vector[offset+1] = eptr - md->start_subject;
1119          if (offset_top <= offset) offset_top = offset + 2;
1120          }
1121      ecode += 3;      ecode += 3;
1122      break;      break;
1123    
     /* End of the pattern. If we are in a recursion, we should restore the  
     offsets appropriately and continue from after the call. */  
1124    
1125        /* End of the pattern, either real or forced. If we are in a top-level
1126        recursion, we should restore the offsets appropriately and continue from
1127        after the call. */
1128    
1129        case OP_ACCEPT:
1130      case OP_END:      case OP_END:
1131      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1132        {        {
1133        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
1134        DPRINTF(("Hit the end in a (?0) recursion\n"));        DPRINTF(("End of pattern in a (?0) recursion\n"));
1135        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1136        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1137          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1138        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1139        ims = original_ims;        ims = original_ims;
1140        ecode = rec->after_call;        ecode = rec->after_call;
1141        break;        break;
1142        }        }
1143    
1144      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1145      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1146        the subject. In both cases, backtracking will then try other alternatives,
1147      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if any. */
1148      md->end_match_ptr = eptr;          /* Record where we ended */  
1149      md->end_offset_top = offset_top;   /* and how many extracts were taken */      if (eptr == mstart &&
1150      RRETURN(MATCH_MATCH);          (md->notempty ||
1151              (md->notempty_atstart &&
1152                mstart == md->start_subject + md->start_offset)))
1153          MRRETURN(MATCH_NOMATCH);
1154    
1155        /* Otherwise, we have a match. */
1156    
1157        md->end_match_ptr = eptr;           /* Record where we ended */
1158        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1159        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1160        MRRETURN(MATCH_MATCH);
1161    
1162      /* Change option settings */      /* Change option settings */
1163    
# Line 717  for (;;) Line 1177  for (;;)
1177      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1178      do      do
1179        {        {
1180        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1181          match_isgroup);          RM4);
1182        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH)
1183        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          {
1184            mstart = md->start_match_ptr;   /* In case \K reset it */
1185            break;
1186            }
1187          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1188        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1189        }        }
1190      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1191      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1192    
1193      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1194    
# Line 738  for (;;) Line 1202  for (;;)
1202      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1203      continue;      continue;
1204    
1205      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1206        PRUNE, or COMMIT means we must assume failure without checking subsequent
1207        branches. */
1208    
1209      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1210      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1211      do      do
1212        {        {
1213        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1214          match_isgroup);          RM5);
1215        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) MRRETURN(MATCH_NOMATCH);
1216        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1217            {
1218            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1219            break;
1220            }
1221          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1222        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1223        }        }
1224      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 766  for (;;) Line 1237  for (;;)
1237  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1238      if (utf8)      if (utf8)
1239        {        {
1240        c = GET(ecode,1);        i = GET(ecode, 1);
1241        for (i = 0; i < c; i++)        while (i-- > 0)
1242          {          {
1243          eptr--;          eptr--;
1244          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1245          BACKCHAR(eptr)          BACKCHAR(eptr);
1246          }          }
1247        }        }
1248      else      else
# Line 780  for (;;) Line 1251  for (;;)
1251      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1252    
1253        {        {
1254        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
1255        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1256        }        }
1257    
1258      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1259    
1260        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1261      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1262      break;      break;
1263    
# Line 800  for (;;) Line 1272  for (;;)
1272        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 1;   /* Version 1 of the callout block */
1273        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1274        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1275        cb.subject          = (const char *)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1276        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
1277        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
1278        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
1279        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1280        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1281        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1282        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1283        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1284        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1285        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1286        }        }
1287      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 837  for (;;) Line 1309  for (;;)
1309      case OP_RECURSE:      case OP_RECURSE:
1310        {        {
1311        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1312        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
1313            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
1314    
1315        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1316    
# Line 869  for (;;) Line 1336  for (;;)
1336    
1337        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1338              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1339        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1340    
1341        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1342        restore the offset and recursion data. */        restore the offset and recursion data. */
1343    
1344        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1345          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1346        do        do
1347          {          {
1348          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1349              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
1350          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
1351            {            {
1352              DPRINTF(("Recursion matched\n"));
1353            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1354            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1355              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1356            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1357              }
1358            else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1359              {
1360              DPRINTF(("Recursion gave error %d\n", rrc));
1361              if (new_recursive.offset_save != stacksave)
1362                (pcre_free)(new_recursive.offset_save);
1363              RRETURN(rrc);
1364            }            }
         else if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
1365    
1366          md->recursive = &new_recursive;          md->recursive = &new_recursive;
1367          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
# Line 900  for (;;) Line 1374  for (;;)
1374        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1375        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1376          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1377        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1378        }        }
1379      /* Control never reaches here */      /* Control never reaches here */
1380    
# Line 909  for (;;) Line 1383  for (;;)
1383      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1384      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1385      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1386      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1387        the start-of-match value in case it was changed by \K. */
1388    
1389      case OP_ONCE:      case OP_ONCE:
1390        {      prev = ecode;
1391        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1392    
1393        do      do
1394          {
1395          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1396          if (rrc == MATCH_MATCH)
1397          {          {
1398          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,          mstart = md->start_match_ptr;
1399            eptrb, match_isgroup);          break;
         if (rrc == MATCH_MATCH) break;  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         ecode += GET(ecode,1);  
1400          }          }
1401        while (*ecode == OP_ALT);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1402          ecode += GET(ecode,1);
1403          }
1404        while (*ecode == OP_ALT);
1405    
1406        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1407    
1408        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1409    
1410        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1411        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1412    
1413        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1414    
1415        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1416        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1417    
1418        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1419        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1420        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1421        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1422        course of events. */      course of events. */
1423    
1424        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1425          {        {
1426          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1427          break;        break;
1428          }        }
1429    
1430        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1431        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1432        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1433        opcode. */      any options that changed within the bracket before re-running it, so
1434        check the next opcode. */
1435    
1436        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1437          {        {
1438          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1439          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1440          }        }
1441    
1442        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1443          {        {
1444          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1445          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1446          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1447          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = 0;
1448          }        goto TAIL_RECURSE;
1449        else  /* OP_KETRMAX */        }
1450          {      else  /* OP_KETRMAX */
1451          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        {
1452          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1453          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1454          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += 1 + LINK_SIZE;
1455          }        flags = 0;
1456          goto TAIL_RECURSE;
1457        }        }
1458      RRETURN(MATCH_NOMATCH);      /* Control never gets here */
1459    
1460      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1461      bracketed group and go to there. */      bracketed group and go to there. */
# Line 985  for (;;) Line 1464  for (;;)
1464      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1465      break;      break;
1466    
1467      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1468      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1469      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1470      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1471      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1472    
1473      case OP_BRAZERO:      case OP_BRAZERO:
1474        {        {
1475        next = ecode+1;        next = ecode+1;
1476        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1477        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1478        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1479        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1480        }        }
1481      break;      break;
1482    
1483      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1484        {        {
1485        next = ecode+1;        next = ecode+1;
1486        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1487        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1488        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1489        ecode++;        ecode++;
1490        }        }
1491      break;      break;
1492    
1493      /* End of a group, repeated or non-repeating. If we are at the end of      case OP_SKIPZERO:
1494      an assertion "group", stop matching and return MATCH_MATCH, but record the        {
1495      current high water mark for use by positive assertions. Do this also        next = ecode+1;
1496      for the "once" (not-backup up) groups. */        do next += GET(next,1); while (*next == OP_ALT);
1497          ecode = next + 1 + LINK_SIZE;
1498          }
1499        break;
1500    
1501        /* End of a group, repeated or non-repeating. */
1502    
1503      case OP_KET:      case OP_KET:
1504      case OP_KETRMIN:      case OP_KETRMIN:
1505      case OP_KETRMAX:      case OP_KETRMAX:
1506        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
1507    
1508        /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1509        infinite repeats of empty string matches, retrieve the subject start from
1510        the chain. Otherwise, set it NULL. */
1511    
1512        eptrb = eptrb->epb_prev;      if (*prev >= OP_SBRA)
1513          {
1514        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1515            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1516            *prev == OP_ONCE)        }
1517          {      else saved_eptr = NULL;
         md->end_match_ptr = eptr;      /* For ONCE */  
         md->end_offset_top = offset_top;  
         RRETURN(MATCH_MATCH);  
         }  
1518    
1519        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group or an atomic group, stop
1520        group number back at the start and if necessary complete handling an      matching and return MATCH_MATCH, but record the current high water mark for
1521        extraction by setting the offsets and bumping the high water mark. */      use by positive assertions. We also need to record the match start in case
1522        it was changed by \K. */
1523    
1524        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1525          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1526          number = *prev - OP_BRA;          *prev == OP_ONCE)
1527          {
1528          md->end_match_ptr = eptr;      /* For ONCE */
1529          md->end_offset_top = offset_top;
1530          md->start_match_ptr = mstart;
1531          MRRETURN(MATCH_MATCH);
1532          }
1533    
1534          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1535          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1536        bumping the high water mark. Note that whole-pattern recursion is coded as
1537        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1538        when the OP_END is reached. Other recursion is handled here. */
1539    
1540          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1541          offset = number << 1;        {
1542          number = GET2(prev, 1+LINK_SIZE);
1543          offset = number << 1;
1544    
1545  #ifdef DEBUG  #ifdef PCRE_DEBUG
1546          printf("end bracket %d", number);        printf("end bracket %d", number);
1547          printf("\n");        printf("\n");
1548  #endif  #endif
1549    
1550          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1551          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1552          into group 0, so it won't be picked up here. Instead, we catch it when          {
1553          the OP_END is reached. */          md->offset_vector[offset] =
1554              md->offset_vector[md->offset_end - number];
1555            md->offset_vector[offset+1] = eptr - md->start_subject;
1556            if (offset_top <= offset) offset_top = offset + 2;
1557            }
1558    
1559          /* Handle a recursively called group. Restore the offsets
1560          appropriately and continue from after the call. */
1561    
1562          if (md->recursive != NULL && md->recursive->group_num == number)
1563            {
1564            recursion_info *rec = md->recursive;
1565            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1566            md->recursive = rec->prevrec;
1567            memcpy(md->offset_vector, rec->offset_save,
1568              rec->saved_max * sizeof(int));
1569            offset_top = rec->save_offset_top;
1570            ecode = rec->after_call;
1571            ims = original_ims;
1572            break;
1573            }
1574          }
1575    
1576          if (number > 0)      /* For both capturing and non-capturing groups, reset the value of the ims
1577            {      flags, in case they got changed during the group. */
           md->capture_last = number;  
           if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
             {  
             md->offset_vector[offset] =  
               md->offset_vector[md->offset_end - number];  
             md->offset_vector[offset+1] = eptr - md->start_subject;  
             if (offset_top <= offset) offset_top = offset + 2;  
             }  
1578    
1579            /* Handle a recursively called group. Restore the offsets      ims = original_ims;
1580            appropriately and continue from after the call. */      DPRINTF(("ims reset to %02lx\n", ims));
1581    
1582            if (md->recursive != NULL && md->recursive->group_num == number)      /* For a non-repeating ket, just continue at this level. This also
1583              {      happens for a repeating ket if no characters were matched in the group.
1584              recursion_info *rec = md->recursive;      This is the forcible breaking of infinite loops as implemented in Perl
1585              DPRINTF(("Recursion (%d) succeeded - continuing\n", number));      5.005. If there is an options reset, it will get obeyed in the normal
1586              md->recursive = rec->prevrec;      course of events. */
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
         }  
1587    
1588        /* Reset the value of the ims flags, in case they got changed during      if (*ecode == OP_KET || eptr == saved_eptr)
1589        the group. */        {
1590          ecode += 1 + LINK_SIZE;
1591          break;
1592          }
1593    
1594        ims = original_ims;      /* The repeating kets try the rest of the pattern or restart from the
1595        DPRINTF(("ims reset to %02lx\n", ims));      preceding bracket, in the appropriate order. In the second case, we can use
1596        tail recursion to avoid using another stack frame, unless we have an
1597        unlimited repeat of a group that can match an empty string. */
1598    
1599        /* For a non-repeating ket, just continue at this level. This also      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
       happens for a repeating ket if no characters were matched in the group.  
       This is the forcible breaking of infinite loops as implemented in Perl  
       5.005. If there is an options reset, it will get obeyed in the normal  
       course of events. */  
1600    
1601        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KETRMIN)
1602          {
1603          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1604          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1605          if (flags != 0)    /* Could match an empty string */
1606          {          {
1607          ecode += 1 + LINK_SIZE;          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1608          break;          RRETURN(rrc);
         }  
   
       /* The repeating kets try the rest of the pattern or restart from the  
       preceding bracket, in the appropriate order. */  
   
       if (*ecode == OP_KETRMIN)  
         {  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
1609          }          }
1610          ecode = prev;
1611          goto TAIL_RECURSE;
1612        }        }
1613        else  /* OP_KETRMAX */
1614      RRETURN(MATCH_NOMATCH);        {
1615          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1616          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1617          ecode += 1 + LINK_SIZE;
1618          flags = 0;
1619          goto TAIL_RECURSE;
1620          }
1621        /* Control never gets here */
1622    
1623      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1624    
1625      case OP_CIRC:      case OP_CIRC:
1626      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1627      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1628        {        {
1629        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1630          RRETURN(MATCH_NOMATCH);            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1631            MRRETURN(MATCH_NOMATCH);
1632        ecode++;        ecode++;
1633        break;        break;
1634        }        }
# Line 1145  for (;;) Line 1637  for (;;)
1637      /* Start of subject assertion */      /* Start of subject assertion */
1638    
1639      case OP_SOD:      case OP_SOD:
1640      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1641      ecode++;      ecode++;
1642      break;      break;
1643    
1644      /* Start of match assertion */      /* Start of match assertion */
1645    
1646      case OP_SOM:      case OP_SOM:
1647      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1648        ecode++;
1649        break;
1650    
1651        /* Reset the start of match point */
1652    
1653        case OP_SET_SOM:
1654        mstart = eptr;
1655      ecode++;      ecode++;
1656      break;      break;
1657    
# Line 1163  for (;;) Line 1662  for (;;)
1662      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1663        {        {
1664        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1665          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1666        else        else
1667          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1668        ecode++;        ecode++;
1669        break;        break;
1670        }        }
1671      else      else
1672        {        {
1673        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1674        if (!md->endonly)        if (!md->endonly)
1675          {          {
1676          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1677             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1678            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
1679          ecode++;          ecode++;
1680          break;          break;
1681          }          }
1682        }        }
1683      /* ... else fall through */      /* ... else fall through for endonly */
1684    
1685      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1686    
1687      case OP_EOD:      case OP_EOD:
1688      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1689      ecode++;      ecode++;
1690      break;      break;
1691    
1692      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1693    
1694      case OP_EODN:      case OP_EODN:
1695      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1696         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1697          MRRETURN(MATCH_NOMATCH);
1698      ecode++;      ecode++;
1699      break;      break;
1700    
# Line 1206  for (;;) Line 1706  for (;;)
1706    
1707        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1708        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1709        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1710          partial matching. */
1711    
1712  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1713        if (utf8)        if (utf8)
1714          {          {
1715          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1716            {            {
1717            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1718            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1719              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1720            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1721            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1722            }            }
1723          if (eptr >= md->end_subject) cur_is_word = FALSE; else          if (eptr >= md->end_subject)
1724              {
1725              SCHECK_PARTIAL();
1726              cur_is_word = FALSE;
1727              }
1728            else
1729            {            {
1730            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1731            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1227  for (;;) Line 1734  for (;;)
1734        else        else
1735  #endif  #endif
1736    
1737        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode */
1738    
1739          {          {
1740          prev_is_word = (eptr != md->start_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1741            ((md->ctypes[eptr[-1]] & ctype_word) != 0);            {
1742          cur_is_word = (eptr < md->end_subject) &&            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1743            ((md->ctypes[*eptr] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1744              }
1745            if (eptr >= md->end_subject)
1746              {
1747              SCHECK_PARTIAL();
1748              cur_is_word = FALSE;
1749              }
1750            else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1751          }          }
1752    
1753        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
1754    
1755        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
1756             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1757          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1758        }        }
1759      break;      break;
1760    
1761      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1762    
1763      case OP_ANY:      case OP_ANY:
1764      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1765        RRETURN(MATCH_NOMATCH);      /* Fall through */
1766      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);  
1767  #ifdef SUPPORT_UTF8      case OP_ALLANY:
1768      if (utf8)      if (eptr++ >= md->end_subject)
1769        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        {
1770  #endif        SCHECK_PARTIAL();
1771          MRRETURN(MATCH_NOMATCH);
1772          }
1773        if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1774      ecode++;      ecode++;
1775      break;      break;
1776    
# Line 1261  for (;;) Line 1778  for (;;)
1778      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1779    
1780      case OP_ANYBYTE:      case OP_ANYBYTE:
1781      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1782          {
1783          SCHECK_PARTIAL();
1784          MRRETURN(MATCH_NOMATCH);
1785          }
1786      ecode++;      ecode++;
1787      break;      break;
1788    
1789      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1790      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1791          {
1792          SCHECK_PARTIAL();
1793          MRRETURN(MATCH_NOMATCH);
1794          }
1795      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1796      if (      if (
1797  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1274  for (;;) Line 1799  for (;;)
1799  #endif  #endif
1800         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1801         )         )
1802        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1803      ecode++;      ecode++;
1804      break;      break;
1805    
1806      case OP_DIGIT:      case OP_DIGIT:
1807      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1808          {
1809          SCHECK_PARTIAL();
1810          MRRETURN(MATCH_NOMATCH);
1811          }
1812      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1813      if (      if (
1814  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1287  for (;;) Line 1816  for (;;)
1816  #endif  #endif
1817         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1818         )         )
1819        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1820      ecode++;      ecode++;
1821      break;      break;
1822    
1823      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1824      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1825          {
1826          SCHECK_PARTIAL();
1827          MRRETURN(MATCH_NOMATCH);
1828          }
1829      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1830      if (      if (
1831  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1300  for (;;) Line 1833  for (;;)
1833  #endif  #endif
1834         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1835         )         )
1836        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1837      ecode++;      ecode++;
1838      break;      break;
1839    
1840      case OP_WHITESPACE:      case OP_WHITESPACE:
1841      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1842          {
1843          SCHECK_PARTIAL();
1844          MRRETURN(MATCH_NOMATCH);
1845          }
1846      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1847      if (      if (
1848  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1313  for (;;) Line 1850  for (;;)
1850  #endif  #endif
1851         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1852         )         )
1853        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1854      ecode++;      ecode++;
1855      break;      break;
1856    
1857      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1858      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1859          {
1860          SCHECK_PARTIAL();
1861          MRRETURN(MATCH_NOMATCH);
1862          }
1863      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1864      if (      if (
1865  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1326  for (;;) Line 1867  for (;;)
1867  #endif  #endif
1868         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1869         )         )
1870        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1871      ecode++;      ecode++;
1872      break;      break;
1873    
1874      case OP_WORDCHAR:      case OP_WORDCHAR:
1875      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1876          {
1877          SCHECK_PARTIAL();
1878          MRRETURN(MATCH_NOMATCH);
1879          }
1880      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1881      if (      if (
1882  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1339  for (;;) Line 1884  for (;;)
1884  #endif  #endif
1885         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1886         )         )
1887        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1888        ecode++;
1889        break;
1890    
1891        case OP_ANYNL:
1892        if (eptr >= md->end_subject)
1893          {
1894          SCHECK_PARTIAL();
1895          MRRETURN(MATCH_NOMATCH);
1896          }
1897        GETCHARINCTEST(c, eptr);
1898        switch(c)
1899          {
1900          default: MRRETURN(MATCH_NOMATCH);
1901          case 0x000d:
1902          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1903          break;
1904    
1905          case 0x000a:
1906          break;
1907    
1908          case 0x000b:
1909          case 0x000c:
1910          case 0x0085:
1911          case 0x2028:
1912          case 0x2029:
1913          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1914          break;
1915          }
1916        ecode++;
1917        break;
1918    
1919        case OP_NOT_HSPACE:
1920        if (eptr >= md->end_subject)
1921          {
1922          SCHECK_PARTIAL();
1923          MRRETURN(MATCH_NOMATCH);
1924          }
1925        GETCHARINCTEST(c, eptr);
1926        switch(c)
1927          {
1928          default: break;
1929          case 0x09:      /* HT */
1930          case 0x20:      /* SPACE */
1931          case 0xa0:      /* NBSP */
1932          case 0x1680:    /* OGHAM SPACE MARK */
1933          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1934          case 0x2000:    /* EN QUAD */
1935          case 0x2001:    /* EM QUAD */
1936          case 0x2002:    /* EN SPACE */
1937          case 0x2003:    /* EM SPACE */
1938          case 0x2004:    /* THREE-PER-EM SPACE */
1939          case 0x2005:    /* FOUR-PER-EM SPACE */
1940          case 0x2006:    /* SIX-PER-EM SPACE */
1941          case 0x2007:    /* FIGURE SPACE */
1942          case 0x2008:    /* PUNCTUATION SPACE */
1943          case 0x2009:    /* THIN SPACE */
1944          case 0x200A:    /* HAIR SPACE */
1945          case 0x202f:    /* NARROW NO-BREAK SPACE */
1946          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1947          case 0x3000:    /* IDEOGRAPHIC SPACE */
1948          MRRETURN(MATCH_NOMATCH);
1949          }
1950        ecode++;
1951        break;
1952    
1953        case OP_HSPACE:
1954        if (eptr >= md->end_subject)
1955          {
1956          SCHECK_PARTIAL();
1957          MRRETURN(MATCH_NOMATCH);
1958          }
1959        GETCHARINCTEST(c, eptr);
1960        switch(c)
1961          {
1962          default: MRRETURN(MATCH_NOMATCH);
1963          case 0x09:      /* HT */
1964          case 0x20:      /* SPACE */
1965          case 0xa0:      /* NBSP */
1966          case 0x1680:    /* OGHAM SPACE MARK */
1967          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1968          case 0x2000:    /* EN QUAD */
1969          case 0x2001:    /* EM QUAD */
1970          case 0x2002:    /* EN SPACE */
1971          case 0x2003:    /* EM SPACE */
1972          case 0x2004:    /* THREE-PER-EM SPACE */
1973          case 0x2005:    /* FOUR-PER-EM SPACE */
1974          case 0x2006:    /* SIX-PER-EM SPACE */
1975          case 0x2007:    /* FIGURE SPACE */
1976          case 0x2008:    /* PUNCTUATION SPACE */
1977          case 0x2009:    /* THIN SPACE */
1978          case 0x200A:    /* HAIR SPACE */
1979          case 0x202f:    /* NARROW NO-BREAK SPACE */
1980          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1981          case 0x3000:    /* IDEOGRAPHIC SPACE */
1982          break;
1983          }
1984        ecode++;
1985        break;
1986    
1987        case OP_NOT_VSPACE:
1988        if (eptr >= md->end_subject)
1989          {
1990          SCHECK_PARTIAL();
1991          MRRETURN(MATCH_NOMATCH);
1992          }
1993        GETCHARINCTEST(c, eptr);
1994        switch(c)
1995          {
1996          default: break;
1997          case 0x0a:      /* LF */
1998          case 0x0b:      /* VT */
1999          case 0x0c:      /* FF */
2000          case 0x0d:      /* CR */
2001          case 0x85:      /* NEL */
2002          case 0x2028:    /* LINE SEPARATOR */
2003          case 0x2029:    /* PARAGRAPH SEPARATOR */
2004          MRRETURN(MATCH_NOMATCH);
2005          }
2006        ecode++;
2007        break;
2008    
2009        case OP_VSPACE:
2010        if (eptr >= md->end_subject)
2011          {
2012          SCHECK_PARTIAL();
2013          MRRETURN(MATCH_NOMATCH);
2014          }
2015        GETCHARINCTEST(c, eptr);
2016        switch(c)
2017          {
2018          default: MRRETURN(MATCH_NOMATCH);
2019          case 0x0a:      /* LF */
2020          case 0x0b:      /* VT */
2021          case 0x0c:      /* FF */
2022          case 0x0d:      /* CR */
2023          case 0x85:      /* NEL */
2024          case 0x2028:    /* LINE SEPARATOR */
2025          case 0x2029:    /* PARAGRAPH SEPARATOR */
2026          break;
2027          }
2028      ecode++;      ecode++;
2029      break;      break;
2030    
# Line 1349  for (;;) Line 2034  for (;;)
2034    
2035      case OP_PROP:      case OP_PROP:
2036      case OP_NOTPROP:      case OP_NOTPROP:
2037      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2038          {
2039          SCHECK_PARTIAL();
2040          MRRETURN(MATCH_NOMATCH);
2041          }
2042      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2043        {        {
2044        int chartype, rqdtype;        const ucd_record *prop = GET_UCD(c);
       int othercase;  
       int category = ucp_findchar(c, &chartype, &othercase);  
2045    
2046        rqdtype = *(++ecode);        switch(ecode[1])
       ecode++;  
   
       if (rqdtype >= 128)  
         {  
         if ((rqdtype - 128 != category) == (op == OP_PROP))  
           RRETURN(MATCH_NOMATCH);  
         }  
       else  
2047          {          {
2048          if ((rqdtype != chartype) == (op == OP_PROP))          case PT_ANY:
2049            RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2050            break;
2051    
2052            case PT_LAMP:
2053            if ((prop->chartype == ucp_Lu ||
2054                 prop->chartype == ucp_Ll ||
2055                 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2056              MRRETURN(MATCH_NOMATCH);
2057             break;
2058    
2059            case PT_GC:
2060            if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2061              MRRETURN(MATCH_NOMATCH);
2062            break;
2063    
2064            case PT_PC:
2065            if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2066              MRRETURN(MATCH_NOMATCH);
2067            break;
2068    
2069            case PT_SC:
2070            if ((ecode[2] != prop->script) == (op == OP_PROP))
2071              MRRETURN(MATCH_NOMATCH);
2072            break;
2073    
2074            default:
2075            RRETURN(PCRE_ERROR_INTERNAL);
2076          }          }
2077    
2078          ecode += 3;
2079        }        }
2080      break;      break;
2081    
# Line 1376  for (;;) Line 2083  for (;;)
2083      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2084    
2085      case OP_EXTUNI:      case OP_EXTUNI:
2086      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2087          {
2088          SCHECK_PARTIAL();
2089          MRRETURN(MATCH_NOMATCH);
2090          }
2091      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2092        {        {
2093        int chartype;        int category = UCD_CATEGORY(c);
2094        int othercase;        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       int category = ucp_findchar(c, &chartype, &othercase);  
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2095        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2096          {          {
2097          int len = 1;          int len = 1;
# Line 1390  for (;;) Line 2099  for (;;)
2099            {            {
2100            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2101            }            }
2102          category = ucp_findchar(c, &chartype, &othercase);          category = UCD_CATEGORY(c);
2103          if (category != ucp_M) break;          if (category != ucp_M) break;
2104          eptr += len;          eptr += len;
2105          }          }
# Line 1411  for (;;) Line 2120  for (;;)
2120      case OP_REF:      case OP_REF:
2121        {        {
2122        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2123        ecode += 3;                                 /* Advance past item */        ecode += 3;
2124    
2125          /* If the reference is unset, there are two possibilities:
2126    
2127          (a) In the default, Perl-compatible state, set the length to be longer
2128          than the amount of subject left; this ensures that every attempt at a
2129          match fails. We can't just fail here, because of the possibility of
2130          quantifiers with zero minima.
2131    
2132        /* If the reference is unset, set the length to be longer than the amount        (b) If the JavaScript compatibility flag is set, set the length to zero
2133        of subject left; this ensures that every attempt at a match fails. We        so that the back reference matches an empty string.
2134        can't just fail here, because of the possibility of quantifiers with zero  
2135        minima. */        Otherwise, set the length to the length of what was matched by the
2136          referenced subpattern. */
2137        length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
2138          md->end_subject - eptr + 1 :        if (offset >= offset_top || md->offset_vector[offset] < 0)
2139          md->offset_vector[offset+1] - md->offset_vector[offset];          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2140          else
2141            length = md->offset_vector[offset+1] - md->offset_vector[offset];
2142    
2143        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2144    
# Line 1449  for (;;) Line 2167  for (;;)
2167          break;          break;
2168    
2169          default:               /* No repeat follows */          default:               /* No repeat follows */
2170          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2171              {
2172              CHECK_PARTIAL();
2173              MRRETURN(MATCH_NOMATCH);
2174              }
2175          eptr += length;          eptr += length;
2176          continue;              /* With the main loop */          continue;              /* With the main loop */
2177          }          }
# Line 1465  for (;;) Line 2187  for (;;)
2187    
2188        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2189          {          {
2190          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2191              {
2192              CHECK_PARTIAL();
2193              MRRETURN(MATCH_NOMATCH);
2194              }
2195          eptr += length;          eptr += length;
2196          }          }
2197    
# Line 1480  for (;;) Line 2206  for (;;)
2206          {          {
2207          for (fi = min;; fi++)          for (fi = min;; fi++)
2208            {            {
2209            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2210            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2211            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2212              RRETURN(MATCH_NOMATCH);            if (!match_ref(offset, eptr, length, md, ims))
2213                {
2214                CHECK_PARTIAL();
2215                MRRETURN(MATCH_NOMATCH);
2216                }
2217            eptr += length;            eptr += length;
2218            }            }
2219          /* Control never gets here */          /* Control never gets here */
# Line 1496  for (;;) Line 2226  for (;;)
2226          pp = eptr;          pp = eptr;
2227          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2228            {            {
2229            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2230                {
2231                CHECK_PARTIAL();
2232                break;
2233                }
2234            eptr += length;            eptr += length;
2235            }            }
2236          while (eptr >= pp)          while (eptr >= pp)
2237            {            {
2238            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2239            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2240            eptr -= length;            eptr -= length;
2241            }            }
2242          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2243          }          }
2244        }        }
2245      /* Control never gets here */      /* Control never gets here */
2246    
   
   
2247      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2248      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2249      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1566  for (;;) Line 2298  for (;;)
2298          {          {
2299          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2300            {            {
2301            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2302                {
2303                SCHECK_PARTIAL();
2304                MRRETURN(MATCH_NOMATCH);
2305                }
2306            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2307            if (c > 255)            if (c > 255)
2308              {              {
2309              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2310              }              }
2311            else            else
2312              {              {
2313              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2314              }              }
2315            }            }
2316          }          }
# Line 1584  for (;;) Line 2320  for (;;)
2320          {          {
2321          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2322            {            {
2323            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2324                {
2325                SCHECK_PARTIAL();
2326                MRRETURN(MATCH_NOMATCH);
2327                }
2328            c = *eptr++;            c = *eptr++;
2329            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2330            }            }
2331          }          }
2332    
# Line 1606  for (;;) Line 2346  for (;;)
2346            {            {
2347            for (fi = min;; fi++)            for (fi = min;; fi++)
2348              {              {
2349              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2350              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2351              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2352                if (eptr >= md->end_subject)
2353                  {
2354                  SCHECK_PARTIAL();
2355                  MRRETURN(MATCH_NOMATCH);
2356                  }
2357              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2358              if (c > 255)              if (c > 255)
2359                {                {
2360                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2361                }                }
2362              else              else
2363                {                {
2364                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2365                }                }
2366              }              }
2367            }            }
# Line 1626  for (;;) Line 2371  for (;;)
2371            {            {
2372            for (fi = min;; fi++)            for (fi = min;; fi++)
2373              {              {
2374              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2375              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2376              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2377                if (eptr >= md->end_subject)
2378                  {
2379                  SCHECK_PARTIAL();
2380                  MRRETURN(MATCH_NOMATCH);
2381                  }
2382              c = *eptr++;              c = *eptr++;
2383              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2384              }              }
2385            }            }
2386          /* Control never gets here */          /* Control never gets here */
# Line 1649  for (;;) Line 2399  for (;;)
2399            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2400              {              {
2401              int len = 1;              int len = 1;
2402              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2403                  {
2404                  SCHECK_PARTIAL();
2405                  break;
2406                  }
2407              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2408              if (c > 255)              if (c > 255)
2409                {                {
# Line 1663  for (;;) Line 2417  for (;;)
2417              }              }
2418            for (;;)            for (;;)
2419              {              {
2420              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2421              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2422              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2423              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1675  for (;;) Line 2429  for (;;)
2429            {            {
2430            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2431              {              {
2432              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2433                  {
2434                  SCHECK_PARTIAL();
2435                  break;
2436                  }
2437              c = *eptr;              c = *eptr;
2438              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2439              eptr++;              eptr++;
2440              }              }
2441            while (eptr >= pp)            while (eptr >= pp)
2442              {              {
2443              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
             eptr--;  
2444              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2445                eptr--;
2446              }              }
2447            }            }
2448    
2449          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2450          }          }
2451        }        }
2452      /* Control never gets here */      /* Control never gets here */
2453    
2454    
2455      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2456      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2457        mode, because Unicode properties are supported in non-UTF-8 mode. */
2458    
2459  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2460      case OP_XCLASS:      case OP_XCLASS:
# Line 1736  for (;;) Line 2495  for (;;)
2495    
2496        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2497          {          {
2498          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2499          GETCHARINC(c, eptr);            {
2500          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2501              MRRETURN(MATCH_NOMATCH);
2502              }
2503            GETCHARINCTEST(c, eptr);
2504            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2505          }          }
2506    
2507        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1753  for (;;) Line 2516  for (;;)
2516          {          {
2517          for (fi = min;; fi++)          for (fi = min;; fi++)
2518            {            {
2519            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2520            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2521            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2522            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2523            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2524                SCHECK_PARTIAL();
2525                MRRETURN(MATCH_NOMATCH);
2526                }
2527              GETCHARINCTEST(c, eptr);
2528              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2529            }            }
2530          /* Control never gets here */          /* Control never gets here */
2531          }          }
# Line 1770  for (;;) Line 2538  for (;;)
2538          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2539            {            {
2540            int len = 1;            int len = 1;
2541            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2542            GETCHARLEN(c, eptr, len);              {
2543                SCHECK_PARTIAL();
2544                break;
2545                }
2546              GETCHARLENTEST(c, eptr, len);
2547            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2548            eptr += len;            eptr += len;
2549            }            }
2550          for(;;)          for(;;)
2551            {            {
2552            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2553            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2554            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2555            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2556            }            }
2557          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2558          }          }
2559    
2560        /* Control never gets here */        /* Control never gets here */
# Line 1798  for (;;) Line 2570  for (;;)
2570        length = 1;        length = 1;
2571        ecode++;        ecode++;
2572        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2573        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2574        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2575            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2576            MRRETURN(MATCH_NOMATCH);
2577            }
2578          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2579        }        }
2580      else      else
2581  #endif  #endif
2582    
2583      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2584        {        {
2585        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2586        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2587            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2588            MRRETURN(MATCH_NOMATCH);
2589            }
2590          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2591        ecode += 2;        ecode += 2;
2592        }        }
2593      break;      break;
# Line 1822  for (;;) Line 2602  for (;;)
2602        ecode++;        ecode++;
2603        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2604    
2605        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2606            {
2607            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2608            MRRETURN(MATCH_NOMATCH);
2609            }
2610    
2611        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2612        can use the fast lookup table. */        can use the fast lookup table. */
2613    
2614        if (fc < 128)        if (fc < 128)
2615          {          {
2616          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2617          }          }
2618    
2619        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
2620    
2621        else        else
2622          {          {
2623          int dc;          unsigned int dc;
2624          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2625          ecode += length;          ecode += length;
2626    
2627          /* If we have Unicode property support, we can use it to test the other          /* If we have Unicode property support, we can use it to test the other
2628          case of the character, if there is one. The result of ucp_findchar() is          case of the character, if there is one. */
         < 0 if the char isn't found, and othercase is returned as zero if there  
         isn't one. */  
2629    
2630          if (fc != dc)          if (fc != dc)
2631            {            {
2632  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2633            int chartype;            if (dc != UCD_OTHERCASE(fc))
           int othercase;  
           if (ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)  
2634  #endif  #endif
2635              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2636            }            }
2637          }          }
2638        }        }
# Line 1861  for (;;) Line 2641  for (;;)
2641    
2642      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2643        {        {
2644        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2645        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2646            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2647            MRRETURN(MATCH_NOMATCH);
2648            }
2649          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2650        ecode += 2;        ecode += 2;
2651        }        }
2652      break;      break;
2653    
2654      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2655    
2656      case OP_EXACT:      case OP_EXACT:
2657      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2658      ecode += 3;      ecode += 3;
2659      goto REPEATCHAR;      goto REPEATCHAR;
2660    
2661        case OP_POSUPTO:
2662        possessive = TRUE;
2663        /* Fall through */
2664    
2665      case OP_UPTO:      case OP_UPTO:
2666      case OP_MINUPTO:      case OP_MINUPTO:
2667      min = 0;      min = 0;
# Line 1882  for (;;) Line 2670  for (;;)
2670      ecode += 3;      ecode += 3;
2671      goto REPEATCHAR;      goto REPEATCHAR;
2672    
2673        case OP_POSSTAR:
2674        possessive = TRUE;
2675        min = 0;
2676        max = INT_MAX;
2677        ecode++;
2678        goto REPEATCHAR;
2679    
2680        case OP_POSPLUS:
2681        possessive = TRUE;
2682        min = 1;
2683        max = INT_MAX;
2684        ecode++;
2685        goto REPEATCHAR;
2686    
2687        case OP_POSQUERY:
2688        possessive = TRUE;
2689        min = 0;
2690        max = 1;
2691        ecode++;
2692        goto REPEATCHAR;
2693    
2694      case OP_STAR:      case OP_STAR:
2695      case OP_MINSTAR:      case OP_MINSTAR:
2696      case OP_PLUS:      case OP_PLUS:
# Line 1890  for (;;) Line 2699  for (;;)
2699      case OP_MINQUERY:      case OP_MINQUERY:
2700      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2701      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2702    
2703      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2704      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2705      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2706    
2707      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2708    
2709      REPEATCHAR:      REPEATCHAR:
2710  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1905  for (;;) Line 2713  for (;;)
2713        length = 1;        length = 1;
2714        charptr = ecode;        charptr = ecode;
2715        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2716        ecode += length;        ecode += length;
2717    
2718        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 1913  for (;;) Line 2720  for (;;)
2720    
2721        if (length > 1)        if (length > 1)
2722          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2723  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2724          int othercase;          unsigned int othercase;
         int chartype;  
2725          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2726               ucp_findchar(fc, &chartype, &othercase) >= 0 &&              (othercase = UCD_OTHERCASE(fc)) != fc)
              othercase > 0)  
2727            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2728            else oclength = 0;
2729  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2730    
2731          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2732            {            {
2733            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2734            /* Need braces because of following else */              memcmp(eptr, charptr, length) == 0) eptr += length;
2735            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2736              else if (oclength > 0 &&
2737                       eptr <= md->end_subject - oclength &&
2738                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2739    #endif  /* SUPPORT_UCP */
2740            else            else
2741              {              {
2742              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2743              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2744              }              }
2745            }            }
2746    
# Line 1943  for (;;) Line 2750  for (;;)
2750            {            {
2751            for (fi = min;; fi++)            for (fi = min;; fi++)
2752              {              {
2753              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2754              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2755              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2756              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2757              /* Need braces because of following else */                memcmp(eptr, charptr, length) == 0) eptr += length;
2758              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2759                else if (oclength > 0 &&
2760                         eptr <= md->end_subject - oclength &&
2761                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2762    #endif  /* SUPPORT_UCP */
2763              else              else
2764                {                {
2765                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2766                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2767                }                }
2768              }              }
2769            /* Control never gets here */            /* Control never gets here */
2770            }            }
2771          else  
2772            else  /* Maximize */
2773            {            {
2774            pp = eptr;            pp = eptr;
2775            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2776              {              {
2777              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2778              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2779              else if (oclength == 0) break;  #ifdef SUPPORT_UCP
2780                else if (oclength > 0 &&
2781                         eptr <= md->end_subject - oclength &&
2782                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2783    #endif  /* SUPPORT_UCP */
2784              else              else
2785                {                {
2786                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2787                eptr += oclength;                break;
2788                }                }
2789              }              }
2790            while (eptr >= pp)  
2791             {            if (possessive) continue;
2792             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);  
2793             if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for(;;)
2794             eptr -= length;              {
2795             }              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2796            RRETURN(MATCH_NOMATCH);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2797                if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2798    #ifdef SUPPORT_UCP
2799                eptr--;
2800                BACKCHAR(eptr);
2801    #else   /* without SUPPORT_UCP */
2802                eptr -= length;
2803    #endif  /* SUPPORT_UCP */
2804                }
2805            }            }
2806          /* Control never gets here */          /* Control never gets here */
2807          }          }
# Line 1990  for (;;) Line 2814  for (;;)
2814  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2815    
2816      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2817        {  
2818        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2819    
2820      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2821      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2011  for (;;) Line 2833  for (;;)
2833        {        {
2834        fc = md->lcc[fc];        fc = md->lcc[fc];
2835        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2836          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2837            if (eptr >= md->end_subject)
2838              {
2839              SCHECK_PARTIAL();
2840              MRRETURN(MATCH_NOMATCH);
2841              }
2842            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2843            }
2844        if (min == max) continue;        if (min == max) continue;
2845        if (minimize)        if (minimize)
2846          {          {
2847          for (fi = min;; fi++)          for (fi = min;; fi++)
2848            {            {
2849            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2850            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2851            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2852                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2853              RRETURN(MATCH_NOMATCH);              {
2854                SCHECK_PARTIAL();
2855                MRRETURN(MATCH_NOMATCH);
2856                }
2857              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2858            }            }
2859          /* Control never gets here */          /* Control never gets here */
2860          }          }
2861        else        else  /* Maximize */
2862          {          {
2863          pp = eptr;          pp = eptr;
2864          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2865            {            {
2866            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2867                {
2868                SCHECK_PARTIAL();
2869                break;
2870                }
2871              if (fc != md->lcc[*eptr]) break;
2872            eptr++;            eptr++;
2873            }            }
2874    
2875            if (possessive) continue;
2876    
2877          while (eptr >= pp)          while (eptr >= pp)
2878            {            {
2879            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2880            eptr--;            eptr--;
2881            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2882            }            }
2883          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2884          }          }
2885        /* Control never gets here */        /* Control never gets here */
2886        }        }
# Line 2048  for (;;) Line 2889  for (;;)
2889    
2890      else      else
2891        {        {
2892        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2893            {
2894            if (eptr >= md->end_subject)
2895              {
2896              SCHECK_PARTIAL();
2897              MRRETURN(MATCH_NOMATCH);
2898              }
2899            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2900            }
2901    
2902        if (min == max) continue;        if (min == max) continue;
2903    
2904        if (minimize)        if (minimize)
2905          {          {
2906          for (fi = min;; fi++)          for (fi = min;; fi++)
2907            {            {
2908            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2909            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2910            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2911              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2912                {
2913                SCHECK_PARTIAL();
2914                MRRETURN(MATCH_NOMATCH);
2915                }
2916              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2917            }            }
2918          /* Control never gets here */          /* Control never gets here */
2919          }          }
2920        else        else  /* Maximize */
2921          {          {
2922          pp = eptr;          pp = eptr;
2923          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2924            {            {
2925            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
2926                {
2927                SCHECK_PARTIAL();
2928                break;
2929                }
2930              if (fc != *eptr) break;
2931            eptr++;            eptr++;
2932            }            }
2933            if (possessive) continue;
2934    
2935          while (eptr >= pp)          while (eptr >= pp)
2936            {            {
2937            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2938            eptr--;            eptr--;
2939            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2940            }            }
2941          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2942          }          }
2943        }        }
2944      /* Control never gets here */      /* Control never gets here */
# Line 2084  for (;;) Line 2947  for (;;)
2947      checking can be multibyte. */      checking can be multibyte. */
2948    
2949      case OP_NOT:      case OP_NOT:
2950      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2951          {
2952          SCHECK_PARTIAL();
2953          MRRETURN(MATCH_NOMATCH);
2954          }
2955      ecode++;      ecode++;
2956      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2957      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2093  for (;;) Line 2960  for (;;)
2960        if (c < 256)        if (c < 256)
2961  #endif  #endif
2962        c = md->lcc[c];        c = md->lcc[c];
2963        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
2964        }        }
2965      else      else
2966        {        {
2967        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
2968        }        }
2969      break;      break;
2970    
# Line 2121  for (;;) Line 2988  for (;;)
2988      ecode += 3;      ecode += 3;
2989      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2990    
2991        case OP_NOTPOSSTAR:
2992        possessive = TRUE;
2993        min = 0;
2994        max = INT_MAX;
2995        ecode++;
2996        goto REPEATNOTCHAR;
2997    
2998        case OP_NOTPOSPLUS:
2999        possessive = TRUE;
3000        min = 1;
3001        max = INT_MAX;
3002        ecode++;
3003        goto REPEATNOTCHAR;
3004    
3005        case OP_NOTPOSQUERY:
3006        possessive = TRUE;
3007        min = 0;
3008        max = 1;
3009        ecode++;
3010        goto REPEATNOTCHAR;
3011    
3012        case OP_NOTPOSUPTO:
3013        possessive = TRUE;
3014        min = 0;
3015        max = GET2(ecode, 1);
3016        ecode += 3;
3017        goto REPEATNOTCHAR;
3018    
3019      case OP_NOTSTAR:      case OP_NOTSTAR:
3020      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
3021      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2133  for (;;) Line 3028  for (;;)
3028      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3029      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3030    
3031      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3032    
3033      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3034      fc = *ecode++;      fc = *ecode++;
3035    
3036      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2160  for (;;) Line 3052  for (;;)
3052        /* UTF-8 mode */        /* UTF-8 mode */
3053        if (utf8)        if (utf8)
3054          {          {
3055          register int d;          register unsigned int d;
3056          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3057            {            {
3058              if (eptr >= md->end_subject)
3059                {
3060                SCHECK_PARTIAL();
3061                MRRETURN(MATCH_NOMATCH);
3062                }
3063            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3064            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3065            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3066            }            }
3067          }          }
3068        else        else
# Line 2174  for (;;) Line 3071  for (;;)
3071        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3072          {          {
3073          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3074            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3075              if (eptr >= md->end_subject)
3076                {
3077                SCHECK_PARTIAL();
3078                MRRETURN(MATCH_NOMATCH);
3079                }
3080              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3081              }
3082          }          }
3083    
3084        if (min == max) continue;        if (min == max) continue;
# Line 2185  for (;;) Line 3089  for (;;)
3089          /* UTF-8 mode */          /* UTF-8 mode */
3090          if (utf8)          if (utf8)
3091            {            {
3092            register int d;            register unsigned int d;
3093            for (fi = min;; fi++)            for (fi = min;; fi++)
3094              {              {
3095              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3096              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3097                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3098                if (eptr >= md->end_subject)
3099                  {
3100                  SCHECK_PARTIAL();
3101                  MRRETURN(MATCH_NOMATCH);
3102                  }
3103              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3104              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3105              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3106              }              }
3107            }            }
3108          else          else
# Line 2202  for (;;) Line 3111  for (;;)
3111            {            {
3112            for (fi = min;; fi++)            for (fi = min;; fi++)
3113              {              {
3114              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3115              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3116              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3117                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3118                  {
3119                  SCHECK_PARTIAL();
3120                  MRRETURN(MATCH_NOMATCH);
3121                  }
3122                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3123              }              }
3124            }            }
3125          /* Control never gets here */          /* Control never gets here */
# Line 2221  for (;;) Line 3135  for (;;)
3135          /* UTF-8 mode */          /* UTF-8 mode */
3136          if (utf8)          if (utf8)
3137            {            {
3138            register int d;            register unsigned int d;
3139            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3140              {              {
3141              int len = 1;              int len = 1;
3142              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3143                  {
3144                  SCHECK_PARTIAL();
3145                  break;
3146                  }
3147              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3148              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3149              if (fc == d) break;              if (fc == d) break;
3150              eptr += len;              eptr += len;
3151              }              }
3152            for(;;)          if (possessive) continue;
3153            for(;;)
3154              {              {
3155              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3156              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3157              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3158              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2245  for (;;) Line 3164  for (;;)
3164            {            {
3165            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3166              {              {
3167              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3168                  {
3169                  SCHECK_PARTIAL();
3170                  break;
3171                  }
3172                if (fc == md->lcc[*eptr]) break;
3173              eptr++;              eptr++;
3174              }              }
3175              if (possessive) continue;
3176            while (eptr >= pp)            while (eptr >= pp)
3177              {              {
3178              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3179              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3180              eptr--;              eptr--;
3181              }              }
3182            }            }
3183    
3184          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3185          }          }
3186        /* Control never gets here */        /* Control never gets here */
3187        }        }
# Line 2269  for (;;) Line 3194  for (;;)
3194        /* UTF-8 mode */        /* UTF-8 mode */
3195        if (utf8)        if (utf8)
3196          {          {
3197          register int d;          register unsigned int d;
3198          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3199            {            {
3200              if (eptr >= md->end_subject)
3201                {
3202                SCHECK_PARTIAL();
3203                MRRETURN(MATCH_NOMATCH);
3204                }
3205            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3206            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3207            }            }
3208          }          }
3209        else        else
# Line 2281  for (;;) Line 3211  for (;;)
3211        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3212          {          {
3213          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3214            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3215              if (eptr >= md->end_subject)
3216                {
3217                SCHECK_PARTIAL();
3218                MRRETURN(MATCH_NOMATCH);
3219                }
3220              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3221              }
3222          }          }
3223    
3224        if (min == max) continue;        if (min == max) continue;
# Line 2292  for (;;) Line 3229  for (;;)
3229          /* UTF-8 mode */          /* UTF-8 mode */
3230          if (utf8)          if (utf8)
3231            {            {
3232            register int d;            register unsigned int d;
3233            for (fi = min;; fi++)            for (fi = min;; fi++)
3234              {              {
3235              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3236              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3237                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3238                if (eptr >= md->end_subject)
3239                  {
3240                  SCHECK_PARTIAL();
3241                  MRRETURN(MATCH_NOMATCH);
3242                  }
3243              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3244              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3245              }              }
3246            }            }
3247          else          else
# Line 2308  for (;;) Line 3250  for (;;)
3250            {            {
3251            for (fi = min;; fi++)            for (fi = min;; fi++)
3252              {              {
3253              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3254              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3255              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3256                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3257                  {
3258                  SCHECK_PARTIAL();
3259                  MRRETURN(MATCH_NOMATCH);
3260                  }
3261                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3262              }              }
3263            }            }
3264          /* Control never gets here */          /* Control never gets here */
# Line 2327  for (;;) Line 3274  for (;;)
3274          /* UTF-8 mode */          /* UTF-8 mode */
3275          if (utf8)          if (utf8)
3276            {            {
3277            register int d;            register unsigned int d;
3278            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3279              {              {
3280              int len = 1;              int len = 1;
3281              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3282                  {
3283                  SCHECK_PARTIAL();
3284                  break;
3285                  }
3286              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3287              if (fc == d) break;              if (fc == d) break;
3288              eptr += len;              eptr += len;
3289              }              }
3290              if (possessive) continue;
3291            for(;;)            for(;;)
3292              {              {
3293              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3294              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3295              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3296              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2350  for (;;) Line 3302  for (;;)
3302            {            {
3303            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3304              {              {
3305              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3306                  {
3307                  SCHECK_PARTIAL();
3308                  break;
3309                  }
3310                if (fc == *eptr) break;
3311              eptr++;              eptr++;
3312              }              }
3313              if (possessive) continue;
3314            while (eptr >= pp)            while (eptr >= pp)
3315              {              {
3316              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3317              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3318              eptr--;              eptr--;
3319              }              }
3320            }            }
3321    
3322          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3323          }          }
3324        }        }
3325      /* Control never gets here */      /* Control never gets here */
# Line 2384  for (;;) Line 3342  for (;;)
3342      ecode += 3;      ecode += 3;
3343      goto REPEATTYPE;      goto REPEATTYPE;
3344    
3345        case OP_TYPEPOSSTAR:
3346        possessive = TRUE;
3347        min = 0;
3348        max = INT_MAX;
3349        ecode++;
3350        goto REPEATTYPE;
3351    
3352        case OP_TYPEPOSPLUS:
3353        possessive = TRUE;
3354        min = 1;
3355        max = INT_MAX;
3356        ecode++;
3357        goto REPEATTYPE;
3358    
3359        case OP_TYPEPOSQUERY:
3360        possessive = TRUE;
3361        min = 0;
3362        max = 1;
3363        ecode++;
3364        goto REPEATTYPE;
3365    
3366        case OP_TYPEPOSUPTO:
3367        possessive = TRUE;
3368        min = 0;
3369        max = GET2(ecode, 1);
3370        ecode += 3;
3371        goto REPEATTYPE;
3372    
3373      case OP_TYPESTAR:      case OP_TYPESTAR:
3374      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
3375      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2408  for (;;) Line 3394  for (;;)
3394        {        {
3395        prop_fail_result = ctype == OP_NOTPROP;        prop_fail_result = ctype == OP_NOTPROP;
3396        prop_type = *ecode++;        prop_type = *ecode++;
3397        if (prop_type >= 128)        prop_value = *ecode++;
         {  
         prop_test_against = prop_type - 128;  
         prop_test_variable = &prop_category;  
         }  
       else  
         {  
         prop_test_against = prop_type;  
         prop_test_variable = &prop_chartype;  
         }  
3398        }        }
3399      else prop_type = -1;      else prop_type = -1;
3400  #endif  #endif
3401    
3402      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3403      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3404      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3405      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3406      and single-bytes. */      and single-bytes. */
3407    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3408      if (min > 0)      if (min > 0)
3409        {        {
3410  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3411        if (prop_type > 0)        if (prop_type >= 0)
3412          {          {
3413          for (i = 1; i <= min; i++)          switch(prop_type)
3414            {            {
3415            GETCHARINC(c, eptr);            case PT_ANY:
3416            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3417            if ((*prop_test_variable == prop_test_against) == prop_fail_result)            for (i = 1; i <= min; i++)
3418              RRETURN(MATCH_NOMATCH);              {
3419                if (eptr >= md->end_subject)
3420                  {
3421                  SCHECK_PARTIAL();
3422                  MRRETURN(MATCH_NOMATCH);
3423                  }
3424                GETCHARINCTEST(c, eptr);
3425                }
3426              break;
3427    
3428              case PT_LAMP:
3429              for (i = 1; i <= min; i++)
3430                {
3431                if (eptr >= md->end_subject)
3432                  {
3433                  SCHECK_PARTIAL();
3434                  MRRETURN(MATCH_NOMATCH);
3435                  }
3436                GETCHARINCTEST(c, eptr);
3437                prop_chartype = UCD_CHARTYPE(c);
3438                if ((prop_chartype == ucp_Lu ||
3439                     prop_chartype == ucp_Ll ||
3440                     prop_chartype == ucp_Lt) == prop_fail_result)
3441                  MRRETURN(MATCH_NOMATCH);
3442                }
3443              break;
3444    
3445              case PT_GC:
3446              for (i = 1; i <= min; i++)
3447                {
3448                if (eptr >= md->end_subject)
3449                  {
3450                  SCHECK_PARTIAL();
3451                  MRRETURN(MATCH_NOMATCH);
3452                  }
3453                GETCHARINCTEST(c, eptr);
3454                prop_category = UCD_CATEGORY(c);
3455                if ((prop_category == prop_value) == prop_fail_result)
3456                  MRRETURN(MATCH_NOMATCH);
3457                }
3458              break;
3459    
3460              case PT_PC:
3461              for (i = 1; i <= min; i++)
3462                {
3463                if (eptr >= md->end_subject)
3464                  {
3465                  SCHECK_PARTIAL();
3466                  MRRETURN(MATCH_NOMATCH);
3467                  }
3468                GETCHARINCTEST(c, eptr);
3469                prop_chartype = UCD_CHARTYPE(c);
3470                if ((prop_chartype == prop_value) == prop_fail_result)
3471                  MRRETURN(MATCH_NOMATCH);
3472                }
3473              break;
3474    
3475              case PT_SC:
3476              for (i = 1; i <= min; i++)
3477                {
3478                if (eptr >= md->end_subject)
3479                  {
3480                  SCHECK_PARTIAL();
3481                  MRRETURN(MATCH_NOMATCH);
3482                  }
3483                GETCHARINCTEST(c, eptr);
3484                prop_script = UCD_SCRIPT(c);
3485                if ((prop_script == prop_value) == prop_fail_result)
3486                  MRRETURN(MATCH_NOMATCH);
3487                }
3488              break;
3489    
3490              default:
3491              RRETURN(PCRE_ERROR_INTERNAL);
3492            }            }
3493          }          }
3494    
# Line 2452  for (;;) Line 3499  for (;;)
3499          {          {
3500          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3501            {            {
3502              if (eptr >= md->end_subject)
3503                {
3504                SCHECK_PARTIAL();
3505                MRRETURN(MATCH_NOMATCH);
3506                }
3507            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3508            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = UCD_CATEGORY(c);
3509            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3510            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3511              {              {
3512              int len = 1;              int len = 1;
3513              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3514                {                else { GETCHARLEN(c, eptr, len); }
3515                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);  
3516              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3517              eptr += len;              eptr += len;
3518              }              }
# Line 2480  for (;;) Line 3530  for (;;)
3530          case OP_ANY:          case OP_ANY:
3531          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3532            {            {
3533            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3534               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))              {
3535              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3536                MRRETURN(MATCH_NOMATCH);
3537                }
3538              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3539              eptr++;
3540              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3541              }
3542            break;
3543    
3544            case OP_ALLANY:
3545            for (i = 1; i <= min; i++)
3546              {
3547              if (eptr >= md->end_subject)
3548                {
3549                SCHECK_PARTIAL();
3550