/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 501 by ph10, Sun Mar 7 11:49:54 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49    #define NLBLOCK md             /* Block containing newline information */
50    #define PSSTART start_subject  /* Field containing processed string start */
51    #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    /* Undefine some potentially clashing cpp symbols */
56    
57  /* Structure for building a chain of data that actually lives on the  #undef min
58  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   const uschar *epb_saved_eptr;  
 } eptrblock;  
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_THEN         (-996)
78    
79  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
80  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 81  static const char rep_max[] = { 0, 0, 0, Line 89  static const char rep_max[] = { 0, 0, 0,
89    
90    
91    
92  #ifdef DEBUG  #ifdef PCRE_DEBUG
93  /*************************************************  /*************************************************
94  *        Debugging function to print chars       *  *        Debugging function to print chars       *
95  *************************************************/  *************************************************/
# Line 101  Returns:     nothing Line 109  Returns:     nothing
109  static void  static void
110  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
111  {  {
112  int c;  unsigned int c;
113  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
114  while (length-- > 0)  while (length-- > 0)
115    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 128  Returns:      TRUE if matched Line 136  Returns:      TRUE if matched
136  */  */
137    
138  static BOOL  static BOOL
139  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
140    unsigned long int ims)    unsigned long int ims)
141  {  {
142  const uschar *p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
143    
144  #ifdef DEBUG  #ifdef PCRE_DEBUG
145  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
146    printf("matching subject <null>");    printf("matching subject <null>");
147  else  else
# Line 150  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 169  return TRUE; Line 203  return TRUE;
203  ****************************************************************************  ****************************************************************************
204                     RECURSION IN THE match() FUNCTION                     RECURSION IN THE match() FUNCTION
205    
206  The match() function is highly recursive. Some regular expressions can cause  The match() function is highly recursive, though not every recursive call
207  it to recurse thousands of times. I was writing for Unix, so I just let it  increases the recursive depth. Nevertheless, some regular expressions can cause
208  call itself recursively. This uses the stack for saving everything that has  it to recurse to a great depth. I was writing for Unix, so I just let it call
209  to be saved for a recursive call. On Unix, the stack can be large, and this  itself recursively. This uses the stack for saving everything that has to be
210  works fine.  saved for a recursive call. On Unix, the stack can be large, and this works
211    fine.
212  It turns out that on non-Unix systems there are problems with programs that  
213  use a lot of stack. (This despite the fact that every last chip has oodles  It turns out that on some non-Unix-like systems there are problems with
214  of memory these days, and techniques for extending the stack have been known  programs that use a lot of stack. (This despite the fact that every last chip
215  for decades.) So....  has oodles of memory these days, and techniques for extending the stack have
216    been known for decades.) So....
217    
218  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
219  calls by keeping local variables that need to be preserved in blocks of memory  calls by keeping local variables that need to be preserved in blocks of memory
220  obtained from malloc instead instead of on the stack. Macros are used to  obtained from malloc() instead instead of on the stack. Macros are used to
221  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
222  always used to.  always used to.
223    
224    The original heap-recursive code used longjmp(). However, it seems that this
225    can be very slow on some operating systems. Following a suggestion from Stan
226    Switzer, the use of longjmp() has been abolished, at the cost of having to
227    provide a unique number for each call to RMATCH. There is no way of generating
228    a sequence of numbers at compile time in C. I have given them names, to make
229    them stand out more clearly.
230    
231    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
232    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
233    tests. Furthermore, not using longjmp() means that local dynamic variables
234    don't have indeterminate values; this has meant that the frame size can be
235    reduced because the result can be "passed back" by straight setting of the
236    variable instead of being passed in the frame.
237  ****************************************************************************  ****************************************************************************
238  ***************************************************************************/  ***************************************************************************/
239    
240    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
241    below must be updated in sync.  */
242    
243  /* These versions of the macros use the stack, as normal */  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
244           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
245           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
246           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
247           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
248           RM51,  RM52, RM53, RM54 };
249    
250    /* These versions of the macros use the stack, as normal. There are debugging
251    versions and production versions. Note that the "rw" argument of RMATCH isn't
252    actually used in this definition. */
253    
254  #ifndef NO_RECURSE  #ifndef NO_RECURSE
255  #define REGISTER register  #define REGISTER register
256  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)  
257    #ifdef PCRE_DEBUG
258    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
259      { \
260      printf("match() called in line %d\n", __LINE__); \
261      rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
262      printf("to line %d\n", __LINE__); \
263      }
264    #define RRETURN(ra) \
265      { \
266      printf("match() returned %d from line %d ", ra, __LINE__); \
267      return ra; \
268      }
269    #else
270    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
271      rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
272  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
273    #endif
274    
275  #else  #else
276    
277    
278  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
279  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
280  match(), which never changes. */  argument of match(), which never changes. */
281    
282  #define REGISTER  #define REGISTER
283    
284  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
285    {\    {\
286    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
287    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
288      {\    newframe->Xeptr = ra;\
289      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
290      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
291      newframe->Xoffset_top = rc;\    newframe->Xmarkptr = markptr;\
292      newframe->Xims = re;\    newframe->Xoffset_top = rc;\
293      newframe->Xeptrb = rf;\    newframe->Xims = re;\
294      newframe->Xflags = rg;\    newframe->Xeptrb = rf;\
295      newframe->Xprevframe = frame;\    newframe->Xflags = rg;\
296      frame = newframe;\    newframe->Xrdepth = frame->Xrdepth + 1;\
297      DPRINTF(("restarting from line %d\n", __LINE__));\    newframe->Xprevframe = frame;\
298      goto HEAP_RECURSE;\    frame = newframe;\
299      }\    DPRINTF(("restarting from line %d\n", __LINE__));\
300    else\    goto HEAP_RECURSE;\
301      {\    L_##rw:\
302      DPRINTF(("longjumped back to line %d\n", __LINE__));\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
303    }    }
304    
305  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 235  match(), which never changes. */ Line 309  match(), which never changes. */
309    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
310    if (frame != NULL)\    if (frame != NULL)\
311      {\      {\
312      frame->Xresult = ra;\      rrc = ra;\
313      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
314      }\      }\
315    return ra;\    return ra;\
316    }    }
# Line 250  typedef struct heapframe { Line 323  typedef struct heapframe {
323    
324    /* Function arguments that may change */    /* Function arguments that may change */
325    
326    const uschar *Xeptr;    USPTR Xeptr;
327    const uschar *Xecode;    const uschar *Xecode;
328      USPTR Xmstart;
329      USPTR Xmarkptr;
330    int Xoffset_top;    int Xoffset_top;
331    long int Xims;    long int Xims;
332    eptrblock *Xeptrb;    eptrblock *Xeptrb;
333    int Xflags;    int Xflags;
334      unsigned int Xrdepth;
335    
336    /* Function local variables */    /* Function local variables */
337    
338    const uschar *Xcallpat;    USPTR Xcallpat;
339    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
340    const uschar *Xdata;    USPTR Xcharptr;
341    const uschar *Xnext;  #endif
342    const uschar *Xpp;    USPTR Xdata;
343    const uschar *Xprev;    USPTR Xnext;
344    const uschar *Xsaved_eptr;    USPTR Xpp;
345      USPTR Xprev;
346      USPTR Xsaved_eptr;
347    
348    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
349    
350    BOOL Xcur_is_word;    BOOL Xcur_is_word;
351    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
352    BOOL Xprev_is_word;    BOOL Xprev_is_word;
353    
354    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
355    
356  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
357    int Xprop_type;    int Xprop_type;
358      int Xprop_value;
359    int Xprop_fail_result;    int Xprop_fail_result;
360    int Xprop_category;    int Xprop_category;
361    int Xprop_chartype;    int Xprop_chartype;
362    int Xprop_othercase;    int Xprop_script;
363    int Xprop_test_against;    int Xoclength;
364    int *Xprop_test_variable;    uschar Xocchars[8];
365  #endif  #endif
366    
367      int Xcodelink;
368    int Xctype;    int Xctype;
369    int Xfc;    unsigned int Xfc;
370    int Xfi;    int Xfi;
371    int Xlength;    int Xlength;
372    int Xmax;    int Xmax;
# Line 301  typedef struct heapframe { Line 380  typedef struct heapframe {
380    
381    eptrblock Xnewptrb;    eptrblock Xnewptrb;
382    
383    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
384    
385    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
386    
387  } heapframe;  } heapframe;
388    
# Line 320  typedef struct heapframe { Line 398  typedef struct heapframe {
398  *         Match from current position            *  *         Match from current position            *
399  *************************************************/  *************************************************/
400    
401  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
402  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
403  same response.  same response. */
404    
405  Performance note: It might be tempting to extract commonly used fields from the  /* These macros pack up tests that are used for partial matching, and which
406  md structure (e.g. utf8, end_subject) into individual variables to improve  appears several times in the code. We set the "hit end" flag if the pointer is
407    at the end of the subject and also past the start of the subject (i.e.
408    something has been matched). For hard partial matching, we then return
409    immediately. The second one is used when we already know we are past the end of
410    the subject. */
411    
412    #define CHECK_PARTIAL()\
413      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
414        {\
415        md->hitend = TRUE;\
416        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
417        }
418    
419    #define SCHECK_PARTIAL()\
420      if (md->partial != 0 && eptr > mstart)\
421        {\
422        md->hitend = TRUE;\
423        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
424        }
425    
426    
427    /* Performance note: It might be tempting to extract commonly used fields from
428    the md structure (e.g. utf8, end_subject) into individual variables to improve
429  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
430  made performance worse.  made performance worse.
431    
432  Arguments:  Arguments:
433     eptr        pointer in subject     eptr        pointer to current character in subject
434     ecode       position in code     ecode       pointer to current position in compiled code
435       mstart      pointer to the current match start position (can be modified
436                     by encountering \K)
437       markptr     pointer to the most recent MARK name, or NULL
438     offset_top  current top pointer     offset_top  current top pointer
439     md          pointer to "static" info for the match     md          pointer to "static" info for the match
440     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 342  Arguments: Line 442  Arguments:
442                   brackets - for testing for empty matches                   brackets - for testing for empty matches
443     flags       can contain     flags       can contain
444                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
445                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
446                       group that can match an empty string
447       rdepth      the recursion depth
448    
449  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
450                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
451                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
452                   (e.g. stopped by recursion limit)                   (e.g. stopped by repeated call or recursion limit)
453  */  */
454    
455  static int  static int
456  match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart, USPTR
457    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    markptr, int offset_top, match_data *md, unsigned long int ims,
458    int flags)    eptrblock *eptrb, int flags, unsigned int rdepth)
459  {  {
460  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
461  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
462  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
463    
464  register int  rrc;    /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
465  register int  i;      /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
466  register int  c;      /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
467  register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
468    
469    BOOL minimize, possessive; /* Quantifier options */
470    int condcode;
471    
472  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
473  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 377  frame->Xprevframe = NULL;            /* Line 482  frame->Xprevframe = NULL;            /*
482    
483  frame->Xeptr = eptr;  frame->Xeptr = eptr;
484  frame->Xecode = ecode;  frame->Xecode = ecode;
485    frame->Xmstart = mstart;
486    frame->Xmarkptr = markptr;
487  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
488  frame->Xims = ims;  frame->Xims = ims;
489  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
490  frame->Xflags = flags;  frame->Xflags = flags;
491    frame->Xrdepth = rdepth;
492    
493  /* This is where control jumps back to to effect "recursion" */  /* This is where control jumps back to to effect "recursion" */
494    
# Line 390  HEAP_RECURSE: Line 498  HEAP_RECURSE:
498    
499  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
500  #define ecode              frame->Xecode  #define ecode              frame->Xecode
501    #define mstart             frame->Xmstart
502    #define markptr            frame->Xmarkptr
503  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
504  #define ims                frame->Xims  #define ims                frame->Xims
505  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
506  #define flags              frame->Xflags  #define flags              frame->Xflags
507    #define rdepth             frame->Xrdepth
508    
509  /* Ditto for the local variables */  /* Ditto for the local variables */
510    
# Line 401  HEAP_RECURSE: Line 512  HEAP_RECURSE:
512  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
513  #endif  #endif
514  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
515    #define codelink           frame->Xcodelink
516  #define data               frame->Xdata  #define data               frame->Xdata
517  #define next               frame->Xnext  #define next               frame->Xnext
518  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 411  HEAP_RECURSE: Line 523  HEAP_RECURSE:
523    
524  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
525  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
526  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
527    
528  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
529    
530  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
531  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
532    #define prop_value         frame->Xprop_value
533  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
534  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
535  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
536  #define prop_othercase     frame->Xprop_othercase  #define prop_script        frame->Xprop_script
537  #define prop_test_against  frame->Xprop_test_against  #define oclength           frame->Xoclength
538  #define prop_test_variable frame->Xprop_test_variable  #define occhars            frame->Xocchars
539  #endif  #endif
540    
541  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 447  HEAP_RECURSE: Line 559  HEAP_RECURSE:
559  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
560  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
561    
562  #else  #else         /* NO_RECURSE not defined */
563  #define fi i  #define fi i
564  #define fc c  #define fc c
565    
566    
567  #ifdef SUPPORT_UTF8                /* Many of these variables are used ony */  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
568  const uschar *charptr;             /* small blocks of the code. My normal  */  const uschar *charptr;             /* in small blocks of the code. My normal */
569  #endif                             /* style of coding would have declared  */  #endif                             /* style of coding would have declared    */
570  const uschar *callpat;             /* them within each of those blocks.    */  const uschar *callpat;             /* them within each of those blocks.      */
571  const uschar *data;                /* However, in order to accommodate the */  const uschar *data;                /* However, in order to accommodate the   */
572  const uschar *next;                /* version of this code that uses an    */  const uschar *next;                /* version of this code that uses an      */
573  const uschar *pp;                  /* external "stack" implemented on the  */  USPTR         pp;                  /* external "stack" implemented on the    */
574  const uschar *prev;                /* heap, it is easier to declare them   */  const uschar *prev;                /* heap, it is easier to declare them all */
575  const uschar *saved_eptr;          /* all here, so the declarations can    */  USPTR         saved_eptr;          /* here, so the declarations can be cut   */
576                                     /* be cut out in a block. The only      */                                     /* out in a block. The only declarations  */
577  recursion_info new_recursive;      /* declarations within blocks below are */  recursion_info new_recursive;      /* within blocks below are for variables  */
578                                     /* for variables that do not have to    */                                     /* that do not have to be preserved over  */
579  BOOL cur_is_word;                  /* be preserved over a recursive call   */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
580  BOOL condition;                    /* to RMATCH().                         */  BOOL condition;
 BOOL minimize;  
581  BOOL prev_is_word;  BOOL prev_is_word;
582    
583  unsigned long int original_ims;  unsigned long int original_ims;
584    
585  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
586  int prop_type;  int prop_type;
587    int prop_value;
588  int prop_fail_result;  int prop_fail_result;
589  int prop_category;  int prop_category;
590  int prop_chartype;  int prop_chartype;
591  int prop_othercase;  int prop_script;
592  int prop_test_against;  int oclength;
593  int *prop_test_variable;  uschar occhars[8];
594  #endif  #endif
595    
596    int codelink;
597  int ctype;  int ctype;
598  int length;  int length;
599  int max;  int max;
# Line 493  int save_offset1, save_offset2, save_off Line 606  int save_offset1, save_offset2, save_off
606  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
607    
608  eptrblock newptrb;  eptrblock newptrb;
609  #endif  #endif     /* NO_RECURSE */
610    
611  /* These statements are here to stop the compiler complaining about unitialized  /* These statements are here to stop the compiler complaining about unitialized
612  variables. */  variables. */
613    
614  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
615    prop_value = 0;
616  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_against = 0;  
 prop_test_variable = NULL;  
617  #endif  #endif
618    
619  /* OK, now we can get on with the real code of the function. Recursion is  
620  specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,  /* This label is used for tail recursion, which is used in a few cases even
621  these just turn into a recursive call to match() and a "return", respectively.  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
622  However, RMATCH isn't like a function call because it's quite a complicated  used. Thanks to Ian Taylor for noticing this possibility and sending the
623  macro. It has to be used in one particular way. This shouldn't, however, impact  original patch. */
624  performance when true recursion is being used. */  
625    TAIL_RECURSE:
626    
627    /* OK, now we can get on with the real code of the function. Recursive calls
628    are specified by the macro RMATCH and RRETURN is used to return. When
629    NO_RECURSE is *not* defined, these just turn into a recursive call to match()
630    and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
631    defined). However, RMATCH isn't like a function call because it's quite a
632    complicated macro. It has to be used in one particular way. This shouldn't,
633    however, impact performance when true recursion is being used. */
634    
635    #ifdef SUPPORT_UTF8
636    utf8 = md->utf8;       /* Local copy of the flag */
637    #else
638    utf8 = FALSE;
639    #endif
640    
641    /* First check that we haven't called match() too many times, or that we
642    haven't exceeded the recursive call limit. */
643    
644  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
645    if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
646    
647  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
 utf8 = md->utf8;       /* Local copy of the flag */  
648    
649  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
650  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
651  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
652  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
653    When match() is called in other circumstances, don't add to the chain. The
654    match_cbegroup flag must NOT be used with tail recursion, because the memory
655    block that is used is on the stack, so a new one may be required for each
656    match(). */
657    
658  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
659    {    {
   newptrb.epb_prev = eptrb;  
660    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
661      newptrb.epb_prev = eptrb;
662    eptrb = &newptrb;    eptrb = &newptrb;
663    }    }
664    
665  /* Now start processing the operations. */  /* Now start processing the opcodes. */
666    
667  for (;;)  for (;;)
668    {    {
669      minimize = possessive = FALSE;
670    op = *ecode;    op = *ecode;
   minimize = FALSE;  
   
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
671    
672    if (md->partial &&    switch(op)
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
   /* Opening capturing bracket. If there is space in the offset vector, save  
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
673      {      {
674      number = op - OP_BRA;      case OP_FAIL:
675        RRETURN(MATCH_NOMATCH);
     /* For extended extraction brackets (large number), we have to fish out the  
     number from a dummy opcode at the start. */  
676    
677      if (number > EXTRACT_BASIC_MAX)      case OP_PRUNE:
678        number = GET2(ecode, 2+LINK_SIZE);      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
679          ims, eptrb, flags, RM51);
680        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
681        RRETURN(MATCH_PRUNE);
682    
683        case OP_COMMIT:
684        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
685          ims, eptrb, flags, RM52);
686        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
687        RRETURN(MATCH_COMMIT);
688    
689        case OP_SKIP:
690        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
691          ims, eptrb, flags, RM53);
692        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
693        md->start_match_ptr = eptr;   /* Pass back current position */
694        RRETURN(MATCH_SKIP);
695    
696        case OP_THEN:
697        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
698          ims, eptrb, flags, RM54);
699        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
700        RRETURN(MATCH_THEN);
701    
702        /* Handle a capturing bracket. If there is space in the offset vector, save
703        the current subject position in the working slot at the top of the vector.
704        We mustn't change the current values of the data slot, because they may be
705        set from a previous iteration of this group, and be referred to by a
706        reference inside the group.
707    
708        If the bracket fails to match, we need to restore this value and also the
709        values of the final offsets, in case they were set by a previous iteration
710        of the same bracket.
711    
712        If there isn't enough space in the offset vector, treat this as if it were
713        a non-capturing bracket. Don't worry about setting the flag for the error
714        case here; that is handled in the code for KET. */
715    
716        case OP_CBRA:
717        case OP_SCBRA:
718        number = GET2(ecode, 1+LINK_SIZE);
719      offset = number << 1;      offset = number << 1;
720    
721  #ifdef DEBUG  #ifdef PCRE_DEBUG
722      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
723        printf("subject=");
724      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
725      printf("\n");      printf("\n");
726  #endif  #endif
# Line 584  for (;;) Line 735  for (;;)
735        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
736        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
737    
738          flags = (op == OP_SCBRA)? match_cbegroup : 0;
739        do        do
740          {          {
741          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
742            match_isgroup);            ims, eptrb, flags, RM1);
743          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
744          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
745          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
746          }          }
# Line 603  for (;;) Line 755  for (;;)
755        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
756        }        }
757    
758      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
759        as a non-capturing bracket. */
760    
761      else op = OP_BRA;      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
762      }      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
763    
764    /* Other types of node can be handled by a switch */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
765    
766    switch(op)      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
767      {      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
768      case OP_BRA:     /* Non-capturing bracket: optimized */  
769      DPRINTF(("start bracket 0\n"));      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
770      do      final alternative within the brackets, we would return the result of a
771        recursive call to match() whatever happened. We can reduce stack usage by
772        turning this into a tail recursion, except in the case when match_cbegroup
773        is set.*/
774    
775        case OP_BRA:
776        case OP_SBRA:
777        DPRINTF(("start non-capturing bracket\n"));
778        flags = (op >= OP_SBRA)? match_cbegroup : 0;
779        for (;;)
780        {        {
781        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
782          match_isgroup);          {
783        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (flags == 0)    /* Not a possibly empty group */
784              {
785              ecode += _pcre_OP_lengths[*ecode];
786              DPRINTF(("bracket 0 tail recursion\n"));
787              goto TAIL_RECURSE;
788              }
789    
790            /* Possibly empty group; can't use tail recursion. */
791    
792            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
793              eptrb, flags, RM48);
794            RRETURN(rrc);
795            }
796    
797          /* For non-final alternatives, continue the loop for a NOMATCH result;
798          otherwise return. */
799    
800          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
801            eptrb, flags, RM2);
802          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
803        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
804        }        }
805      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
806    
807      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
808      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
809      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
810      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
811        obeyed, we can use tail recursion to avoid using another stack frame. */
812    
813      case OP_COND:      case OP_COND:
814      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
815        codelink= GET(ecode, 1);
816    
817        /* Because of the way auto-callout works during compile, a callout item is
818        inserted between OP_COND and an assertion condition. */
819    
820        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
821          {
822          if (pcre_callout != NULL)
823            {
824            pcre_callout_block cb;
825            cb.version          = 1;   /* Version 1 of the callout block */
826            cb.callout_number   = ecode[LINK_SIZE+2];
827            cb.offset_vector    = md->offset_vector;
828            cb.subject          = (PCRE_SPTR)md->start_subject;
829            cb.subject_length   = md->end_subject - md->start_subject;
830            cb.start_match      = mstart - md->start_subject;
831            cb.current_position = eptr - md->start_subject;
832            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
833            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
834            cb.capture_top      = offset_top/2;
835            cb.capture_last     = md->capture_last;
836            cb.callout_data     = md->callout_data;
837            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
838            if (rrc < 0) RRETURN(rrc);
839            }
840          ecode += _pcre_OP_lengths[OP_CALLOUT];
841          }
842    
843        condcode = ecode[LINK_SIZE+1];
844    
845        /* Now see what the actual condition is */
846    
847        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
848          {
849          if (md->recursive == NULL)                /* Not recursing => FALSE */
850            {
851            condition = FALSE;
852            ecode += GET(ecode, 1);
853            }
854          else
855            {
856            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
857            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
858    
859            /* If the test is for recursion into a specific subpattern, and it is
860            false, but the test was set up by name, scan the table to see if the
861            name refers to any other numbers, and test them. The condition is true
862            if any one is set. */
863    
864            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
865              {
866              uschar *slotA = md->name_table;
867              for (i = 0; i < md->name_count; i++)
868                {
869                if (GET2(slotA, 0) == recno) break;
870                slotA += md->name_entry_size;
871                }
872    
873              /* Found a name for the number - there can be only one; duplicate
874              names for different numbers are allowed, but not vice versa. First
875              scan down for duplicates. */
876    
877              if (i < md->name_count)
878                {
879                uschar *slotB = slotA;
880                while (slotB > md->name_table)
881                  {
882                  slotB -= md->name_entry_size;
883                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
884                    {
885                    condition = GET2(slotB, 0) == md->recursive->group_num;
886                    if (condition) break;
887                    }
888                  else break;
889                  }
890    
891                /* Scan up for duplicates */
892    
893                if (!condition)
894                  {
895                  slotB = slotA;
896                  for (i++; i < md->name_count; i++)
897                    {
898                    slotB += md->name_entry_size;
899                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
900                      {
901                      condition = GET2(slotB, 0) == md->recursive->group_num;
902                      if (condition) break;
903                      }
904                    else break;
905                    }
906                  }
907                }
908              }
909    
910            /* Choose branch according to the condition */
911    
912            ecode += condition? 3 : GET(ecode, 1);
913            }
914          }
915    
916        else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
917        {        {
918        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
919        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
920          (md->recursive != NULL) :  
921          (offset < offset_top && md->offset_vector[offset] >= 0);        /* If the numbered capture is unset, but the reference was by name,
922        RMATCH(rrc, eptr, ecode + (condition?        scan the table to see if the name refers to any other numbers, and test
923          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),        them. The condition is true if any one is set. This is tediously similar
924          offset_top, md, ims, eptrb, match_isgroup);        to the code above, but not close enough to try to amalgamate. */
925        RRETURN(rrc);  
926          if (!condition && condcode == OP_NCREF)
927            {
928            int refno = offset >> 1;
929            uschar *slotA = md->name_table;
930    
931            for (i = 0; i < md->name_count; i++)
932              {
933              if (GET2(slotA, 0) == refno) break;
934              slotA += md->name_entry_size;
935              }
936    
937            /* Found a name for the number - there can be only one; duplicate names
938            for different numbers are allowed, but not vice versa. First scan down
939            for duplicates. */
940    
941            if (i < md->name_count)
942              {
943              uschar *slotB = slotA;
944              while (slotB > md->name_table)
945                {
946                slotB -= md->name_entry_size;
947                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
948                  {
949                  offset = GET2(slotB, 0) << 1;
950                  condition = offset < offset_top &&
951                    md->offset_vector[offset] >= 0;
952                  if (condition) break;
953                  }
954                else break;
955                }
956    
957              /* Scan up for duplicates */
958    
959              if (!condition)
960                {
961                slotB = slotA;
962                for (i++; i < md->name_count; i++)
963                  {
964                  slotB += md->name_entry_size;
965                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
966                    {
967                    offset = GET2(slotB, 0) << 1;
968                    condition = offset < offset_top &&
969                      md->offset_vector[offset] >= 0;
970                    if (condition) break;
971                    }
972                  else break;
973                  }
974                }
975              }
976            }
977    
978          /* Choose branch according to the condition */
979    
980          ecode += condition? 3 : GET(ecode, 1);
981          }
982    
983        else if (condcode == OP_DEF)     /* DEFINE - always false */
984          {
985          condition = FALSE;
986          ecode += GET(ecode, 1);
987        }        }
988    
989      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
990      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
991        assertion. */
992    
993      else      else
994        {        {
995        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
996            match_condassert | match_isgroup);            match_condassert, RM3);
997        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
998          {          {
999          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
1000            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1001          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1002          }          }
1003        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1004          {          {
1005          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1006          }          }
1007        else ecode += GET(ecode, 1);        else
1008        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
1009          match_isgroup);          condition = FALSE;
1010        RRETURN(rrc);          ecode += codelink;
1011            }
1012        }        }
     /* Control never reaches here */  
1013    
1014      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
1015      encountered. */      we can use tail recursion to avoid using another stack frame, except when
1016        match_cbegroup is required for an unlimited repeat of a possibly empty
1017        group. If the second alternative doesn't exist, we can just plough on. */
1018    
1019        if (condition || *ecode == OP_ALT)
1020          {
1021          ecode += 1 + LINK_SIZE;
1022          if (op == OP_SCOND)        /* Possibly empty group */
1023            {
1024            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1025            RRETURN(rrc);
1026            }
1027          else                       /* Group must match something */
1028            {
1029            flags = 0;
1030            goto TAIL_RECURSE;
1031            }
1032          }
1033        else                         /* Condition false & no alternative */
1034          {
1035          ecode += 1 + LINK_SIZE;
1036          }
1037        break;
1038    
1039    
1040        /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1041        to close any currently open capturing brackets. */
1042    
1043        case OP_CLOSE:
1044        number = GET2(ecode, 1);
1045        offset = number << 1;
1046    
1047    #ifdef PCRE_DEBUG
1048          printf("end bracket %d at *ACCEPT", number);
1049          printf("\n");
1050    #endif
1051    
1052      case OP_CREF:      md->capture_last = number;
1053      case OP_BRANUMBER:      if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1054          {
1055          md->offset_vector[offset] =
1056            md->offset_vector[md->offset_end - number];
1057          md->offset_vector[offset+1] = eptr - md->start_subject;
1058          if (offset_top <= offset) offset_top = offset + 2;
1059          }
1060      ecode += 3;      ecode += 3;
1061      break;      break;
1062    
     /* End of the pattern. If we are in a recursion, we should restore the  
     offsets appropriately and continue from after the call. */  
1063    
1064        /* End of the pattern, either real or forced. If we are in a top-level
1065        recursion, we should restore the offsets appropriately and continue from
1066        after the call. */
1067    
1068        case OP_ACCEPT:
1069      case OP_END:      case OP_END:
1070      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1071        {        {
1072        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
1073        DPRINTF(("Hit the end in a (?0) recursion\n"));        DPRINTF(("End of pattern in a (?0) recursion\n"));
1074        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1075        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1076          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1077        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1078        ims = original_ims;        ims = original_ims;
1079        ecode = rec->after_call;        ecode = rec->after_call;
1080        break;        break;
1081        }        }
1082    
1083      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1084      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1085        the subject. In both cases, backtracking will then try other alternatives,
1086        if any. */
1087    
1088        if (eptr == mstart &&
1089            (md->notempty ||
1090              (md->notempty_atstart &&
1091                mstart == md->start_subject + md->start_offset)))
1092          RRETURN(MATCH_NOMATCH);
1093    
1094        /* Otherwise, we have a match. */
1095    
1096      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      md->end_match_ptr = eptr;           /* Record where we ended */
1097      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
1098      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1099      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
1100    
1101      /* Change option settings */      /* Change option settings */
# Line 717  for (;;) Line 1116  for (;;)
1116      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1117      do      do
1118        {        {
1119        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1120          match_isgroup);          RM4);
1121        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH)
1122        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          {
1123            mstart = md->start_match_ptr;   /* In case \K reset it */
1124            break;
1125            }
1126          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1127        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1128        }        }
1129      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 738  for (;;) Line 1141  for (;;)
1141      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1142      continue;      continue;
1143    
1144      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1145        PRUNE, or COMMIT means we must assume failure without checking subsequent
1146        branches. */
1147    
1148      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1149      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1150      do      do
1151        {        {
1152        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1153          match_isgroup);          RM5);
1154        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
1155        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1156            {
1157            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1158            break;
1159            }
1160          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1161        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1162        }        }
1163      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 766  for (;;) Line 1176  for (;;)
1176  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1177      if (utf8)      if (utf8)
1178        {        {
1179        c = GET(ecode,1);        i = GET(ecode, 1);
1180        for (i = 0; i < c; i++)        while (i-- > 0)
1181          {          {
1182          eptr--;          eptr--;
1183          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1184          BACKCHAR(eptr)          BACKCHAR(eptr);
1185          }          }
1186        }        }
1187      else      else
# Line 780  for (;;) Line 1190  for (;;)
1190      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1191    
1192        {        {
1193        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
1194        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1195        }        }
1196    
1197      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1198    
1199        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1200      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1201      break;      break;
1202    
# Line 800  for (;;) Line 1211  for (;;)
1211        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 1;   /* Version 1 of the callout block */
1212        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1213        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1214        cb.subject          = (const char *)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1215        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
1216        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
1217        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
1218        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1219        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
# Line 837  for (;;) Line 1248  for (;;)
1248      case OP_RECURSE:      case OP_RECURSE:
1249        {        {
1250        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1251        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
1252            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
1253    
1254        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1255    
# Line 869  for (;;) Line 1275  for (;;)
1275    
1276        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1277              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1278        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1279    
1280        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1281        restore the offset and recursion data. */        restore the offset and recursion data. */
1282    
1283        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1284          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1285        do        do
1286          {          {
1287          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1288              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
1289          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
1290            {            {
1291              DPRINTF(("Recursion matched\n"));
1292            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1293            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1294              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1295            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
1296            }            }
1297          else if (rrc != MATCH_NOMATCH) RRETURN(rrc);          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1298              {
1299              DPRINTF(("Recursion gave error %d\n", rrc));
1300              if (new_recursive.offset_save != stacksave)
1301                (pcre_free)(new_recursive.offset_save);
1302              RRETURN(rrc);
1303              }
1304    
1305          md->recursive = &new_recursive;          md->recursive = &new_recursive;
1306          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
# Line 909  for (;;) Line 1322  for (;;)
1322      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1323      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1324      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1325      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1326        the start-of-match value in case it was changed by \K. */
1327    
1328      case OP_ONCE:      case OP_ONCE:
1329        {      prev = ecode;
1330        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1331    
1332        do      do
1333          {
1334          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1335          if (rrc == MATCH_MATCH)
1336          {          {
1337          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,          mstart = md->start_match_ptr;
1338            eptrb, match_isgroup);          break;
         if (rrc == MATCH_MATCH) break;  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         ecode += GET(ecode,1);  
1339          }          }
1340        while (*ecode == OP_ALT);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1341          ecode += GET(ecode,1);
1342          }
1343        while (*ecode == OP_ALT);
1344    
1345        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1346    
1347        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1348    
1349        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1350        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1351    
1352        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1353    
1354        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1355        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1356    
1357        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1358        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1359        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1360        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1361        course of events. */      course of events. */
1362    
1363        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1364          {        {
1365          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1366          break;        break;
1367          }        }
1368    
1369        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1370        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1371        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1372        opcode. */      any options that changed within the bracket before re-running it, so
1373        check the next opcode. */
1374    
1375        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1376          {        {
1377          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1378          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1379          }        }
1380    
1381        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1382          {        {
1383          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1384          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1385          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1386          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = 0;
1387          }        goto TAIL_RECURSE;
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
1388        }        }
1389      RRETURN(MATCH_NOMATCH);      else  /* OP_KETRMAX */
1390          {
1391          RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1392          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1393          ecode += 1 + LINK_SIZE;
1394          flags = 0;
1395          goto TAIL_RECURSE;
1396          }
1397        /* Control never gets here */
1398    
1399      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1400      bracketed group and go to there. */      bracketed group and go to there. */
# Line 985  for (;;) Line 1403  for (;;)
1403      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1404      break;      break;
1405    
1406      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1407      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1408      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1409      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1410      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1411    
1412      case OP_BRAZERO:      case OP_BRAZERO:
1413        {        {
1414        next = ecode+1;        next = ecode+1;
1415        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1416        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1417        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1418        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1419        }        }
1420      break;      break;
1421    
1422      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1423        {        {
1424        next = ecode+1;        next = ecode+1;
1425        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1426        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1427        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1428        ecode++;        ecode++;
1429        }        }
1430      break;      break;
1431    
1432      /* End of a group, repeated or non-repeating. If we are at the end of      case OP_SKIPZERO:
1433      an assertion "group", stop matching and return MATCH_MATCH, but record the        {
1434      current high water mark for use by positive assertions. Do this also        next = ecode+1;
1435      for the "once" (not-backup up) groups. */        do next += GET(next,1); while (*next == OP_ALT);
1436          ecode = next + 1 + LINK_SIZE;
1437          }
1438        break;
1439    
1440        /* End of a group, repeated or non-repeating. */
1441    
1442      case OP_KET:      case OP_KET:
1443      case OP_KETRMIN:      case OP_KETRMIN:
1444      case OP_KETRMAX:      case OP_KETRMAX:
1445        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
   
       /* Back up the stack of bracket start pointers. */  
1446    
1447        eptrb = eptrb->epb_prev;      /* If this was a group that remembered the subject start, in order to break
1448        infinite repeats of empty string matches, retrieve the subject start from
1449        the chain. Otherwise, set it NULL. */
1450    
1451        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev >= OP_SBRA)
1452            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        {
1453            *prev == OP_ONCE)        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1454          {        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1455          md->end_match_ptr = eptr;      /* For ONCE */        }
1456          md->end_offset_top = offset_top;      else saved_eptr = NULL;
         RRETURN(MATCH_MATCH);  
         }  
1457    
1458        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group or an atomic group, stop
1459        group number back at the start and if necessary complete handling an      matching and return MATCH_MATCH, but record the current high water mark for
1460        extraction by setting the offsets and bumping the high water mark. */      use by positive assertions. We also need to record the match start in case
1461        it was changed by \K. */
1462    
1463        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1464          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1465          number = *prev - OP_BRA;          *prev == OP_ONCE)
1466          {
1467          md->end_match_ptr = eptr;      /* For ONCE */
1468          md->end_offset_top = offset_top;
1469          md->start_match_ptr = mstart;
1470          RRETURN(MATCH_MATCH);
1471          }
1472    
1473          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1474          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1475        bumping the high water mark. Note that whole-pattern recursion is coded as
1476        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1477        when the OP_END is reached. Other recursion is handled here. */
1478    
1479          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1480          offset = number << 1;        {
1481          number = GET2(prev, 1+LINK_SIZE);
1482          offset = number << 1;
1483    
1484  #ifdef DEBUG  #ifdef PCRE_DEBUG
1485          printf("end bracket %d", number);        printf("end bracket %d", number);
1486          printf("\n");        printf("\n");
1487  #endif  #endif
1488    
1489          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1490          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1491          into group 0, so it won't be picked up here. Instead, we catch it when          {
1492          the OP_END is reached. */          md->offset_vector[offset] =
1493              md->offset_vector[md->offset_end - number];
1494            md->offset_vector[offset+1] = eptr - md->start_subject;
1495            if (offset_top <= offset) offset_top = offset + 2;
1496            }
1497    
1498          /* Handle a recursively called group. Restore the offsets
1499          appropriately and continue from after the call. */
1500    
1501          if (md->recursive != NULL && md->recursive->group_num == number)
1502            {
1503            recursion_info *rec = md->recursive;
1504            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1505            md->recursive = rec->prevrec;
1506            memcpy(md->offset_vector, rec->offset_save,
1507              rec->saved_max * sizeof(int));
1508            offset_top = rec->save_offset_top;
1509            ecode = rec->after_call;
1510            ims = original_ims;
1511            break;
1512            }
1513          }
1514    
1515          if (number > 0)      /* For both capturing and non-capturing groups, reset the value of the ims
1516            {      flags, in case they got changed during the group. */
           md->capture_last = number;  
           if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
             {  
             md->offset_vector[offset] =  
               md->offset_vector[md->offset_end - number];  
             md->offset_vector[offset+1] = eptr - md->start_subject;  
             if (offset_top <= offset) offset_top = offset + 2;  
             }  
1517    
1518            /* Handle a recursively called group. Restore the offsets      ims = original_ims;
1519            appropriately and continue from after the call. */      DPRINTF(("ims reset to %02lx\n", ims));
1520    
1521            if (md->recursive != NULL && md->recursive->group_num == number)      /* For a non-repeating ket, just continue at this level. This also
1522              {      happens for a repeating ket if no characters were matched in the group.
1523              recursion_info *rec = md->recursive;      This is the forcible breaking of infinite loops as implemented in Perl
1524              DPRINTF(("Recursion (%d) succeeded - continuing\n", number));      5.005. If there is an options reset, it will get obeyed in the normal
1525              md->recursive = rec->prevrec;      course of events. */
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
         }  
1526    
1527        /* Reset the value of the ims flags, in case they got changed during      if (*ecode == OP_KET || eptr == saved_eptr)
1528        the group. */        {
1529          ecode += 1 + LINK_SIZE;
1530          break;
1531          }
1532    
1533        ims = original_ims;      /* The repeating kets try the rest of the pattern or restart from the
1534        DPRINTF(("ims reset to %02lx\n", ims));      preceding bracket, in the appropriate order. In the second case, we can use
1535        tail recursion to avoid using another stack frame, unless we have an
1536        unlimited repeat of a group that can match an empty string. */
1537    
1538        /* For a non-repeating ket, just continue at this level. This also      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
       happens for a repeating ket if no characters were matched in the group.  
       This is the forcible breaking of infinite loops as implemented in Perl  
       5.005. If there is an options reset, it will get obeyed in the normal  
       course of events. */  
1539    
1540        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KETRMIN)
1541          {
1542          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1543          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1544          if (flags != 0)    /* Could match an empty string */
1545          {          {
1546          ecode += 1 + LINK_SIZE;          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1547          break;          RRETURN(rrc);
         }  
   
       /* The repeating kets try the rest of the pattern or restart from the  
       preceding bracket, in the appropriate order. */  
   
       if (*ecode == OP_KETRMIN)  
         {  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
1548          }          }
1549          ecode = prev;
1550          goto TAIL_RECURSE;
1551        }        }
1552        else  /* OP_KETRMAX */
1553      RRETURN(MATCH_NOMATCH);        {
1554          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1555          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1556          ecode += 1 + LINK_SIZE;
1557          flags = 0;
1558          goto TAIL_RECURSE;
1559          }
1560        /* Control never gets here */
1561    
1562      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1563    
# Line 1135  for (;;) Line 1565  for (;;)
1565      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1566      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1567        {        {
1568        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1569              (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1570          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1571        ecode++;        ecode++;
1572        break;        break;
# Line 1156  for (;;) Line 1587  for (;;)
1587      ecode++;      ecode++;
1588      break;      break;
1589    
1590        /* Reset the start of match point */
1591    
1592        case OP_SET_SOM:
1593        mstart = eptr;
1594        ecode++;
1595        break;
1596    
1597      /* Assert before internal newline if multiline, or before a terminating      /* Assert before internal newline if multiline, or before a terminating
1598      newline unless endonly is set, else end of subject unless noteol is set. */      newline unless endonly is set, else end of subject unless noteol is set. */
1599    
# Line 1163  for (;;) Line 1601  for (;;)
1601      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1602        {        {
1603        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1604          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1605        else        else
1606          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1607        ecode++;        ecode++;
# Line 1174  for (;;) Line 1612  for (;;)
1612        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
1613        if (!md->endonly)        if (!md->endonly)
1614          {          {
1615          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1616             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1617            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1618          ecode++;          ecode++;
1619          break;          break;
1620          }          }
1621        }        }
1622      /* ... else fall through */      /* ... else fall through for endonly */
1623    
1624      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1625    
# Line 1193  for (;;) Line 1631  for (;;)
1631      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1632    
1633      case OP_EODN:      case OP_EODN:
1634      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1635         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1636          RRETURN(MATCH_NOMATCH);
1637      ecode++;      ecode++;
1638      break;      break;
1639    
# Line 1206  for (;;) Line 1645  for (;;)
1645    
1646        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1647        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1648        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1649          partial matching. */
1650    
1651  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1652        if (utf8)        if (utf8)
1653          {          {
1654          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1655            {            {
1656            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1657            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1658              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1659            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1660            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1661            }            }
1662          if (eptr >= md->end_subject) cur_is_word = FALSE; else          if (eptr >= md->end_subject)
1663              {
1664              SCHECK_PARTIAL();
1665              cur_is_word = FALSE;
1666              }
1667            else
1668            {            {
1669            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1670            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1227  for (;;) Line 1673  for (;;)
1673        else        else
1674  #endif  #endif
1675    
1676        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode */
1677    
1678          {          {
1679          prev_is_word = (eptr != md->start_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1680            ((md->ctypes[eptr[-1]] & ctype_word) != 0);            {
1681          cur_is_word = (eptr < md->end_subject) &&            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1682            ((md->ctypes[*eptr] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1683              }
1684            if (eptr >= md->end_subject)
1685              {
1686              SCHECK_PARTIAL();
1687              cur_is_word = FALSE;
1688              }
1689            else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1690          }          }
1691    
1692        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
# Line 1247  for (;;) Line 1700  for (;;)
1700      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1701    
1702      case OP_ANY:      case OP_ANY:
1703      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1704        /* Fall through */
1705    
1706        case OP_ALLANY:
1707        if (eptr++ >= md->end_subject)
1708          {
1709          SCHECK_PARTIAL();
1710        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1711      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);        }
1712  #ifdef SUPPORT_UTF8      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
 #endif  
1713      ecode++;      ecode++;
1714      break;      break;
1715    
# Line 1261  for (;;) Line 1717  for (;;)
1717      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1718    
1719      case OP_ANYBYTE:      case OP_ANYBYTE:
1720      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1721          {
1722          SCHECK_PARTIAL();
1723          RRETURN(MATCH_NOMATCH);
1724          }
1725      ecode++;      ecode++;
1726      break;      break;
1727    
1728      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1729      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1730          {
1731          SCHECK_PARTIAL();
1732          RRETURN(MATCH_NOMATCH);
1733          }
1734      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1735      if (      if (
1736  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1279  for (;;) Line 1743  for (;;)
1743      break;      break;
1744    
1745      case OP_DIGIT:      case OP_DIGIT:
1746      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1747          {
1748          SCHECK_PARTIAL();
1749          RRETURN(MATCH_NOMATCH);
1750          }
1751      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1752      if (      if (
1753  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1292  for (;;) Line 1760  for (;;)
1760      break;      break;
1761    
1762      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1763      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1764          {
1765          SCHECK_PARTIAL();
1766          RRETURN(MATCH_NOMATCH);
1767          }
1768      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1769      if (      if (
1770  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1305  for (;;) Line 1777  for (;;)
1777      break;      break;
1778    
1779      case OP_WHITESPACE:      case OP_WHITESPACE:
1780      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1781          {
1782          SCHECK_PARTIAL();
1783          RRETURN(MATCH_NOMATCH);
1784          }
1785      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1786      if (      if (
1787  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1318  for (;;) Line 1794  for (;;)
1794      break;      break;
1795    
1796      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1797      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1798          {
1799          SCHECK_PARTIAL();
1800          RRETURN(MATCH_NOMATCH);
1801          }
1802      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1803      if (      if (
1804  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1331  for (;;) Line 1811  for (;;)
1811      break;      break;
1812    
1813      case OP_WORDCHAR:      case OP_WORDCHAR:
1814      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1815          {
1816          SCHECK_PARTIAL();
1817          RRETURN(MATCH_NOMATCH);
1818          }
1819      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1820      if (      if (
1821  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1343  for (;;) Line 1827  for (;;)
1827      ecode++;      ecode++;
1828      break;      break;
1829    
1830        case OP_ANYNL:
1831        if (eptr >= md->end_subject)
1832          {
1833          SCHECK_PARTIAL();
1834          RRETURN(MATCH_NOMATCH);
1835          }
1836        GETCHARINCTEST(c, eptr);
1837        switch(c)
1838          {
1839          default: RRETURN(MATCH_NOMATCH);
1840          case 0x000d:
1841          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1842          break;
1843    
1844          case 0x000a:
1845          break;
1846    
1847          case 0x000b:
1848          case 0x000c:
1849          case 0x0085:
1850          case 0x2028:
1851          case 0x2029:
1852          if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1853          break;
1854          }
1855        ecode++;
1856        break;
1857    
1858        case OP_NOT_HSPACE:
1859        if (eptr >= md->end_subject)
1860          {
1861          SCHECK_PARTIAL();
1862          RRETURN(MATCH_NOMATCH);
1863          }
1864        GETCHARINCTEST(c, eptr);
1865        switch(c)
1866          {
1867          default: break;
1868          case 0x09:      /* HT */
1869          case 0x20:      /* SPACE */
1870          case 0xa0:      /* NBSP */
1871          case 0x1680:    /* OGHAM SPACE MARK */
1872          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1873          case 0x2000:    /* EN QUAD */
1874          case 0x2001:    /* EM QUAD */
1875          case 0x2002:    /* EN SPACE */
1876          case 0x2003:    /* EM SPACE */
1877          case 0x2004:    /* THREE-PER-EM SPACE */
1878          case 0x2005:    /* FOUR-PER-EM SPACE */
1879          case 0x2006:    /* SIX-PER-EM SPACE */
1880          case 0x2007:    /* FIGURE SPACE */
1881          case 0x2008:    /* PUNCTUATION SPACE */
1882          case 0x2009:    /* THIN SPACE */
1883          case 0x200A:    /* HAIR SPACE */
1884          case 0x202f:    /* NARROW NO-BREAK SPACE */
1885          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1886          case 0x3000:    /* IDEOGRAPHIC SPACE */
1887          RRETURN(MATCH_NOMATCH);
1888          }
1889        ecode++;
1890        break;
1891    
1892        case OP_HSPACE:
1893        if (eptr >= md->end_subject)
1894          {
1895          SCHECK_PARTIAL();
1896          RRETURN(MATCH_NOMATCH);
1897          }
1898        GETCHARINCTEST(c, eptr);
1899        switch(c)
1900          {
1901          default: RRETURN(MATCH_NOMATCH);
1902          case 0x09:      /* HT */
1903          case 0x20:      /* SPACE */
1904          case 0xa0:      /* NBSP */
1905          case 0x1680:    /* OGHAM SPACE MARK */
1906          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1907          case 0x2000:    /* EN QUAD */
1908          case 0x2001:    /* EM QUAD */
1909          case 0x2002:    /* EN SPACE */
1910          case 0x2003:    /* EM SPACE */
1911          case 0x2004:    /* THREE-PER-EM SPACE */
1912          case 0x2005:    /* FOUR-PER-EM SPACE */
1913          case 0x2006:    /* SIX-PER-EM SPACE */
1914          case 0x2007:    /* FIGURE SPACE */
1915          case 0x2008:    /* PUNCTUATION SPACE */
1916          case 0x2009:    /* THIN SPACE */
1917          case 0x200A:    /* HAIR SPACE */
1918          case 0x202f:    /* NARROW NO-BREAK SPACE */
1919          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1920          case 0x3000:    /* IDEOGRAPHIC SPACE */
1921          break;
1922          }
1923        ecode++;
1924        break;
1925    
1926        case OP_NOT_VSPACE:
1927        if (eptr >= md->end_subject)
1928          {
1929          SCHECK_PARTIAL();
1930          RRETURN(MATCH_NOMATCH);
1931          }
1932        GETCHARINCTEST(c, eptr);
1933        switch(c)
1934          {
1935          default: break;
1936          case 0x0a:      /* LF */
1937          case 0x0b:      /* VT */
1938          case 0x0c:      /* FF */
1939          case 0x0d:      /* CR */
1940          case 0x85:      /* NEL */
1941          case 0x2028:    /* LINE SEPARATOR */
1942          case 0x2029:    /* PARAGRAPH SEPARATOR */
1943          RRETURN(MATCH_NOMATCH);
1944          }
1945        ecode++;
1946        break;
1947    
1948        case OP_VSPACE:
1949        if (eptr >= md->end_subject)
1950          {
1951          SCHECK_PARTIAL();
1952          RRETURN(MATCH_NOMATCH);
1953          }
1954        GETCHARINCTEST(c, eptr);
1955        switch(c)
1956          {
1957          default: RRETURN(MATCH_NOMATCH);
1958          case 0x0a:      /* LF */
1959          case 0x0b:      /* VT */
1960          case 0x0c:      /* FF */
1961          case 0x0d:      /* CR */
1962          case 0x85:      /* NEL */
1963          case 0x2028:    /* LINE SEPARATOR */
1964          case 0x2029:    /* PARAGRAPH SEPARATOR */
1965          break;
1966          }
1967        ecode++;
1968        break;
1969    
1970  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1971      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1972      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
1973    
1974      case OP_PROP:      case OP_PROP:
1975      case OP_NOTPROP:      case OP_NOTPROP:
1976      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1977          {
1978          SCHECK_PARTIAL();
1979          RRETURN(MATCH_NOMATCH);
1980          }
1981      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1982        {        {
1983        int chartype, rqdtype;        const ucd_record *prop = GET_UCD(c);
       int othercase;  
       int category = ucp_findchar(c, &chartype, &othercase);  
   
       rqdtype = *(++ecode);  
       ecode++;  
1984    
1985        if (rqdtype >= 128)        switch(ecode[1])
1986          {          {
1987          if ((rqdtype - 128 != category) == (op == OP_PROP))          case PT_ANY:
1988            if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1989            break;
1990    
1991            case PT_LAMP:
1992            if ((prop->chartype == ucp_Lu ||
1993                 prop->chartype == ucp_Ll ||
1994                 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1995            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1996          }           break;
1997        else  
1998          {          case PT_GC:
1999          if ((rqdtype != chartype) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2000              RRETURN(MATCH_NOMATCH);
2001            break;
2002    
2003            case PT_PC:
2004            if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2005              RRETURN(MATCH_NOMATCH);
2006            break;
2007    
2008            case PT_SC:
2009            if ((ecode[2] != prop->script) == (op == OP_PROP))
2010            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2011            break;
2012    
2013            default:
2014            RRETURN(PCRE_ERROR_INTERNAL);
2015          }          }
2016    
2017          ecode += 3;
2018        }        }
2019      break;      break;
2020    
# Line 1376  for (;;) Line 2022  for (;;)
2022      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2023    
2024      case OP_EXTUNI:      case OP_EXTUNI:
2025      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2026          {
2027          SCHECK_PARTIAL();
2028          RRETURN(MATCH_NOMATCH);
2029          }
2030      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2031        {        {
2032        int chartype;        int category = UCD_CATEGORY(c);
       int othercase;  
       int category = ucp_findchar(c, &chartype, &othercase);  
2033        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
2034        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2035          {          {
# Line 1390  for (;;) Line 2038  for (;;)
2038            {            {
2039            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2040            }            }
2041          category = ucp_findchar(c, &chartype, &othercase);          category = UCD_CATEGORY(c);
2042          if (category != ucp_M) break;          if (category != ucp_M) break;
2043          eptr += len;          eptr += len;
2044          }          }
# Line 1411  for (;;) Line 2059  for (;;)
2059      case OP_REF:      case OP_REF:
2060        {        {
2061        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2062        ecode += 3;                                 /* Advance past item */        ecode += 3;
2063    
2064          /* If the reference is unset, there are two possibilities:
2065    
2066          (a) In the default, Perl-compatible state, set the length to be longer
2067          than the amount of subject left; this ensures that every attempt at a
2068          match fails. We can't just fail here, because of the possibility of
2069          quantifiers with zero minima.
2070    
2071          (b) If the JavaScript compatibility flag is set, set the length to zero
2072          so that the back reference matches an empty string.
2073    
2074        /* If the reference is unset, set the length to be longer than the amount        Otherwise, set the length to the length of what was matched by the
2075        of subject left; this ensures that every attempt at a match fails. We        referenced subpattern. */
2076        can't just fail here, because of the possibility of quantifiers with zero  
2077        minima. */        if (offset >= offset_top || md->offset_vector[offset] < 0)
2078            length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2079        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        else
2080          md->end_subject - eptr + 1 :          length = md->offset_vector[offset+1] - md->offset_vector[offset];
         md->offset_vector[offset+1] - md->offset_vector[offset];  
2081    
2082        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2083    
# Line 1449  for (;;) Line 2106  for (;;)
2106          break;          break;
2107    
2108          default:               /* No repeat follows */          default:               /* No repeat follows */
2109          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2110              {
2111              CHECK_PARTIAL();
2112              RRETURN(MATCH_NOMATCH);
2113              }
2114          eptr += length;          eptr += length;
2115          continue;              /* With the main loop */          continue;              /* With the main loop */
2116          }          }
# Line 1465  for (;;) Line 2126  for (;;)
2126    
2127        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2128          {          {
2129          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2130              {
2131              CHECK_PARTIAL();
2132              RRETURN(MATCH_NOMATCH);
2133              }
2134          eptr += length;          eptr += length;
2135          }          }
2136    
# Line 1480  for (;;) Line 2145  for (;;)
2145          {          {
2146          for (fi = min;; fi++)          for (fi = min;; fi++)
2147            {            {
2148            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2149            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2150            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) RRETURN(MATCH_NOMATCH);
2151              if (!match_ref(offset, eptr, length, md, ims))
2152                {
2153                CHECK_PARTIAL();
2154              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2155                }
2156            eptr += length;            eptr += length;
2157            }            }
2158          /* Control never gets here */          /* Control never gets here */
# Line 1496  for (;;) Line 2165  for (;;)
2165          pp = eptr;          pp = eptr;
2166          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2167            {            {
2168            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2169                {
2170                CHECK_PARTIAL();
2171                break;
2172                }
2173            eptr += length;            eptr += length;
2174            }            }
2175          while (eptr >= pp)          while (eptr >= pp)
2176            {            {
2177            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2178            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2179            eptr -= length;            eptr -= length;
2180            }            }
# Line 1510  for (;;) Line 2183  for (;;)
2183        }        }
2184      /* Control never gets here */      /* Control never gets here */
2185    
   
   
2186      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2187      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2188      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1566  for (;;) Line 2237  for (;;)
2237          {          {
2238          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2239            {            {
2240            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2241                {
2242                SCHECK_PARTIAL();
2243                RRETURN(MATCH_NOMATCH);
2244                }
2245            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2246            if (c > 255)            if (c > 255)
2247              {              {
# Line 1584  for (;;) Line 2259  for (;;)
2259          {          {
2260          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2261            {            {
2262            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2263                {
2264                SCHECK_PARTIAL();
2265                RRETURN(MATCH_NOMATCH);
2266                }
2267            c = *eptr++;            c = *eptr++;
2268            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2269            }            }
# Line 1606  for (;;) Line 2285  for (;;)
2285            {            {
2286            for (fi = min;; fi++)            for (fi = min;; fi++)
2287              {              {
2288              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2289              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2290              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2291                if (eptr >= md->end_subject)
2292                  {
2293                  SCHECK_PARTIAL();
2294                  RRETURN(MATCH_NOMATCH);
2295                  }
2296              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2297              if (c > 255)              if (c > 255)
2298                {                {
# Line 1626  for (;;) Line 2310  for (;;)
2310            {            {
2311            for (fi = min;; fi++)            for (fi = min;; fi++)
2312              {              {
2313              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2314              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2315              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2316                if (eptr >= md->end_subject)
2317                  {
2318                  SCHECK_PARTIAL();
2319                  RRETURN(MATCH_NOMATCH);
2320                  }
2321              c = *eptr++;              c = *eptr++;
2322              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2323              }              }
# Line 1649  for (;;) Line 2338  for (;;)
2338            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2339              {              {
2340              int len = 1;              int len = 1;
2341              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2342                  {
2343                  SCHECK_PARTIAL();
2344                  break;
2345                  }
2346              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2347              if (c > 255)              if (c > 255)
2348                {                {
# Line 1663  for (;;) Line 2356  for (;;)
2356              }              }
2357            for (;;)            for (;;)
2358              {              {
2359              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2360              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2361              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2362              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1675  for (;;) Line 2368  for (;;)
2368            {            {
2369            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2370              {              {
2371              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2372                  {
2373                  SCHECK_PARTIAL();
2374                  break;
2375                  }
2376              c = *eptr;              c = *eptr;
2377              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2378              eptr++;              eptr++;
2379              }              }
2380            while (eptr >= pp)            while (eptr >= pp)
2381              {              {
2382              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
             eptr--;  
2383              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2384                eptr--;
2385              }              }
2386            }            }
2387    
# Line 1695  for (;;) Line 2392  for (;;)
2392    
2393    
2394      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2395      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2396        mode, because Unicode properties are supported in non-UTF-8 mode. */
2397    
2398  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2399      case OP_XCLASS:      case OP_XCLASS:
# Line 1736  for (;;) Line 2434  for (;;)
2434    
2435        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2436          {          {
2437          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2438          GETCHARINC(c, eptr);            {
2439              SCHECK_PARTIAL();
2440              RRETURN(MATCH_NOMATCH);
2441              }
2442            GETCHARINCTEST(c, eptr);
2443          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2444          }          }
2445    
# Line 1753  for (;;) Line 2455  for (;;)
2455          {          {
2456          for (fi = min;; fi++)          for (fi = min;; fi++)
2457            {            {
2458            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2459            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2460            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
2461            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2462                {
2463                SCHECK_PARTIAL();
2464                RRETURN(MATCH_NOMATCH);
2465                }
2466              GETCHARINCTEST(c, eptr);
2467            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2468            }            }
2469          /* Control never gets here */          /* Control never gets here */
# Line 1770  for (;;) Line 2477  for (;;)
2477          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2478            {            {
2479            int len = 1;            int len = 1;
2480            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2481            GETCHARLEN(c, eptr, len);              {
2482                SCHECK_PARTIAL();
2483                break;
2484                }
2485              GETCHARLENTEST(c, eptr, len);
2486            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2487            eptr += len;            eptr += len;
2488            }            }
2489          for(;;)          for(;;)
2490            {            {
2491            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2492            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2493            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2494            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2495            }            }
2496          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2497          }          }
# Line 1798  for (;;) Line 2509  for (;;)
2509        length = 1;        length = 1;
2510        ecode++;        ecode++;
2511        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2512        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2513            {
2514            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2515            RRETURN(MATCH_NOMATCH);
2516            }
2517        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2518        }        }
2519      else      else
# Line 1806  for (;;) Line 2521  for (;;)
2521    
2522      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2523        {        {
2524        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2525            {
2526            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2527            RRETURN(MATCH_NOMATCH);
2528            }
2529        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2530        ecode += 2;        ecode += 2;
2531        }        }
# Line 1822  for (;;) Line 2541  for (;;)
2541        ecode++;        ecode++;
2542        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2543    
2544        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2545            {
2546            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2547            RRETURN(MATCH_NOMATCH);
2548            }
2549    
2550        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2551        can use the fast lookup table. */        can use the fast lookup table. */
# Line 1836  for (;;) Line 2559  for (;;)
2559    
2560        else        else
2561          {          {
2562          int dc;          unsigned int dc;
2563          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2564          ecode += length;          ecode += length;
2565    
2566          /* If we have Unicode property support, we can use it to test the other          /* If we have Unicode property support, we can use it to test the other
2567          case of the character, if there is one. The result of ucp_findchar() is          case of the character, if there is one. */
         < 0 if the char isn't found, and othercase is returned as zero if there  
         isn't one. */  
2568    
2569          if (fc != dc)          if (fc != dc)
2570            {            {
2571  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2572            int chartype;            if (dc != UCD_OTHERCASE(fc))
           int othercase;  
           if (ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)  
2573  #endif  #endif
2574              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2575            }            }
# Line 1861  for (;;) Line 2580  for (;;)
2580    
2581      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2582        {        {
2583        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2584            {
2585            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2586            RRETURN(MATCH_NOMATCH);
2587            }
2588        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2589        ecode += 2;        ecode += 2;
2590        }        }
2591      break;      break;
2592    
2593      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2594    
2595      case OP_EXACT:      case OP_EXACT:
2596      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2597      ecode += 3;      ecode += 3;
2598      goto REPEATCHAR;      goto REPEATCHAR;
2599    
2600        case OP_POSUPTO:
2601        possessive = TRUE;
2602        /* Fall through */
2603    
2604      case OP_UPTO:      case OP_UPTO:
2605      case OP_MINUPTO:      case OP_MINUPTO:
2606      min = 0;      min = 0;
# Line 1882  for (;;) Line 2609  for (;;)
2609      ecode += 3;      ecode += 3;
2610      goto REPEATCHAR;      goto REPEATCHAR;
2611    
2612        case OP_POSSTAR:
2613        possessive = TRUE;
2614        min = 0;
2615        max = INT_MAX;
2616        ecode++;
2617        goto REPEATCHAR;
2618    
2619        case OP_POSPLUS:
2620        possessive = TRUE;
2621        min = 1;
2622        max = INT_MAX;
2623        ecode++;
2624        goto REPEATCHAR;
2625    
2626        case OP_POSQUERY:
2627        possessive = TRUE;
2628        min = 0;
2629        max = 1;
2630        ecode++;
2631        goto REPEATCHAR;
2632    
2633      case OP_STAR:      case OP_STAR:
2634      case OP_MINSTAR:      case OP_MINSTAR:
2635      case OP_PLUS:      case OP_PLUS:
# Line 1890  for (;;) Line 2638  for (;;)
2638      case OP_MINQUERY:      case OP_MINQUERY:
2639      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2640      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2641    
2642      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2643      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2644      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2645    
2646      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2647    
2648      REPEATCHAR:      REPEATCHAR:
2649  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1905  for (;;) Line 2652  for (;;)
2652        length = 1;        length = 1;
2653        charptr = ecode;        charptr = ecode;
2654        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2655        ecode += length;        ecode += length;
2656    
2657        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 1913  for (;;) Line 2659  for (;;)
2659    
2660        if (length > 1)        if (length > 1)
2661          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2662  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2663          int othercase;          unsigned int othercase;
         int chartype;  
2664          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2665               ucp_findchar(fc, &chartype, &othercase) >= 0 &&              (othercase = UCD_OTHERCASE(fc)) != fc)
              othercase > 0)  
2666            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2667            else oclength = 0;
2668  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2669    
2670          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2671            {            {
2672            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2673            /* Need braces because of following else */              memcmp(eptr, charptr, length) == 0) eptr += length;
2674            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2675              else if (oclength > 0 &&
2676                       eptr <= md->end_subject - oclength &&
2677                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2678    #endif  /* SUPPORT_UCP */
2679            else            else
2680              {              {
2681              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2682              eptr += oclength;              RRETURN(MATCH_NOMATCH);
2683              }              }
2684            }            }
2685    
# Line 1943  for (;;) Line 2689  for (;;)
2689            {            {
2690            for (fi = min;; fi++)            for (fi = min;; fi++)
2691              {              {
2692              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2693              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2694              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2695              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2696              /* Need braces because of following else */                memcmp(eptr, charptr, length) == 0) eptr += length;
2697              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2698                else if (oclength > 0 &&
2699                         eptr <= md->end_subject - oclength &&
2700                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2701    #endif  /* SUPPORT_UCP */
2702              else              else
2703                {                {
2704                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2705                eptr += oclength;                RRETURN(MATCH_NOMATCH);
2706                }                }
2707              }              }
2708            /* Control never gets here */            /* Control never gets here */
2709            }            }
2710          else  
2711            else  /* Maximize */
2712            {            {
2713            pp = eptr;            pp = eptr;
2714            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2715              {              {
2716              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2717              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2718              else if (oclength == 0) break;  #ifdef SUPPORT_UCP
2719                else if (oclength > 0 &&
2720                         eptr <= md->end_subject - oclength &&
2721                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2722    #endif  /* SUPPORT_UCP */
2723              else              else
2724                {                {
2725                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2726                eptr += oclength;                break;
2727                }                }
2728              }              }
2729            while (eptr >= pp)  
2730             {            if (possessive) continue;
2731             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);  
2732             if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for(;;)
2733             eptr -= length;              {
2734             }              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2735            RRETURN(MATCH_NOMATCH);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2736                if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2737    #ifdef SUPPORT_UCP
2738                eptr--;
2739                BACKCHAR(eptr);
2740    #else   /* without SUPPORT_UCP */
2741                eptr -= length;
2742    #endif  /* SUPPORT_UCP */
2743                }
2744            }            }
2745          /* Control never gets here */          /* Control never gets here */
2746          }          }
# Line 1990  for (;;) Line 2753  for (;;)
2753  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2754    
2755      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2756        {  
2757        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2758    
2759      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2760      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2011  for (;;) Line 2772  for (;;)
2772        {        {
2773        fc = md->lcc[fc];        fc = md->lcc[fc];
2774        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2775            {
2776            if (eptr >= md->end_subject)
2777              {
2778              SCHECK_PARTIAL();
2779              RRETURN(MATCH_NOMATCH);
2780              }
2781          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2782            }
2783        if (min == max) continue;        if (min == max) continue;
2784        if (minimize)        if (minimize)
2785          {          {
2786          for (fi = min;; fi++)          for (fi = min;; fi++)
2787            {            {
2788            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2789            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2790            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) RRETURN(MATCH_NOMATCH);
2791                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2792                {
2793                SCHECK_PARTIAL();
2794              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2795                }
2796              if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2797            }            }
2798          /* Control never gets here */          /* Control never gets here */
2799          }          }
2800        else        else  /* Maximize */
2801          {          {
2802          pp = eptr;          pp = eptr;
2803          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2804            {            {
2805            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2806                {
2807                SCHECK_PARTIAL();
2808                break;
2809                }
2810              if (fc != md->lcc[*eptr]) break;
2811            eptr++;            eptr++;
2812            }            }
2813    
2814            if (possessive) continue;
2815    
2816          while (eptr >= pp)          while (eptr >= pp)
2817            {            {
2818            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2819            eptr--;            eptr--;
2820            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2821            }            }
# Line 2048  for (;;) Line 2828  for (;;)
2828    
2829      else      else
2830        {        {
2831        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
       if (min == max) continue;  
       if (minimize)  
2832          {          {
2833          for (fi = min;; fi++)          if (eptr >= md->end_subject)
2834            {            {
2835            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            SCHECK_PARTIAL();
2836            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            RRETURN(MATCH_NOMATCH);
2837            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            }
2838              RRETURN(MATCH_NOMATCH);          if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2839            }
2840    
2841          if (min == max) continue;
2842    
2843          if (minimize)
2844            {
2845            for (fi = min;; fi++)
2846              {
2847              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2848              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2849              if (fi >= max) RRETURN(MATCH_NOMATCH);
2850              if (eptr >= md->end_subject)
2851                {
2852                SCHECK_PARTIAL();
2853                RRETURN(MATCH_NOMATCH);
2854                }
2855              if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2856            }            }
2857          /* Control never gets here */          /* Control never gets here */
2858          }          }
2859        else        else  /* Maximize */
2860          {          {
2861          pp = eptr;          pp = eptr;
2862          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2863            {            {
2864            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
2865                {
2866                SCHECK_PARTIAL();
2867                break;
2868                }
2869              if (fc != *eptr) break;
2870            eptr++;            eptr++;
2871            }            }
2872            if (possessive) continue;
2873    
2874          while (eptr >= pp)          while (eptr >= pp)
2875            {            {
2876            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2877            eptr--;            eptr--;
2878            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2879            }            }
# Line 2084  for (;;) Line 2886  for (;;)
2886      checking can be multibyte. */      checking can be multibyte. */
2887    
2888      case OP_NOT:      case OP_NOT:
2889      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2890          {
2891          SCHECK_PARTIAL();
2892          RRETURN(MATCH_NOMATCH);
2893          }
2894      ecode++;      ecode++;
2895      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2896      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2121  for (;;) Line 2927  for (;;)
2927      ecode += 3;      ecode += 3;
2928      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2929    
2930        case OP_NOTPOSSTAR:
2931        possessive = TRUE;
2932        min = 0;
2933        max = INT_MAX;
2934        ecode++;
2935        goto REPEATNOTCHAR;
2936    
2937        case OP_NOTPOSPLUS:
2938        possessive = TRUE;
2939        min = 1;
2940        max = INT_MAX;
2941        ecode++;
2942        goto REPEATNOTCHAR;
2943    
2944        case OP_NOTPOSQUERY:
2945        possessive = TRUE;
2946        min = 0;
2947        max = 1;
2948        ecode++;
2949        goto REPEATNOTCHAR;
2950    
2951        case OP_NOTPOSUPTO:
2952        possessive = TRUE;
2953        min = 0;
2954        max = GET2(ecode, 1);
2955        ecode += 3;
2956        goto REPEATNOTCHAR;
2957    
2958      case OP_NOTSTAR:      case OP_NOTSTAR:
2959      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2960      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2133  for (;;) Line 2967  for (;;)
2967      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2968      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2969    
2970      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
2971    
2972      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2973      fc = *ecode++;      fc = *ecode++;
2974    
2975      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2160  for (;;) Line 2991  for (;;)
2991        /* UTF-8 mode */        /* UTF-8 mode */
2992        if (utf8)        if (utf8)
2993          {          {
2994          register int d;          register unsigned int d;
2995          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2996            {            {
2997              if (eptr >= md->end_subject)
2998                {
2999                SCHECK_PARTIAL();
3000                RRETURN(MATCH_NOMATCH);
3001                }
3002            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3003            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3004            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
# Line 2174  for (;;) Line 3010  for (;;)
3010        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3011          {          {
3012          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3013              {
3014              if (eptr >= md->end_subject)
3015                {
3016                SCHECK_PARTIAL();
3017                RRETURN(MATCH_NOMATCH);
3018                }
3019            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
3020              }
3021          }          }
3022    
3023        if (min == max) continue;        if (min == max) continue;
# Line 2185  for (;;) Line 3028  for (;;)
3028          /* UTF-8 mode */          /* UTF-8 mode */
3029          if (utf8)          if (utf8)
3030            {            {
3031            register int d;            register unsigned int d;
3032            for (fi = min;; fi++)            for (fi = min;; fi++)
3033              {              {
3034              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3035              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3036                if (fi >= max) RRETURN(MATCH_NOMATCH);
3037                if (eptr >= md->end_subject)
3038                  {
3039                  SCHECK_PARTIAL();
3040                  RRETURN(MATCH_NOMATCH);
3041                  }
3042              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3043              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3044              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3045              }              }
3046            }            }
3047          else          else
# Line 2202  for (;;) Line 3050  for (;;)
3050            {            {
3051            for (fi = min;; fi++)            for (fi = min;; fi++)
3052              {              {
3053              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3054              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3055              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) RRETURN(MATCH_NOMATCH);
3056                if (eptr >= md->end_subject)
3057                  {
3058                  SCHECK_PARTIAL();
3059                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3060                  }
3061                if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
3062              }              }
3063            }            }
3064          /* Control never gets here */          /* Control never gets here */
# Line 2221  for (;;) Line 3074  for (;;)
3074          /* UTF-8 mode */          /* UTF-8 mode */
3075          if (utf8)          if (utf8)
3076            {            {
3077            register int d;            register unsigned int d;
3078            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3079              {              {
3080              int len = 1;              int len = 1;
3081              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3082                  {
3083                  SCHECK_PARTIAL();
3084                  break;
3085                  }
3086              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3087              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3088              if (fc == d) break;              if (fc == d) break;
3089              eptr += len;              eptr += len;
3090              }              }
3091            for(;;)          if (possessive) continue;
3092            for(;;)
3093              {              {
3094              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3095              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3096              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3097              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2245  for (;;) Line 3103  for (;;)
3103            {            {
3104            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3105              {              {
3106              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3107                  {
3108                  SCHECK_PARTIAL();
3109                  break;
3110                  }
3111                if (fc == md->lcc[*eptr]) break;
3112              eptr++;              eptr++;
3113              }              }
3114              if (possessive) continue;
3115            while (eptr >= pp)            while (eptr >= pp)
3116              {              {
3117              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3118              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3119              eptr--;              eptr--;
3120              }              }
# Line 2269  for (;;) Line 3133  for (;;)
3133        /* UTF-8 mode */        /* UTF-8 mode */
3134        if (utf8)        if (utf8)
3135          {          {
3136          register int d;          register unsigned int d;
3137          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3138            {            {
3139              if (eptr >= md->end_subject)
3140                {
3141                SCHECK_PARTIAL();
3142                RRETURN(MATCH_NOMATCH);
3143                }
3144            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3145            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
3146            }            }
# Line 2281  for (;;) Line 3150  for (;;)
3150        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3151          {          {
3152          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3153              {
3154              if (eptr >= md->end_subject)
3155                {
3156                SCHECK_PARTIAL();
3157                RRETURN(MATCH_NOMATCH);
3158                }
3159            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3160              }
3161          }          }
3162    
3163        if (min == max) continue;        if (min == max) continue;
# Line 2292  for (;;) Line 3168  for (;;)
3168          /* UTF-8 mode */          /* UTF-8 mode */
3169          if (utf8)          if (utf8)
3170            {            {
3171            register int d;            register unsigned int d;
3172            for (fi = min;; fi++)            for (fi = min;; fi++)
3173              {              {
3174              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3175              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3176              GETCHARINC(d, eptr);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3177              if (fi >= max || eptr >= md->end_subject || fc == d)              if (eptr >= md->end_subject)
3178                  {
3179                  SCHECK_PARTIAL();
3180                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3181                  }
3182                GETCHARINC(d, eptr);
3183                if (fc == d) RRETURN(MATCH_NOMATCH);
3184              }              }
3185            }            }
3186          else          else
# Line 2308  for (;;) Line 3189  for (;;)
3189            {            {
3190            for (fi = min;; fi++)            for (fi = min;; fi++)
3191              {              {
3192              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3193              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3194              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) RRETURN(MATCH_NOMATCH);
3195                if (eptr >= md->end_subject)
3196                  {
3197                  SCHECK_PARTIAL();
3198                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3199                  }
3200                if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3201              }              }
3202            }            }
3203          /* Control never gets here */          /* Control never gets here */
# Line 2327  for (;;) Line 3213  for (;;)
3213          /* UTF-8 mode */          /* UTF-8 mode */
3214          if (utf8)          if (utf8)
3215            {            {
3216            register int d;            register unsigned int d;
3217            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3218              {              {
3219              int len = 1;              int len = 1;
3220              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3221                  {
3222                  SCHECK_PARTIAL();
3223                  break;
3224                  }
3225              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3226              if (fc == d) break;              if (fc == d) break;
3227              eptr += len;              eptr += len;
3228              }              }
3229              if (possessive) continue;
3230            for(;;)            for(;;)
3231              {              {
3232              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3233              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3234              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3235              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2350  for (;;) Line 3241  for (;;)
3241            {            {
3242            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3243              {              {
3244              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3245                  {
3246                  SCHECK_PARTIAL();
3247                  break;
3248                  }
3249                if (fc == *eptr) break;
3250              eptr++;              eptr++;
3251              }              }
3252              if (possessive) continue;
3253            while (eptr >= pp)            while (eptr >= pp)
3254              {              {
3255              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3256              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3257              eptr--;              eptr--;
3258              }              }
# Line 2384  for (;;) Line 3281  for (;;)
3281      ecode += 3;      ecode += 3;
3282      goto REPEATTYPE;      goto REPEATTYPE;
3283    
3284        case OP_TYPEPOSSTAR:
3285        possessive = TRUE;
3286        min = 0;
3287        max = INT_MAX;
3288        ecode++;
3289        goto REPEATTYPE;
3290    
3291        case OP_TYPEPOSPLUS:
3292        possessive = TRUE;
3293        min = 1;
3294        max = INT_MAX;
3295        ecode++;
3296        goto REPEATTYPE;
3297    
3298        case OP_TYPEPOSQUERY:
3299        possessive = TRUE;
3300        min = 0;
3301        max = 1;
3302        ecode++;
3303        goto REPEATTYPE;
3304    
3305        case OP_TYPEPOSUPTO:
3306        possessive = TRUE;
3307        min = 0;
3308        max = GET2(ecode, 1);
3309        ecode += 3;
3310        goto REPEATTYPE;
3311    
3312      case OP_TYPESTAR:      case OP_TYPESTAR:
3313      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
3314      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2408  for (;;) Line 3333  for (;;)
3333        {        {
3334        prop_fail_result = ctype == OP_NOTPROP;        prop_fail_result = ctype == OP_NOTPROP;
3335        prop_type = *ecode++;        prop_type = *ecode++;
3336        if (prop_type >= 128)        prop_value = *ecode++;
         {  
         prop_test_against = prop_type - 128;  
         prop_test_variable = &prop_category;  
         }  
       else  
         {  
         prop_test_against = prop_type;  
         prop_test_variable = &prop_chartype;  
         }  
3337        }        }
3338      else prop_type = -1;      else prop_type = -1;
3339  #endif  #endif
3340    
3341      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3342      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3343      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3344      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3345      and single-bytes. */      and single-bytes. */
3346    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3347      if (min > 0)      if (min > 0)
3348        {        {
3349  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3350        if (prop_type > 0)        if (prop_type >= 0)
3351          {          {
3352          for (i = 1; i <= min; i++)          switch(prop_type)
3353            {            {
3354            GETCHARINC(c, eptr);            case PT_ANY:
3355            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3356            if ((*prop_test_variable == prop_test_against) == prop_fail_result)            for (i = 1; i <= min; i++)
3357              RRETURN(MATCH_NOMATCH);              {
3358                if (eptr >= md->end_subject)
3359                  {
3360                  SCHECK_PARTIAL();
3361                  RRETURN(MATCH_NOMATCH);
3362                  }
3363                GETCHARINCTEST(c, eptr);
3364                }
3365              break;
3366    
3367              case PT_LAMP:
3368              for (i = 1; i <= min; i++)
3369                {
3370                if (eptr >= md->end_subject)
3371                  {
3372                  SCHECK_PARTIAL();
3373                  RRETURN(MATCH_NOMATCH);
3374                  }
3375                GETCHARINCTEST(c, eptr);
3376                prop_chartype = UCD_CHARTYPE(c);
3377                if ((prop_chartype == ucp_Lu ||
3378                     prop_chartype == ucp_Ll ||
3379                     prop_chartype == ucp_Lt) == prop_fail_result)
3380                  RRETURN(MATCH_NOMATCH);
3381                }
3382              break;
3383    
3384              case PT_GC:
3385              for (i = 1; i <= min; i++)
3386                {
3387                if (eptr >= md->end_subject)
3388                  {
3389                  SCHECK_PARTIAL();
3390                  RRETURN(MATCH_NOMATCH);
3391                  }
3392                GETCHARINCTEST(c, eptr);
3393                prop_category = UCD_CATEGORY(c);
3394                if ((prop_category == prop_value) == prop_fail_result)
3395                  RRETURN(MATCH_NOMATCH);
3396                }
3397              break;
3398    
3399              case PT_PC:
3400              for (i = 1; i <= min; i++)
3401                {
3402                if (eptr >= md->end_subject)
3403                  {
3404                  SCHECK_PARTIAL();
3405                  RRETURN(MATCH_NOMATCH);
3406                  }
3407                GETCHARINCTEST(c, eptr);
3408                prop_chartype = UCD_CHARTYPE(c);
3409                if ((prop_chartype == prop_value) == prop_fail_result)
3410                  RRETURN(MATCH_NOMATCH);
3411                }
3412              break;
3413    
3414              case PT_SC:
3415              for (i = 1; i <= min; i++)
3416                {
3417                if (eptr >= md->end_subject)
3418                  {
3419                  SCHECK_PARTIAL();
3420                  RRETURN(MATCH_NOMATCH);
3421                  }
3422                GETCHARINCTEST(c, eptr);
3423                prop_script = UCD_SCRIPT(c);
3424                if ((prop_script == prop_value) == prop_fail_result)
3425                  RRETURN(MATCH_NOMATCH);
3426                }
3427              break;
3428    
3429              default:
3430              RRETURN(PCRE_ERROR_INTERNAL);
3431            }            }
3432          }          }
3433    
# Line 2452  for (;;) Line 3438  for (;;)
3438          {          {
3439          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3440            {            {
3441              if (eptr >= md->end_subject)
3442                {
3443                SCHECK_PARTIAL();
3444                RRETURN(MATCH_NOMATCH);
3445                }
3446            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3447            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = UCD_CATEGORY(c);
3448            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3449            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3450              {              {
3451              int len = 1;              int len = 1;
3452              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3453                {                else { GETCHARLEN(c, eptr, len); }
3454                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);  
3455              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3456              eptr += len;              eptr += len;
3457              }              }
# Line 2480  for (;;) Line 3469  for (;;)
3469          case OP_ANY:          case OP_ANY:
3470          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3471            {            {
3472            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3473               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))              {
3474                SCHECK_PARTIAL();
3475                RRETURN(MATCH_NOMATCH);
3476                }
3477              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3478              eptr++;
3479              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3480              }
3481            break;
3482    
3483            case OP_ALLANY:
3484            for (i = 1; i <= min; i++)
3485              {
3486              if (eptr >= md->end_subject)
3487                {
3488                SCHECK_PARTIAL();
3489              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3490                }
3491              eptr++;
3492            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3493            }            }
3494          break;          break;
3495    
3496          case OP_ANYBYTE:          case OP_ANYBYTE:
3497            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3498          eptr += min;          eptr += min;
3499          break;          break;
3500    
3501            case OP_ANYNL:
3502            for (i = 1; i <= min; i++)
3503              {
3504              if (eptr >= md->end_subject)
3505                {
3506                SCHECK_PARTIAL();
3507                RRETURN(MATCH_NOMATCH);
3508                }
3509              GETCHARINC(c, eptr);
3510              switch(c)
3511                {
3512                default: RRETURN(MATCH_NOMATCH);
3513                case 0x000d:
3514                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3515                break;
3516    
3517                case 0x000a:
3518                break;
3519    
3520                case 0x000b:
3521                case 0x000c:
3522                case 0x0085:
3523                case 0x2028:
3524                case 0x2029:
3525                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3526                break;
3527                }
3528              }
3529            break;
3530    
3531            case OP_NOT_HSPACE:
3532            for (i = 1; i <= min; i++)
3533              {
3534              if (eptr >= md->end_subject)
3535                {
3536                SCHECK_PARTIAL();
3537                RRETURN(MATCH_NOMATCH);
3538                }
3539              GETCHARINC(c, eptr);
3540              switch(c)
3541                {
3542                default: break;
3543                case 0x09:      /* HT */
3544                case 0x20:      /* SPACE */
3545                case 0xa0:      /* NBSP */
3546                case 0x1680:    /* OGHAM SPACE MARK */
3547                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3548                case 0x2000:    /* EN QUAD */
3549                case 0x2001:    /* EM QUAD */
3550                case 0x2002:    /* EN SPACE */
3551                case 0x2003:    /* EM SPACE */
3552                case 0x2004:    /* THREE-PER-EM SPACE */
3553                case 0x2005:    /* FOUR-PER-EM SPACE */
3554                case 0x2006:    /* SIX-PER-EM SPACE */
3555                case 0x2007:    /* FIGURE SPACE */
3556                case 0x2008:    /* PUNCTUATION SPACE */
3557                case 0x2009:    /* THIN SPACE */
3558                case 0x200A:    /* HAIR SPACE */
3559                case 0x202f:    /* NARROW NO-BREAK SPACE */
3560                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3561                case 0x3000:    /* IDEOGRAPHIC SPACE */
3562                RRETURN(MATCH_NOMATCH);
3563                }
3564              }
3565            break;
3566    
3567            case OP_HSPACE:
3568            for (i = 1; i <= min; i++)
3569              {
3570              if (eptr >= md->end_subject)
3571                {
3572                SCHECK_PARTIAL();
3573                RRETURN(MATCH_NOMATCH);
3574                }
3575              GETCHARINC(c, eptr);
3576              switch(c)
3577                {
3578                default: RRETURN(MATCH_NOMATCH);
3579                case 0x09:      /* HT */
3580                case 0x20:      /* SPACE */
3581                case 0xa0:      /* NBSP */
3582                case 0x1680:    /* OGHAM SPACE MARK */
3583                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3584                case 0x2000:    /* EN QUAD */
3585                case 0x2001:    /* EM QUAD */
3586                case 0x2002:    /* EN SPACE */
3587                case 0x2003:    /* EM SPACE */
3588                case 0x2004:    /* THREE-PER-EM SPACE */
3589                case 0x2005:    /* FOUR-PER-EM SPACE */
3590                case 0x2006:    /* SIX-PER-EM SPACE */
3591                case 0x2007:    /* FIGURE SPACE */
3592                case 0x2008:    /* PUNCTUATION SPACE */
3593                case 0x2009:    /* THIN SPACE */
3594                case 0x200A:    /* HAIR SPACE */
3595                case 0x202f:    /* NARROW NO-BREAK SPACE */
3596                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3597                case 0x3000:    /* IDEOGRAPHIC SPACE */
3598                break;
3599                }
3600              }
3601            break;
3602    
3603            case OP_NOT_VSPACE:
3604            for (i = 1; i <= min; i++)
3605              {
3606              if (eptr >= md->end_subject)
3607                {
3608                SCHECK_PARTIAL();
3609                RRETURN(MATCH_NOMATCH);
3610                }
3611              GETCHARINC(c, eptr);
3612              switch(c)
3613                {
3614                default: break;
3615                case 0x0a:      /* LF */
3616                case 0x0b:      /* VT */
3617                case 0x0c:      /* FF */
3618                case 0x0d:      /* CR */
3619                case 0x85:      /* NEL */
3620                case 0x2028:    /* LINE SEPARATOR */
3621                case 0x2029:    /* PARAGRAPH SEPARATOR */
3622                RRETURN(MATCH_NOMATCH);
3623                }
3624              }
3625            break;
3626    
3627            case OP_VSPACE:
3628            for (i = 1; i <= min; i++)
3629              {
3630              if (eptr >= md->end_subject)
3631                {
3632                SCHECK_PARTIAL();
3633                RRETURN(MATCH_NOMATCH);
3634                }
3635              GETCHARINC(c, eptr);
3636              switch(c)
3637                {
3638                default: RRETURN(MATCH_NOMATCH);
3639                case 0x0a:      /* LF */
3640                case 0x0b:      /* VT */
3641                case 0x0c:      /* FF */
3642                case 0x0d:      /* CR */
3643                case 0x85:      /* NEL */
3644                case 0x2028:    /* LINE SEPARATOR */
3645                case 0x2029:    /* PARAGRAPH SEPARATOR */
3646                break;
3647                }
3648              }
3649            break;
3650    
3651          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3652          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3653            {            {
3654            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3655                {
3656                SCHECK_PARTIAL();
3657                RRETURN(MATCH_NOMATCH);
3658                }
3659            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3660            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3661              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 2504  for (;;) Line 3665  for (;;)
3665          case OP_DIGIT:          case OP_DIGIT:
3666          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3667            {            {
3668            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3669               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3670                SCHECK_PARTIAL();
3671                RRETURN(MATCH_NOMATCH);
3672                }
3673              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3674              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3675            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3676            }            }
# Line 2514  for (;;) Line 3679  for (;;)
3679          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3680          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3681            {            {
3682            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3683               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3684                SCHECK_PARTIAL();
3685              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3686            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              }
3687              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3688                RRETURN(MATCH_NOMATCH);
3689              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3690            }            }
3691          break;          break;
3692    
3693          case OP_WHITESPACE:          case OP_WHITESPACE:
3694          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3695            {            {
3696            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3697               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3698                SCHECK_PARTIAL();
3699                RRETURN(MATCH_NOMATCH);
3700                }
3701              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3702              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3703            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3704            }            }
# Line 2534  for (;;) Line 3707  for (;;)
3707          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3708          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3709            {            {
3710            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3711               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))              {
3712                SCHECK_PARTIAL();
3713              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3714            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              }
3715              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3716                RRETURN(MATCH_NOMATCH);
3717              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3718            }            }
3719          break;          break;
3720    
3721          case OP_WORDCHAR:          case OP_WORDCHAR:
3722          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3723            {            {
3724            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3725               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3726                SCHECK_PARTIAL();
3727                RRETURN(MATCH_NOMATCH);
3728                }
3729              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3730              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3731            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3732            }            }
# Line 2564  for (;;) Line 3745  for (;;)
3745        switch(ctype)        switch(ctype)
3746          {          {
3747          case OP_ANY:          case OP_ANY:
3748          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3749            {            {
3750            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
3751              if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);              {
3752            }              SCHECK_PARTIAL();
3753          else eptr += min;              RRETURN(MATCH_NOMATCH);
3754                }
3755              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3756              eptr++;
3757              }
3758            break;
3759    
3760            case OP_ALLANY:
3761            if (eptr > md->end_subject - min)
3762              {
3763              SCHECK_PARTIAL();
3764              RRETURN(MATCH_NOMATCH);
3765              }
3766            eptr += min;
3767          break;          break;
3768    
3769          case OP_ANYBYTE:          case OP_ANYBYTE:
3770            if (eptr > md->end_subject - min)
3771              {
3772              SCHECK_PARTIAL();
3773              RRETURN(MATCH_NOMATCH);
3774              }
3775          eptr += min;          eptr += min;
3776          break;          break;
3777    
3778            case OP_ANYNL:
3779            for (i = 1; i <= min; i++)
3780              {
3781              if (eptr >= md->end_subject)
3782                {
3783                SCHECK_PARTIAL();
3784                RRETURN(MATCH_NOMATCH);
3785                }
3786              switch(*eptr++)
3787                {
3788                default: RRETURN(MATCH_NOMATCH);
3789                case 0x000d:
3790                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3791                break;
3792                case 0x000a:
3793                break;
3794    
3795                case 0x000b:
3796                case 0x000c:
3797                case 0x0085:
3798                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3799                break;
3800                }
3801              }
3802            break;
3803    
3804            case OP_NOT_HSPACE:
3805            for (i = 1; i <= min; i++)
3806              {
3807              if (eptr >= md->end_subject)
3808                {
3809                SCHECK_PARTIAL();
3810                RRETURN(MATCH_NOMATCH);
3811                }
3812              switch(*eptr++)
3813                {
3814                default: break;
3815                case 0x09:      /* HT */
3816                case 0x20:      /* SPACE */
3817                case 0xa0:      /* NBSP */
3818                RRETURN(MATCH_NOMATCH);
3819                }
3820              }
3821            break;
3822    
3823            case OP_HSPACE:
3824            for (i = 1; i <= min; i++)
3825              {
3826              if (eptr >= md->end_subject)
3827                {
3828                SCHECK_PARTIAL();
3829                RRETURN(MATCH_NOMATCH);
3830                }
3831              switch(*eptr++)
3832                {
3833                default: RRETURN(MATCH_NOMATCH);
3834                case 0x09:      /* HT */
3835                case 0x20:      /* SPACE */
3836                case 0xa0:      /* NBSP */
3837                break;
3838                }
3839              }
3840            break;
3841    
3842            case OP_NOT_VSPACE:
3843            for (i = 1; i <= min; i++)
3844              {
3845              if (eptr >= md->end_subject)
3846                {
3847                SCHECK_PARTIAL();
3848                RRETURN(MATCH_NOMATCH);
3849                }
3850              switch(*eptr++)
3851                {
3852                default: break;
3853                case 0x0a:      /* LF */
3854                case 0x0b:      /* VT */
3855                case 0x0c:      /* FF */
3856                case 0x0d:      /* CR */
3857                case 0x85:      /* NEL */
3858                RRETURN(MATCH_NOMATCH);
3859                }
3860              }
3861            break;
3862    
3863            case OP_VSPACE:
3864            for (i = 1; i <= min; i++)
3865              {
3866              if (eptr >= md->end_subject)
3867                {
3868                SCHECK_PARTIAL();
3869                RRETURN(MATCH_NOMATCH);
3870                }
3871              switch(*eptr++)
3872                {
3873                default: RRETURN(MATCH_NOMATCH);
3874                case 0x0a:      /* LF */
3875                case 0x0b:      /* VT */
3876                case 0x0c:      /* FF */
3877                case 0x0d:      /* CR */
3878                case 0x85:      /* NEL */
3879                break;
3880                }
3881              }
3882            break;
3883    
3884          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3885          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3886              {
3887              if (eptr >= md->end_subject)
3888                {
3889                SCHECK_PARTIAL();
3890                RRETURN(MATCH_NOMATCH);
3891                }
3892            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3893              }
3894          break;          break;
3895    
3896          case OP_DIGIT:          case OP_DIGIT:
3897          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3898              {
3899              if (eptr >= md->end_subject)
3900                {
3901                SCHECK_PARTIAL();
3902                RRETURN(MATCH_NOMATCH);
3903                }
3904            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3905              }
3906          break;          break;
3907    
3908          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3909          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3910              {
3911              if (eptr >= md->end_subject)
3912                {
3913                SCHECK_PARTIAL();
3914                RRETURN(MATCH_NOMATCH);
3915                }
3916            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3917              }
3918          break;          break;
3919    
3920          case OP_WHITESPACE:          case OP_WHITESPACE:
3921          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3922              {
3923              if (eptr >= md->end_subject)
3924                {
3925                SCHECK_PARTIAL();
3926                RRETURN(MATCH_NOMATCH);
3927                }
3928            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3929              }
3930          break;          break;
3931    
3932          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3933          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3934              {
3935              if (eptr >= md->end_subject)
3936                {
3937                SCHECK_PARTIAL();
3938                RRETURN(MATCH_NOMATCH);
3939                }
3940            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if ((md->ctypes[*eptr++] & ctype_word) != 0)
3941              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3942              }
3943          break;          break;
3944    
3945          case OP_WORDCHAR:          case OP_WORDCHAR:
3946          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3947              {
3948              if (eptr >= md->end_subject)
3949                {
3950                SCHECK_PARTIAL();
3951                RRETURN(MATCH_NOMATCH);
3952                }
3953            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if ((md->ctypes[*eptr++] & ctype_word) == 0)
3954              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3955              }
3956          break;          break;
3957    
3958          default:          default:
# Line 2624  for (;;) Line 3971  for (;;)
3971      if (minimize)      if (minimize)
3972        {        {
3973  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3974        if (prop_type > 0)        if (prop_type >= 0)
3975          {          {
3976          for (fi = min;; fi++)          switch(prop_type)
3977            {            {
3978            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            case PT_ANY:
3979            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for (fi = min;; fi++)
3980            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              {
3981            GETCHARINC(c, eptr);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3982            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3983            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (fi >= max) RRETURN(MATCH_NOMATCH);
3984              RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3985                  {
3986                  SCHECK_PARTIAL();
3987                  RRETURN(MATCH_NOMATCH);
3988                  }
3989                GETCHARINC(c, eptr);
3990                if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3991                }
3992              /* Control never gets here */
3993    
3994              case PT_LAMP:
3995              for (fi = min;; fi++)
3996                {
3997                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3998                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3999                if (fi >= max) RRETURN(MATCH_NOMATCH);
4000                if (eptr >= md->end_subject)
4001                  {
4002                  SCHECK_PARTIAL();
4003                  RRETURN(MATCH_NOMATCH);
4004                  }
4005                GETCHARINC(c, eptr);
4006                prop_chartype = UCD_CHARTYPE(c);
4007                if ((prop_chartype == ucp_Lu ||
4008                     prop_chartype == ucp_Ll ||
4009                     prop_chartype == ucp_Lt) == prop_fail_result)
4010                  RRETURN(MATCH_NOMATCH);
4011                }
4012              /* Control never gets here */
4013    
4014              case PT_GC:
4015              for (fi = min;; fi++)
4016                {
4017                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
4018                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4019                if (fi >= max) RRETURN(MATCH_NOMATCH);
4020                if (eptr >= md->end_subject)
4021                  {
4022                  SCHECK_PARTIAL();
4023                  RRETURN(MATCH_NOMATCH);
4024                  }
4025                GETCHARINC(c, eptr);
4026                prop_category = UCD_CATEGORY(c);
4027                if ((prop_category == prop_value) == prop_fail_result)
4028                  RRETURN(MATCH_NOMATCH);
4029                }
4030              /* Control never gets here */
4031    
4032              case PT_PC:
4033              for (fi = min;; fi++)
4034                {
4035                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4036                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4037                if (fi >= max) RRETURN(MATCH_NOMATCH);
4038                if (eptr >= md->end_subject)
4039                  {
4040                  SCHECK_PARTIAL();
4041                  RRETURN(MATCH_NOMATCH);
4042                  }
4043                GETCHARINC(c, eptr);
4044                prop_chartype = UCD_CHARTYPE(c);
4045                if ((prop_chartype == prop_value) == prop_fail_result)
4046                  RRETURN(MATCH_NOMATCH);
4047                }
4048              /* Control never gets here */
4049    
4050              case PT_SC:
4051              for (fi = min;; fi++)
4052                {
4053                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
4054                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4055                if (fi >= max) RRETURN(MATCH_NOMATCH);
4056                if (eptr >= md->end_subject)
4057                  {
4058                  SCHECK_PARTIAL();
4059                  RRETURN(MATCH_NOMATCH);
4060                  }
4061                GETCHARINC(c, eptr);
4062                prop_script = UCD_SCRIPT(c);
4063                if ((prop_script == prop_value) == prop_fail_result)
4064                  RRETURN(MATCH_NOMATCH);
4065                }
4066              /* Control never gets here */
4067    
4068              default:
4069              RRETURN(PCRE_ERROR_INTERNAL);
4070            }            }
4071          }          }
4072    
# Line 2645  for (;;) Line 4077  for (;;)
4077          {          {
4078          for (fi = min;; fi++)          for (fi = min;; fi++)
4079            {            {
4080            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
4081            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4082            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
4083              if (eptr >= md->end_subject)
4084                {