/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 427 by ph10, Fri Aug 28 09:55:54 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49    #define NLBLOCK md             /* Block containing newline information */
50    #define PSSTART start_subject  /* Field containing processed string start */
51    #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    /* Undefine some potentially clashing cpp symbols */
56    
57  /* Structure for building a chain of data that actually lives on the  #undef min
58  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   const uschar *epb_saved_eptr;  
 } eptrblock;  
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_THEN         (-996)
78    
79  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
80  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 101  Returns:     nothing Line 109  Returns:     nothing
109  static void  static void
110  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
111  {  {
112  int c;  unsigned int c;
113  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
114  while (length-- > 0)  while (length-- > 0)
115    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 128  Returns:      TRUE if matched Line 136  Returns:      TRUE if matched
136  */  */
137    
138  static BOOL  static BOOL
139  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
140    unsigned long int ims)    unsigned long int ims)
141  {  {
142  const uschar *p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
143    
144  #ifdef DEBUG  #ifdef DEBUG
145  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 150  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 169  return TRUE; Line 203  return TRUE;
203  ****************************************************************************  ****************************************************************************
204                     RECURSION IN THE match() FUNCTION                     RECURSION IN THE match() FUNCTION
205    
206  The match() function is highly recursive. Some regular expressions can cause  The match() function is highly recursive, though not every recursive call
207  it to recurse thousands of times. I was writing for Unix, so I just let it  increases the recursive depth. Nevertheless, some regular expressions can cause
208  call itself recursively. This uses the stack for saving everything that has  it to recurse to a great depth. I was writing for Unix, so I just let it call
209  to be saved for a recursive call. On Unix, the stack can be large, and this  itself recursively. This uses the stack for saving everything that has to be
210  works fine.  saved for a recursive call. On Unix, the stack can be large, and this works
211    fine.
212  It turns out that on non-Unix systems there are problems with programs that  
213  use a lot of stack. (This despite the fact that every last chip has oodles  It turns out that on some non-Unix-like systems there are problems with
214  of memory these days, and techniques for extending the stack have been known  programs that use a lot of stack. (This despite the fact that every last chip
215  for decades.) So....  has oodles of memory these days, and techniques for extending the stack have
216    been known for decades.) So....
217    
218  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
219  calls by keeping local variables that need to be preserved in blocks of memory  calls by keeping local variables that need to be preserved in blocks of memory
220  obtained from malloc instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
221  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
222  always used to.  always used to.
223    
224    The original heap-recursive code used longjmp(). However, it seems that this
225    can be very slow on some operating systems. Following a suggestion from Stan
226    Switzer, the use of longjmp() has been abolished, at the cost of having to
227    provide a unique number for each call to RMATCH. There is no way of generating
228    a sequence of numbers at compile time in C. I have given them names, to make
229    them stand out more clearly.
230    
231    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
232    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
233    tests. Furthermore, not using longjmp() means that local dynamic variables
234    don't have indeterminate values; this has meant that the frame size can be
235    reduced because the result can be "passed back" by straight setting of the
236    variable instead of being passed in the frame.
237  ****************************************************************************  ****************************************************************************
238  ***************************************************************************/  ***************************************************************************/
239    
240    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
241    below must be updated in sync.  */
242    
243  /* These versions of the macros use the stack, as normal */  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
244           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
245           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
246           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
247           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
248           RM51,  RM52, RM53, RM54 };
249    
250    /* These versions of the macros use the stack, as normal. There are debugging
251    versions and production versions. Note that the "rw" argument of RMATCH isn't
252    actually used in this definition. */
253    
254  #ifndef NO_RECURSE  #ifndef NO_RECURSE
255  #define REGISTER register  #define REGISTER register
256  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)  
257    #ifdef DEBUG
258    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
259      { \
260      printf("match() called in line %d\n", __LINE__); \
261      rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
262      printf("to line %d\n", __LINE__); \
263      }
264    #define RRETURN(ra) \
265      { \
266      printf("match() returned %d from line %d ", ra, __LINE__); \
267      return ra; \
268      }
269    #else
270    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
271      rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
272  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
273    #endif
274    
275  #else  #else
276    
277    
278  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
279  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
280  match(), which never changes. */  argument of match(), which never changes. */
281    
282  #define REGISTER  #define REGISTER
283    
284  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
285    {\    {\
286    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
287    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
288      {\    newframe->Xeptr = ra;\
289      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
290      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
291      newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
292      newframe->Xims = re;\    newframe->Xims = re;\
293      newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
294      newframe->Xflags = rg;\    newframe->Xflags = rg;\
295      newframe->Xprevframe = frame;\    newframe->Xrdepth = frame->Xrdepth + 1;\
296      frame = newframe;\    newframe->Xprevframe = frame;\
297      DPRINTF(("restarting from line %d\n", __LINE__));\    frame = newframe;\
298      goto HEAP_RECURSE;\    DPRINTF(("restarting from line %d\n", __LINE__));\
299      }\    goto HEAP_RECURSE;\
300    else\    L_##rw:\
301      {\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
302    }    }
303    
304  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 235  match(), which never changes. */ Line 308  match(), which never changes. */
308    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
309    if (frame != NULL)\    if (frame != NULL)\
310      {\      {\
311      frame->Xresult = ra;\      rrc = ra;\
312      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
313      }\      }\
314    return ra;\    return ra;\
315    }    }
# Line 250  typedef struct heapframe { Line 322  typedef struct heapframe {
322    
323    /* Function arguments that may change */    /* Function arguments that may change */
324    
325    const uschar *Xeptr;    USPTR Xeptr;
326    const uschar *Xecode;    const uschar *Xecode;
327      USPTR Xmstart;
328    int Xoffset_top;    int Xoffset_top;
329    long int Xims;    long int Xims;
330    eptrblock *Xeptrb;    eptrblock *Xeptrb;
331    int Xflags;    int Xflags;
332      unsigned int Xrdepth;
333    
334    /* Function local variables */    /* Function local variables */
335    
336    const uschar *Xcallpat;    USPTR Xcallpat;
337    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
338    const uschar *Xdata;    USPTR Xcharptr;
339    const uschar *Xnext;  #endif
340    const uschar *Xpp;    USPTR Xdata;
341    const uschar *Xprev;    USPTR Xnext;
342    const uschar *Xsaved_eptr;    USPTR Xpp;
343      USPTR Xprev;
344      USPTR Xsaved_eptr;
345    
346    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
347    
348    BOOL Xcur_is_word;    BOOL Xcur_is_word;
349    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
350    BOOL Xprev_is_word;    BOOL Xprev_is_word;
351    
352    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
353    
354  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
355    int Xprop_type;    int Xprop_type;
356      int Xprop_value;
357    int Xprop_fail_result;    int Xprop_fail_result;
358    int Xprop_category;    int Xprop_category;
359    int Xprop_chartype;    int Xprop_chartype;
360    int Xprop_othercase;    int Xprop_script;
361    int Xprop_test_against;    int Xoclength;
362    int *Xprop_test_variable;    uschar Xocchars[8];
363  #endif  #endif
364    
365      int Xcodelink;
366    int Xctype;    int Xctype;
367    int Xfc;    unsigned int Xfc;
368    int Xfi;    int Xfi;
369    int Xlength;    int Xlength;
370    int Xmax;    int Xmax;
# Line 301  typedef struct heapframe { Line 378  typedef struct heapframe {
378    
379    eptrblock Xnewptrb;    eptrblock Xnewptrb;
380    
381    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
382    
383    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
384    
385  } heapframe;  } heapframe;
386    
# Line 320  typedef struct heapframe { Line 396  typedef struct heapframe {
396  *         Match from current position            *  *         Match from current position            *
397  *************************************************/  *************************************************/
398    
399  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
400  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
401  same response.  same response. */
402    
403    /* These macros pack up tests that are used for partial matching, and which
404    appear several times in the code. We set the "hit end" flag if the pointer is
405    at the end of the subject and also past the start of the subject (i.e.
406    something has been matched). For hard partial matching, we then return
407    immediately. The second one is used when we already know we are past the end of
408    the subject. */
409    
410    #define CHECK_PARTIAL()\
411      if (md->partial && eptr >= md->end_subject && eptr > mstart)\
412        {\
413        md->hitend = TRUE;\
414        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
415        }
416    
417    #define SCHECK_PARTIAL()\
418      if (md->partial && eptr > mstart)\
419        {\
420        md->hitend = TRUE;\
421        md->hitend = TRUE;\
422        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
423        }
424    
425  Performance note: It might be tempting to extract commonly used fields from the  
426  md structure (e.g. utf8, end_subject) into individual variables to improve  /* Performance note: It might be tempting to extract commonly used fields from
427    the md structure (e.g. utf8, end_subject) into individual variables to improve
428  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
429  made performance worse.  made performance worse.
430    
431  Arguments:  Arguments:
432     eptr        pointer in subject     eptr        pointer to current character in subject
433     ecode       position in code     ecode       pointer to current position in compiled code
434       mstart      pointer to the current match start position (can be modified
435                     by encountering \K)
436     offset_top  current top pointer     offset_top  current top pointer
437     md          pointer to "static" info for the match     md          pointer to "static" info for the match
438     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 342  Arguments: Line 440  Arguments:
440                   brackets - for testing for empty matches                   brackets - for testing for empty matches
441     flags       can contain     flags       can contain
442                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
443                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
444                       group that can match an empty string
445       rdepth      the recursion depth
446    
447  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
448                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
449                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
450                   (e.g. stopped by recursion limit)                   (e.g. stopped by repeated call or recursion limit)
451  */  */
452    
453  static int  static int
454  match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
455    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
456    int flags)    int flags, unsigned int rdepth)
457  {  {
458  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
459  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
460  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
461    
462    register int  rrc;         /* Returns from recursive calls */
463    register int  i;           /* Used for loops not involving calls to RMATCH() */
464    register unsigned int c;   /* Character values not kept over RMATCH() calls */
465    register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
466    
467  register int  rrc;    /* Returns from recursive calls */  BOOL minimize, possessive; /* Quantifier options */
468  register int  i;      /* Used for loops not involving calls to RMATCH() */  int condcode;
 register int  c;      /* Character values not kept over RMATCH() calls */  
 register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  
469    
470  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
471  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 377  frame->Xprevframe = NULL;            /* Line 480  frame->Xprevframe = NULL;            /*
480    
481  frame->Xeptr = eptr;  frame->Xeptr = eptr;
482  frame->Xecode = ecode;  frame->Xecode = ecode;
483    frame->Xmstart = mstart;
484  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
485  frame->Xims = ims;  frame->Xims = ims;
486  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
487  frame->Xflags = flags;  frame->Xflags = flags;
488    frame->Xrdepth = rdepth;
489    
490  /* This is where control jumps back to to effect "recursion" */  /* This is where control jumps back to to effect "recursion" */
491    
# Line 390  HEAP_RECURSE: Line 495  HEAP_RECURSE:
495    
496  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
497  #define ecode              frame->Xecode  #define ecode              frame->Xecode
498    #define mstart             frame->Xmstart
499  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
500  #define ims                frame->Xims  #define ims                frame->Xims
501  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
502  #define flags              frame->Xflags  #define flags              frame->Xflags
503    #define rdepth             frame->Xrdepth
504    
505  /* Ditto for the local variables */  /* Ditto for the local variables */
506    
# Line 401  HEAP_RECURSE: Line 508  HEAP_RECURSE:
508  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
509  #endif  #endif
510  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
511    #define codelink           frame->Xcodelink
512  #define data               frame->Xdata  #define data               frame->Xdata
513  #define next               frame->Xnext  #define next               frame->Xnext
514  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 411  HEAP_RECURSE: Line 519  HEAP_RECURSE:
519    
520  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
521  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
522  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
523    
524  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
525    
526  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
527  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
528    #define prop_value         frame->Xprop_value
529  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
530  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
531  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
532  #define prop_othercase     frame->Xprop_othercase  #define prop_script        frame->Xprop_script
533  #define prop_test_against  frame->Xprop_test_against  #define oclength           frame->Xoclength
534  #define prop_test_variable frame->Xprop_test_variable  #define occhars            frame->Xocchars
535  #endif  #endif
536    
537  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 447  HEAP_RECURSE: Line 555  HEAP_RECURSE:
555  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
556  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
557    
558  #else  #else         /* NO_RECURSE not defined */
559  #define fi i  #define fi i
560  #define fc c  #define fc c
561    
562    
563  #ifdef SUPPORT_UTF8                /* Many of these variables are used ony */  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
564  const uschar *charptr;             /* small blocks of the code. My normal  */  const uschar *charptr;             /* in small blocks of the code. My normal */
565  #endif                             /* style of coding would have declared  */  #endif                             /* style of coding would have declared    */
566  const uschar *callpat;             /* them within each of those blocks.    */  const uschar *callpat;             /* them within each of those blocks.      */
567  const uschar *data;                /* However, in order to accommodate the */  const uschar *data;                /* However, in order to accommodate the   */
568  const uschar *next;                /* version of this code that uses an    */  const uschar *next;                /* version of this code that uses an      */
569  const uschar *pp;                  /* external "stack" implemented on the  */  USPTR         pp;                  /* external "stack" implemented on the    */
570  const uschar *prev;                /* heap, it is easier to declare them   */  const uschar *prev;                /* heap, it is easier to declare them all */
571  const uschar *saved_eptr;          /* all here, so the declarations can    */  USPTR         saved_eptr;          /* here, so the declarations can be cut   */
572                                     /* be cut out in a block. The only      */                                     /* out in a block. The only declarations  */
573  recursion_info new_recursive;      /* declarations within blocks below are */  recursion_info new_recursive;      /* within blocks below are for variables  */
574                                     /* for variables that do not have to    */                                     /* that do not have to be preserved over  */
575  BOOL cur_is_word;                  /* be preserved over a recursive call   */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
576  BOOL condition;                    /* to RMATCH().                         */  BOOL condition;
 BOOL minimize;  
577  BOOL prev_is_word;  BOOL prev_is_word;
578    
579  unsigned long int original_ims;  unsigned long int original_ims;
580    
581  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
582  int prop_type;  int prop_type;
583    int prop_value;
584  int prop_fail_result;  int prop_fail_result;
585  int prop_category;  int prop_category;
586  int prop_chartype;  int prop_chartype;
587  int prop_othercase;  int prop_script;
588  int prop_test_against;  int oclength;
589  int *prop_test_variable;  uschar occhars[8];
590  #endif  #endif
591    
592    int codelink;
593  int ctype;  int ctype;
594  int length;  int length;
595  int max;  int max;
# Line 493  int save_offset1, save_offset2, save_off Line 602  int save_offset1, save_offset2, save_off
602  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
603    
604  eptrblock newptrb;  eptrblock newptrb;
605  #endif  #endif     /* NO_RECURSE */
606    
607  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
608  variables. */  variables. */
609    
610  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
611    prop_value = 0;
612  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_against = 0;  
 prop_test_variable = NULL;  
613  #endif  #endif
614    
615  /* OK, now we can get on with the real code of the function. Recursion is  
616  specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,  /* This label is used for tail recursion, which is used in a few cases even
617  these just turn into a recursive call to match() and a "return", respectively.  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
618  However, RMATCH isn't like a function call because it's quite a complicated  used. Thanks to Ian Taylor for noticing this possibility and sending the
619  macro. It has to be used in one particular way. This shouldn't, however, impact  original patch. */
620  performance when true recursion is being used. */  
621    TAIL_RECURSE:
622    
623    /* OK, now we can get on with the real code of the function. Recursive calls
624    are specified by the macro RMATCH and RRETURN is used to return. When
625    NO_RECURSE is *not* defined, these just turn into a recursive call to match()
626    and a "return", respectively (possibly with some debugging if DEBUG is
627    defined). However, RMATCH isn't like a function call because it's quite a
628    complicated macro. It has to be used in one particular way. This shouldn't,
629    however, impact performance when true recursion is being used. */
630    
631    #ifdef SUPPORT_UTF8
632    utf8 = md->utf8;       /* Local copy of the flag */
633    #else
634    utf8 = FALSE;
635    #endif
636    
637    /* First check that we haven't called match() too many times, or that we
638    haven't exceeded the recursive call limit. */
639    
640  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
641    if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
642    
643  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
 utf8 = md->utf8;       /* Local copy of the flag */  
644    
645  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
646  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
647  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
648  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
649    When match() is called in other circumstances, don't add to the chain. The
650    match_cbegroup flag must NOT be used with tail recursion, because the memory
651    block that is used is on the stack, so a new one may be required for each
652    match(). */
653    
654  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
655    {    {
   newptrb.epb_prev = eptrb;  
656    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
657      newptrb.epb_prev = eptrb;
658    eptrb = &newptrb;    eptrb = &newptrb;
659    }    }
660    
661  /* Now start processing the operations. */  /* Now start processing the opcodes. */
662    
663  for (;;)  for (;;)
664    {    {
665      minimize = possessive = FALSE;
666    op = *ecode;    op = *ecode;
   minimize = FALSE;  
667    
668    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
669    matching at least one subject character. */    matching at least one subject character. This code is now wrapped in a macro
670      because it appears several times below. */
671    
672    if (md->partial &&    CHECK_PARTIAL();
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
   /* Opening capturing bracket. If there is space in the offset vector, save  
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
673    
674    if (op > OP_BRA)    switch(op)
675      {      {
676      number = op - OP_BRA;      case OP_FAIL:
677        RRETURN(MATCH_NOMATCH);
     /* For extended extraction brackets (large number), we have to fish out the  
     number from a dummy opcode at the start. */  
678    
679      if (number > EXTRACT_BASIC_MAX)      case OP_PRUNE:
680        number = GET2(ecode, 2+LINK_SIZE);      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
681          ims, eptrb, flags, RM51);
682        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
683        RRETURN(MATCH_PRUNE);
684    
685        case OP_COMMIT:
686        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
687          ims, eptrb, flags, RM52);
688        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
689        RRETURN(MATCH_COMMIT);
690    
691        case OP_SKIP:
692        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
693          ims, eptrb, flags, RM53);
694        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
695        md->start_match_ptr = eptr;   /* Pass back current position */
696        RRETURN(MATCH_SKIP);
697    
698        case OP_THEN:
699        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
700          ims, eptrb, flags, RM54);
701        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
702        RRETURN(MATCH_THEN);
703    
704        /* Handle a capturing bracket. If there is space in the offset vector, save
705        the current subject position in the working slot at the top of the vector.
706        We mustn't change the current values of the data slot, because they may be
707        set from a previous iteration of this group, and be referred to by a
708        reference inside the group.
709    
710        If the bracket fails to match, we need to restore this value and also the
711        values of the final offsets, in case they were set by a previous iteration
712        of the same bracket.
713    
714        If there isn't enough space in the offset vector, treat this as if it were
715        a non-capturing bracket. Don't worry about setting the flag for the error
716        case here; that is handled in the code for KET. */
717    
718        case OP_CBRA:
719        case OP_SCBRA:
720        number = GET2(ecode, 1+LINK_SIZE);
721      offset = number << 1;      offset = number << 1;
722    
723  #ifdef DEBUG  #ifdef DEBUG
724      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
725        printf("subject=");
726      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
727      printf("\n");      printf("\n");
728  #endif  #endif
# Line 584  for (;;) Line 737  for (;;)
737        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
738        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
739    
740          flags = (op == OP_SCBRA)? match_cbegroup : 0;
741        do        do
742          {          {
743          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
744            match_isgroup);            ims, eptrb, flags, RM1);
745          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
746          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
747          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
748          }          }
# Line 603  for (;;) Line 757  for (;;)
757        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
758        }        }
759    
760      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
761        as a non-capturing bracket. */
762    
763      else op = OP_BRA;      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
764      }      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
765    
766    /* Other types of node can be handled by a switch */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
767    
768    switch(op)      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
769      {      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
770      case OP_BRA:     /* Non-capturing bracket: optimized */  
771      DPRINTF(("start bracket 0\n"));      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
772      do      final alternative within the brackets, we would return the result of a
773        recursive call to match() whatever happened. We can reduce stack usage by
774        turning this into a tail recursion, except in the case when match_cbegroup
775        is set. */
776    
777        case OP_BRA:
778        case OP_SBRA:
779        DPRINTF(("start non-capturing bracket\n"));
780        flags = (op >= OP_SBRA)? match_cbegroup : 0;
781        for (;;)
782        {        {
783        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
784          match_isgroup);          {
785        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (flags == 0)    /* Not a possibly empty group */
786              {
787              ecode += _pcre_OP_lengths[*ecode];
788              DPRINTF(("bracket 0 tail recursion\n"));
789              goto TAIL_RECURSE;
790              }
791    
792            /* Possibly empty group; can't use tail recursion. */
793    
794            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
795              eptrb, flags, RM48);
796            RRETURN(rrc);
797            }
798    
799          /* For non-final alternatives, continue the loop for a NOMATCH result;
800          otherwise return. */
801    
802          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
803            eptrb, flags, RM2);
804          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
805        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
806        }        }
807      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
808    
809      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
810      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
811      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
812      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
813        obeyed, we can use tail recursion to avoid using another stack frame. */
814    
815      case OP_COND:      case OP_COND:
816      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
817        codelink= GET(ecode, 1);
818    
819        /* Because of the way auto-callout works during compile, a callout item is
820        inserted between OP_COND and an assertion condition. */
821    
822        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
823          {
824          if (pcre_callout != NULL)
825            {
826            pcre_callout_block cb;
827            cb.version          = 1;   /* Version 1 of the callout block */
828            cb.callout_number   = ecode[LINK_SIZE+2];
829            cb.offset_vector    = md->offset_vector;
830            cb.subject          = (PCRE_SPTR)md->start_subject;
831            cb.subject_length   = md->end_subject - md->start_subject;
832            cb.start_match      = mstart - md->start_subject;
833            cb.current_position = eptr - md->start_subject;
834            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
835            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
836            cb.capture_top      = offset_top/2;
837            cb.capture_last     = md->capture_last;
838            cb.callout_data     = md->callout_data;
839            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
840            if (rrc < 0) RRETURN(rrc);
841            }
842          ecode += _pcre_OP_lengths[OP_CALLOUT];
843          }
844    
845        condcode = ecode[LINK_SIZE+1];
846    
847        /* Now see what the actual condition is */
848    
849        if (condcode == OP_RREF)         /* Recursion test */
850          {
851          offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number */
852          condition = md->recursive != NULL &&
853            (offset == RREF_ANY || offset == md->recursive->group_num);
854          ecode += condition? 3 : GET(ecode, 1);
855          }
856    
857        else if (condcode == OP_CREF)    /* Group used test */
858        {        {
859        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
860        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
861          (md->recursive != NULL) :        ecode += condition? 3 : GET(ecode, 1);
862          (offset < offset_top && md->offset_vector[offset] >= 0);        }
863        RMATCH(rrc, eptr, ecode + (condition?  
864          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),      else if (condcode == OP_DEF)     /* DEFINE - always false */
865          offset_top, md, ims, eptrb, match_isgroup);        {
866        RRETURN(rrc);        condition = FALSE;
867          ecode += GET(ecode, 1);
868        }        }
869    
870      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
871      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
872        assertion. */
873    
874      else      else
875        {        {
876        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
877            match_condassert | match_isgroup);            match_condassert, RM3);
878        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
879          {          {
880          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
881            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
882          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
883          }          }
884        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
885          {          {
886          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
887          }          }
888        else ecode += GET(ecode, 1);        else
889        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
890          match_isgroup);          condition = FALSE;
891        RRETURN(rrc);          ecode += codelink;
892            }
893        }        }
     /* Control never reaches here */  
894    
895      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
896      encountered. */      we can use tail recursion to avoid using another stack frame, except when
897        match_cbegroup is required for an unlimited repeat of a possibly empty
898        group. If the second alternative doesn't exist, we can just plough on. */
899    
900      case OP_CREF:      if (condition || *ecode == OP_ALT)
901      case OP_BRANUMBER:        {
902      ecode += 3;        ecode += 1 + LINK_SIZE;
903          if (op == OP_SCOND)        /* Possibly empty group */
904            {
905            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
906            RRETURN(rrc);
907            }
908          else                       /* Group must match something */
909            {
910            flags = 0;
911            goto TAIL_RECURSE;
912            }
913          }
914        else                         /* Condition false & no alternative */
915          {
916          ecode += 1 + LINK_SIZE;
917          }
918      break;      break;
919    
     /* End of the pattern. If we are in a recursion, we should restore the  
     offsets appropriately and continue from after the call. */  
920    
921        /* End of the pattern, either real or forced. If we are in a top-level
922        recursion, we should restore the offsets appropriately and continue from
923        after the call. */
924    
925        case OP_ACCEPT:
926      case OP_END:      case OP_END:
927      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
928        {        {
929        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
930        DPRINTF(("Hit the end in a (?0) recursion\n"));        DPRINTF(("End of pattern in a (?0) recursion\n"));
931        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
932        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
933          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
934        md->start_match = rec->save_start;        mstart = rec->save_start;
935        ims = original_ims;        ims = original_ims;
936        ecode = rec->after_call;        ecode = rec->after_call;
937        break;        break;
# Line 694  for (;;) Line 940  for (;;)
940      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
941      string - backtracking will then try other alternatives, if any. */      string - backtracking will then try other alternatives, if any. */
942    
943      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
944      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
945      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
946        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
947      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
948    
949      /* Change option settings */      /* Change option settings */
# Line 717  for (;;) Line 964  for (;;)
964      case OP_ASSERTBACK:      case OP_ASSERTBACK:
965      do      do
966        {        {
967        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
968          match_isgroup);          RM4);
969        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
970        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
971        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
972        }        }
973      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 744  for (;;) Line 991  for (;;)
991      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
992      do      do
993        {        {
994        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
995          match_isgroup);          RM5);
996        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
997        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
998        ecode += GET(ecode,1);        ecode += GET(ecode,1);
999        }        }
1000      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 766  for (;;) Line 1013  for (;;)
1013  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1014      if (utf8)      if (utf8)
1015        {        {
1016        c = GET(ecode,1);        i = GET(ecode, 1);
1017        for (i = 0; i < c; i++)        while (i-- > 0)
1018          {          {
1019          eptr--;          eptr--;
1020          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1021          BACKCHAR(eptr)          BACKCHAR(eptr);
1022          }          }
1023        }        }
1024      else      else
# Line 780  for (;;) Line 1027  for (;;)
1027      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1028    
1029        {        {
1030        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
1031        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1032        }        }
1033    
# Line 800  for (;;) Line 1047  for (;;)
1047        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 1;   /* Version 1 of the callout block */
1048        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1049        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1050        cb.subject          = (const char *)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1051        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
1052        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
1053        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
1054        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1055        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
# Line 837  for (;;) Line 1084  for (;;)
1084      case OP_RECURSE:      case OP_RECURSE:
1085        {        {
1086        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1087        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
1088            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
1089    
1090        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1091    
# Line 869  for (;;) Line 1111  for (;;)
1111    
1112        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1113              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1114        new_recursive.save_start = md->start_match;        new_recursive.save_start = mstart;
1115        md->start_match = eptr;        mstart = eptr;
1116    
1117        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1118        restore the offset and recursion data. */        restore the offset and recursion data. */
1119    
1120        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1121          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1122        do        do
1123          {          {
1124          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1125              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
1126          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
1127            {            {
1128              DPRINTF(("Recursion matched\n"));
1129            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1130            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1131              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1132            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
1133            }            }
1134          else if (rrc != MATCH_NOMATCH) RRETURN(rrc);          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1135              {
1136              DPRINTF(("Recursion gave error %d\n", rrc));
1137              if (new_recursive.offset_save != stacksave)
1138                (pcre_free)(new_recursive.offset_save);
1139              RRETURN(rrc);
1140              }
1141    
1142          md->recursive = &new_recursive;          md->recursive = &new_recursive;
1143          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
# Line 912  for (;;) Line 1162  for (;;)
1162      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer. */
1163    
1164      case OP_ONCE:      case OP_ONCE:
1165        {      prev = ecode;
1166        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1167    
1168        do      do
1169          {        {
1170          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1171            eptrb, match_isgroup);        if (rrc == MATCH_MATCH) break;
1172          if (rrc == MATCH_MATCH) break;        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1173          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += GET(ecode,1);
1174          ecode += GET(ecode,1);        }
1175          }      while (*ecode == OP_ALT);
       while (*ecode == OP_ALT);  
1176    
1177        /* If we hit the end of the group (which could be repeated), fail */      /* If we hit the end of the group (which could be repeated), fail */
1178    
1179        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1180    
1181        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1182        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1183    
1184        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1185    
1186        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1187        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1188    
1189        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1190        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1191        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1192        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1193        course of events. */      course of events. */
1194    
1195        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1196          {        {
1197          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1198          break;        break;
1199          }        }
1200    
1201        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1202        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1203        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1204        opcode. */      any options that changed within the bracket before re-running it, so
1205        check the next opcode. */
1206    
1207        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1208          {        {
1209          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1210          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1211          }        }
1212    
1213        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1214          {        {
1215          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1216          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1217          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1218          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = 0;
1219          }        goto TAIL_RECURSE;
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
1220        }        }
1221      RRETURN(MATCH_NOMATCH);      else  /* OP_KETRMAX */
1222          {
1223          RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1224          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1225          ecode += 1 + LINK_SIZE;
1226          flags = 0;
1227          goto TAIL_RECURSE;
1228          }
1229        /* Control never gets here */
1230    
1231      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1232      bracketed group and go to there. */      bracketed group and go to there. */
# Line 985  for (;;) Line 1235  for (;;)
1235      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1236      break;      break;
1237    
1238      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1239      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1240      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1241      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1242      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1243    
1244      case OP_BRAZERO:      case OP_BRAZERO:
1245        {        {
1246        next = ecode+1;        next = ecode+1;
1247        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1248        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1249        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1250        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1251        }        }
1252      break;      break;
1253    
1254      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1255        {        {
1256        next = ecode+1;        next = ecode+1;
1257        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1258        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1259        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1260        ecode++;        ecode++;
1261        }        }
1262      break;      break;
1263    
1264      /* End of a group, repeated or non-repeating. If we are at the end of      case OP_SKIPZERO:
1265      an assertion "group", stop matching and return MATCH_MATCH, but record the        {
1266      current high water mark for use by positive assertions. Do this also        next = ecode+1;
1267      for the "once" (not-backup up) groups. */        do next += GET(next,1); while (*next == OP_ALT);
1268          ecode = next + 1 + LINK_SIZE;
1269          }
1270        break;
1271    
1272        /* End of a group, repeated or non-repeating. */
1273    
1274      case OP_KET:      case OP_KET:
1275      case OP_KETRMIN:      case OP_KETRMIN:
1276      case OP_KETRMAX:      case OP_KETRMAX:
1277        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
1278    
1279        /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1280        infinite repeats of empty string matches, retrieve the subject start from
1281        the chain. Otherwise, set it NULL. */
1282    
1283        eptrb = eptrb->epb_prev;      if (*prev >= OP_SBRA)
1284          {
1285        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1286            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1287            *prev == OP_ONCE)        }
1288          {      else saved_eptr = NULL;
         md->end_match_ptr = eptr;      /* For ONCE */  
         md->end_offset_top = offset_top;  
         RRETURN(MATCH_MATCH);  
         }  
1289    
1290        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group, stop matching and return
1291        group number back at the start and if necessary complete handling an      MATCH_MATCH, but record the current high water mark for use by positive
1292        extraction by setting the offsets and bumping the high water mark. */      assertions. Do this also for the "once" (atomic) groups. */
1293    
1294        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1295          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1296          number = *prev - OP_BRA;          *prev == OP_ONCE)
1297          {
1298          md->end_match_ptr = eptr;      /* For ONCE */
1299          md->end_offset_top = offset_top;
1300          RRETURN(MATCH_MATCH);
1301          }
1302    
1303          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1304          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1305        bumping the high water mark. Note that whole-pattern recursion is coded as
1306        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1307        when the OP_END is reached. Other recursion is handled here. */
1308    
1309          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1310          offset = number << 1;        {
1311          number = GET2(prev, 1+LINK_SIZE);
1312          offset = number << 1;
1313    
1314  #ifdef DEBUG  #ifdef DEBUG
1315          printf("end bracket %d", number);        printf("end bracket %d", number);
1316          printf("\n");        printf("\n");
1317  #endif  #endif
1318    
1319          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1320          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1321          into group 0, so it won't be picked up here. Instead, we catch it when          {
1322          the OP_END is reached. */          md->offset_vector[offset] =
1323              md->offset_vector[md->offset_end - number];
1324          if (number > 0)          md->offset_vector[offset+1] = eptr - md->start_subject;
1325            {          if (offset_top <= offset) offset_top = offset + 2;
1326            md->capture_last = number;          }
1327            if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
1328              {        /* Handle a recursively called group. Restore the offsets
1329              md->offset_vector[offset] =        appropriately and continue from after the call. */
1330                md->offset_vector[md->offset_end - number];  
1331              md->offset_vector[offset+1] = eptr - md->start_subject;        if (md->recursive != NULL && md->recursive->group_num == number)
1332              if (offset_top <= offset) offset_top = offset + 2;          {
1333              }          recursion_info *rec = md->recursive;
1334            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1335            /* Handle a recursively called group. Restore the offsets          md->recursive = rec->prevrec;
1336            appropriately and continue from after the call. */          mstart = rec->save_start;
1337            memcpy(md->offset_vector, rec->offset_save,
1338            if (md->recursive != NULL && md->recursive->group_num == number)            rec->saved_max * sizeof(int));
1339              {          ecode = rec->after_call;
1340              recursion_info *rec = md->recursive;          ims = original_ims;
1341              DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          break;
             md->recursive = rec->prevrec;  
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
1342          }          }
1343          }
1344    
1345        /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1346        the group. */      flags, in case they got changed during the group. */
1347    
1348        ims = original_ims;      ims = original_ims;
1349        DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
1350    
1351        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1352        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1353        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1354        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1355        course of events. */      course of events. */
1356    
1357        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1358          {        {
1359          ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1360          break;        break;
1361          }        }
1362    
1363        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1364        preceding bracket, in the appropriate order. */      preceding bracket, in the appropriate order. In the second case, we can use
1365        tail recursion to avoid using another stack frame, unless we have an
1366        unlimited repeat of a group that can match an empty string. */
1367    
1368        if (*ecode == OP_KETRMIN)      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1369          {  
1370          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);      if (*ecode == OP_KETRMIN)
1371          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        {
1372          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1373          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1374          }        if (flags != 0)    /* Could match an empty string */
       else  /* OP_KETRMAX */  
1375          {          {
1376          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1377          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          RRETURN(rrc);
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
1378          }          }
1379          ecode = prev;
1380          goto TAIL_RECURSE;
1381        }        }
1382        else  /* OP_KETRMAX */
1383      RRETURN(MATCH_NOMATCH);        {
1384          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1385          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1386          ecode += 1 + LINK_SIZE;
1387          flags = 0;
1388          goto TAIL_RECURSE;
1389          }
1390        /* Control never gets here */
1391    
1392      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1393    
# Line 1135  for (;;) Line 1395  for (;;)
1395      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1396      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1397        {        {
1398        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1399              (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1400          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1401        ecode++;        ecode++;
1402        break;        break;
# Line 1156  for (;;) Line 1417  for (;;)
1417      ecode++;      ecode++;
1418      break;      break;
1419    
1420        /* Reset the start of match point */
1421    
1422        case OP_SET_SOM:
1423        mstart = eptr;
1424        ecode++;
1425        break;
1426    
1427      /* Assert before internal newline if multiline, or before a terminating      /* Assert before internal newline if multiline, or before a terminating
1428      newline unless endonly is set, else end of subject unless noteol is set. */      newline unless endonly is set, else end of subject unless noteol is set. */
1429    
# Line 1163  for (;;) Line 1431  for (;;)
1431      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1432        {        {
1433        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1434          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1435        else        else
1436          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1437        ecode++;        ecode++;
# Line 1174  for (;;) Line 1442  for (;;)
1442        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
1443        if (!md->endonly)        if (!md->endonly)
1444          {          {
1445          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1446             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1447            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1448          ecode++;          ecode++;
1449          break;          break;
1450          }          }
1451        }        }
1452      /* ... else fall through */      /* ... else fall through for endonly */
1453    
1454      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1455    
# Line 1193  for (;;) Line 1461  for (;;)
1461      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1462    
1463      case OP_EODN:      case OP_EODN:
1464      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1465         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1466          RRETURN(MATCH_NOMATCH);
1467      ecode++;      ecode++;
1468      break;      break;
1469    
# Line 1213  for (;;) Line 1482  for (;;)
1482          {          {
1483          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1484            {            {
1485            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1486            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1487            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1488            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1247  for (;;) Line 1516  for (;;)
1516      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1517    
1518      case OP_ANY:      case OP_ANY:
1519      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1520        RRETURN(MATCH_NOMATCH);      /* Fall through */
1521    
1522        case OP_ALLANY:
1523      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1524  #ifdef SUPPORT_UTF8      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
 #endif  
1525      ecode++;      ecode++;
1526      break;      break;
1527    
# Line 1343  for (;;) Line 1611  for (;;)
1611      ecode++;      ecode++;
1612      break;      break;
1613    
1614  #ifdef SUPPORT_UCP      case OP_ANYNL:
     /* Check the next character by Unicode property. We will get here only  
     if the support is in the binary; otherwise a compile-time error occurs. */  
   
     case OP_PROP:  
     case OP_NOTPROP:  
1615      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1616      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1617        switch(c)
1618        {        {
1619        int chartype, rqdtype;        default: RRETURN(MATCH_NOMATCH);
1620        int othercase;        case 0x000d:
1621        int category = ucp_findchar(c, &chartype, &othercase);        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1622          break;
1623    
1624        rqdtype = *(++ecode);        case 0x000a:
1625        ecode++;        break;
1626    
1627        if (rqdtype >= 128)        case 0x000b:
1628          {        case 0x000c:
1629          if ((rqdtype - 128 != category) == (op == OP_PROP))        case 0x0085:
1630            RRETURN(MATCH_NOMATCH);        case 0x2028:
1631          }        case 0x2029:
1632        else        if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1633          {        break;
         if ((rqdtype != chartype) == (op == OP_PROP))  
           RRETURN(MATCH_NOMATCH);  
         }  
1634        }        }
1635        ecode++;
1636      break;      break;
1637    
1638      /* Match an extended Unicode sequence. We will get here only if the support      case OP_NOT_HSPACE:
     is in the binary; otherwise a compile-time error occurs. */  
   
     case OP_EXTUNI:  
1639      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1640      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1641        switch(c)
1642        {        {
1643        int chartype;        default: break;
1644        int othercase;        case 0x09:      /* HT */
1645        int category = ucp_findchar(c, &chartype, &othercase);        case 0x20:      /* SPACE */
1646        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        case 0xa0:      /* NBSP */
1647        while (eptr < md->end_subject)        case 0x1680:    /* OGHAM SPACE MARK */
1648          {        case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1649          int len = 1;        case 0x2000:    /* EN QUAD */
1650          if (!utf8) c = *eptr; else        case 0x2001:    /* EM QUAD */
1651            {        case 0x2002:    /* EN SPACE */
1652            GETCHARLEN(c, eptr, len);        case 0x2003:    /* EM SPACE */
1653            }        case 0x2004:    /* THREE-PER-EM SPACE */
1654          category = ucp_findchar(c, &chartype, &othercase);        case 0x2005:    /* FOUR-PER-EM SPACE */
1655          if (category != ucp_M) break;        case 0x2006:    /* SIX-PER-EM SPACE */
1656          eptr += len;        case 0x2007:    /* FIGURE SPACE */
1657          }        case 0x2008:    /* PUNCTUATION SPACE */
1658          case 0x2009:    /* THIN SPACE */
1659          case 0x200A:    /* HAIR SPACE */
1660          case 0x202f:    /* NARROW NO-BREAK SPACE */
1661          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1662          case 0x3000:    /* IDEOGRAPHIC SPACE */
1663          RRETURN(MATCH_NOMATCH);
1664        }        }
1665      ecode++;      ecode++;
1666      break;      break;
 #endif  
   
   
     /* Match a back reference, possibly repeatedly. Look past the end of the  
     item to see if there is repeat information following. The code is similar  
     to that for character classes, but repeated for efficiency. Then obey  
     similar code to character type repeats - written out again for speed.  
     However, if the referenced string is the empty string, always treat  
     it as matched, any number of times (otherwise there could be infinite  
     loops). */  
1667    
1668      case OP_REF:      case OP_HSPACE:
1669        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1670        GETCHARINCTEST(c, eptr);
1671        switch(c)
1672          {
1673          default: RRETURN(MATCH_NOMATCH);
1674          case 0x09:      /* HT */
1675          case 0x20:      /* SPACE */
1676          case 0xa0:      /* NBSP */
1677          case 0x1680:    /* OGHAM SPACE MARK */
1678          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1679          case 0x2000:    /* EN QUAD */
1680          case 0x2001:    /* EM QUAD */
1681          case 0x2002:    /* EN SPACE */
1682          case 0x2003:    /* EM SPACE */
1683          case 0x2004:    /* THREE-PER-EM SPACE */
1684          case 0x2005:    /* FOUR-PER-EM SPACE */
1685          case 0x2006:    /* SIX-PER-EM SPACE */
1686          case 0x2007:    /* FIGURE SPACE */
1687          case 0x2008:    /* PUNCTUATION SPACE */
1688          case 0x2009:    /* THIN SPACE */
1689          case 0x200A:    /* HAIR SPACE */
1690          case 0x202f:    /* NARROW NO-BREAK SPACE */
1691          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1692          case 0x3000:    /* IDEOGRAPHIC SPACE */
1693          break;
1694          }
1695        ecode++;
1696        break;
1697    
1698        case OP_NOT_VSPACE:
1699        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1700        GETCHARINCTEST(c, eptr);
1701        switch(c)
1702          {
1703          default: break;
1704          case 0x0a:      /* LF */
1705          case 0x0b:      /* VT */
1706          case 0x0c:      /* FF */
1707          case 0x0d:      /* CR */
1708          case 0x85:      /* NEL */
1709          case 0x2028:    /* LINE SEPARATOR */
1710          case 0x2029:    /* PARAGRAPH SEPARATOR */
1711          RRETURN(MATCH_NOMATCH);
1712          }
1713        ecode++;
1714        break;
1715    
1716        case OP_VSPACE:
1717        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1718        GETCHARINCTEST(c, eptr);
1719        switch(c)
1720          {
1721          default: RRETURN(MATCH_NOMATCH);
1722          case 0x0a:      /* LF */
1723          case 0x0b:      /* VT */
1724          case 0x0c:      /* FF */
1725          case 0x0d:      /* CR */
1726          case 0x85:      /* NEL */
1727          case 0x2028:    /* LINE SEPARATOR */
1728          case 0x2029:    /* PARAGRAPH SEPARATOR */
1729          break;
1730          }
1731        ecode++;
1732        break;
1733    
1734    #ifdef SUPPORT_UCP
1735        /* Check the next character by Unicode property. We will get here only
1736        if the support is in the binary; otherwise a compile-time error occurs. */
1737    
1738        case OP_PROP:
1739        case OP_NOTPROP:
1740        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1741        GETCHARINCTEST(c, eptr);
1742          {
1743          const ucd_record *prop = GET_UCD(c);
1744    
1745          switch(ecode[1])
1746            {
1747            case PT_ANY:
1748            if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1749            break;
1750    
1751            case PT_LAMP:
1752            if ((prop->chartype == ucp_Lu ||
1753                 prop->chartype == ucp_Ll ||
1754                 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1755              RRETURN(MATCH_NOMATCH);
1756             break;
1757    
1758            case PT_GC:
1759            if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1760              RRETURN(MATCH_NOMATCH);
1761            break;
1762    
1763            case PT_PC:
1764            if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1765              RRETURN(MATCH_NOMATCH);
1766            break;
1767    
1768            case PT_SC:
1769            if ((ecode[2] != prop->script) == (op == OP_PROP))
1770              RRETURN(MATCH_NOMATCH);
1771            break;
1772    
1773            default:
1774            RRETURN(PCRE_ERROR_INTERNAL);
1775            }
1776    
1777          ecode += 3;
1778          }
1779        break;
1780    
1781        /* Match an extended Unicode sequence. We will get here only if the support
1782        is in the binary; otherwise a compile-time error occurs. */
1783    
1784        case OP_EXTUNI:
1785        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1786        GETCHARINCTEST(c, eptr);
1787          {
1788          int category = UCD_CATEGORY(c);
1789          if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1790          while (eptr < md->end_subject)
1791            {
1792            int len = 1;
1793            if (!utf8) c = *eptr; else
1794              {
1795              GETCHARLEN(c, eptr, len);
1796              }
1797            category = UCD_CATEGORY(c);
1798            if (category != ucp_M) break;
1799            eptr += len;
1800            }
1801          }
1802        ecode++;
1803        break;
1804    #endif
1805    
1806    
1807        /* Match a back reference, possibly repeatedly. Look past the end of the
1808        item to see if there is repeat information following. The code is similar
1809        to that for character classes, but repeated for efficiency. Then obey
1810        similar code to character type repeats - written out again for speed.
1811        However, if the referenced string is the empty string, always treat
1812        it as matched, any number of times (otherwise there could be infinite
1813        loops). */
1814    
1815        case OP_REF:
1816        {        {
1817        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1818        ecode += 3;                                 /* Advance past item */        ecode += 3;
1819    
1820          /* If the reference is unset, there are two possibilities:
1821    
1822          (a) In the default, Perl-compatible state, set the length to be longer
1823          than the amount of subject left; this ensures that every attempt at a
1824          match fails. We can't just fail here, because of the possibility of
1825          quantifiers with zero minima.
1826    
1827        /* If the reference is unset, set the length to be longer than the amount        (b) If the JavaScript compatibility flag is set, set the length to zero
1828        of subject left; this ensures that every attempt at a match fails. We        so that the back reference matches an empty string.
1829        can't just fail here, because of the possibility of quantifiers with zero  
1830        minima. */        Otherwise, set the length to the length of what was matched by the
1831          referenced subpattern. */
1832        length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
1833          md->end_subject - eptr + 1 :        if (offset >= offset_top || md->offset_vector[offset] < 0)
1834          md->offset_vector[offset+1] - md->offset_vector[offset];          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1835          else
1836            length = md->offset_vector[offset+1] - md->offset_vector[offset];
1837    
1838        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1839    
# Line 1465  for (;;) Line 1878  for (;;)
1878    
1879        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
1880          {          {
1881          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
1882              {
1883              CHECK_PARTIAL();
1884              RRETURN(MATCH_NOMATCH);
1885              }
1886          eptr += length;          eptr += length;
1887          }          }
1888    
# Line 1480  for (;;) Line 1897  for (;;)
1897          {          {
1898          for (fi = min;; fi++)          for (fi = min;; fi++)
1899            {            {
1900            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1901            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1902            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1903                {
1904                CHECK_PARTIAL();
1905              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
1906                }
1907            eptr += length;            eptr += length;
1908            }            }
1909          /* Control never gets here */          /* Control never gets here */
# Line 1499  for (;;) Line 1919  for (;;)
1919            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims)) break;
1920            eptr += length;            eptr += length;
1921            }            }
1922            CHECK_PARTIAL();
1923          while (eptr >= pp)          while (eptr >= pp)
1924            {            {
1925            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1926            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1927            eptr -= length;            eptr -= length;
1928            }            }
# Line 1566  for (;;) Line 1987  for (;;)
1987          {          {
1988          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
1989            {            {
1990            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
1991                {
1992                CHECK_PARTIAL();
1993                RRETURN(MATCH_NOMATCH);
1994                }
1995            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
1996            if (c > 255)            if (c > 255)
1997              {              {
# Line 1584  for (;;) Line 2009  for (;;)
2009          {          {
2010          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2011            {            {
2012            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2013                {
2014                CHECK_PARTIAL();
2015                RRETURN(MATCH_NOMATCH);
2016                }
2017            c = *eptr++;            c = *eptr++;
2018            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2019            }            }
# Line 1606  for (;;) Line 2035  for (;;)
2035            {            {
2036            for (fi = min;; fi++)            for (fi = min;; fi++)
2037              {              {
2038              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2039              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2040              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
2041                  {
2042                  CHECK_PARTIAL();
2043                  RRETURN(MATCH_NOMATCH);
2044                  }
2045                if (eptr >= md->end_subject)
2046                  {
2047                  SCHECK_PARTIAL();
2048                  RRETURN(MATCH_NOMATCH);
2049                  }
2050              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2051              if (c > 255)              if (c > 255)
2052                {                {
# Line 1626  for (;;) Line 2064  for (;;)
2064            {            {
2065            for (fi = min;; fi++)            for (fi = min;; fi++)
2066              {              {
2067              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2068              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2069              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
2070                  {
2071                  CHECK_PARTIAL();
2072                  RRETURN(MATCH_NOMATCH);
2073                  }
2074                if (eptr >= md->end_subject)
2075                  {
2076                  SCHECK_PARTIAL();
2077                  RRETURN(MATCH_NOMATCH);
2078                  }
2079              c = *eptr++;              c = *eptr++;
2080              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2081              }              }
# Line 1661  for (;;) Line 2108  for (;;)
2108                }                }
2109              eptr += len;              eptr += len;
2110              }              }
2111              CHECK_PARTIAL();
2112            for (;;)            for (;;)
2113              {              {
2114              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2115              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2116              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2117              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1680  for (;;) Line 2128  for (;;)
2128              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2129              eptr++;              eptr++;
2130              }              }
2131              CHECK_PARTIAL();
2132            while (eptr >= pp)            while (eptr >= pp)
2133              {              {
2134              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
             eptr--;  
2135              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2136                eptr--;
2137              }              }
2138            }            }
2139    
# Line 1695  for (;;) Line 2144  for (;;)
2144    
2145    
2146      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2147      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2148        mode, because Unicode properties are supported in non-UTF-8 mode. */
2149    
2150  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2151      case OP_XCLASS:      case OP_XCLASS:
# Line 1736  for (;;) Line 2186  for (;;)
2186    
2187        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2188          {          {
2189          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2190          GETCHARINC(c, eptr);            {
2191              SCHECK_PARTIAL();
2192              RRETURN(MATCH_NOMATCH);
2193              }
2194            GETCHARINCTEST(c, eptr);
2195          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2196          }          }
2197    
# Line 1753  for (;;) Line 2207  for (;;)
2207          {          {
2208          for (fi = min;; fi++)          for (fi = min;; fi++)
2209            {            {
2210            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2211            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2212            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max)
2213            GETCHARINC(c, eptr);              {
2214                CHECK_PARTIAL();
2215                RRETURN(MATCH_NOMATCH);
2216                }
2217              if (eptr >= md->end_subject)
2218                {
2219                SCHECK_PARTIAL();
2220                RRETURN(MATCH_NOMATCH);
2221                }
2222              GETCHARINCTEST(c, eptr);
2223            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2224            }            }
2225          /* Control never gets here */          /* Control never gets here */
# Line 1771  for (;;) Line 2234  for (;;)
2234            {            {
2235            int len = 1;            int len = 1;
2236            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2237            GETCHARLEN(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
2238            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2239            eptr += len;            eptr += len;
2240            }            }
2241            CHECK_PARTIAL();
2242          for(;;)          for(;;)
2243            {            {
2244            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2245            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2246            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2247            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2248            }            }
2249          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2250          }          }
# Line 1836  for (;;) Line 2300  for (;;)
2300    
2301        else        else
2302          {          {
2303          int dc;          unsigned int dc;
2304          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2305          ecode += length;          ecode += length;
2306    
2307          /* If we have Unicode property support, we can use it to test the other          /* If we have Unicode property support, we can use it to test the other
2308          case of the character, if there is one. The result of ucp_findchar() is          case of the character, if there is one. */
         < 0 if the char isn't found, and othercase is returned as zero if there  
         isn't one. */  
2309    
2310          if (fc != dc)          if (fc != dc)
2311            {            {
2312  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2313            int chartype;            if (dc != UCD_OTHERCASE(fc))
           int othercase;  
           if (ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)  
2314  #endif  #endif
2315              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2316            }            }
# Line 1867  for (;;) Line 2327  for (;;)
2327        }        }
2328      break;      break;
2329    
2330      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2331    
2332      case OP_EXACT:      case OP_EXACT:
2333      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2334      ecode += 3;      ecode += 3;
2335      goto REPEATCHAR;      goto REPEATCHAR;
2336    
2337        case OP_POSUPTO:
2338        possessive = TRUE;
2339        /* Fall through */
2340    
2341      case OP_UPTO:      case OP_UPTO:
2342      case OP_MINUPTO:      case OP_MINUPTO:
2343      min = 0;      min = 0;
# Line 1882  for (;;) Line 2346  for (;;)
2346      ecode += 3;      ecode += 3;
2347      goto REPEATCHAR;      goto REPEATCHAR;
2348    
2349        case OP_POSSTAR:
2350        possessive = TRUE;
2351        min = 0;
2352        max = INT_MAX;
2353        ecode++;
2354        goto REPEATCHAR;
2355    
2356        case OP_POSPLUS:
2357        possessive = TRUE;
2358        min = 1;
2359        max = INT_MAX;
2360        ecode++;
2361        goto REPEATCHAR;
2362    
2363        case OP_POSQUERY:
2364        possessive = TRUE;
2365        min = 0;
2366        max = 1;
2367        ecode++;
2368        goto REPEATCHAR;
2369    
2370      case OP_STAR:      case OP_STAR:
2371      case OP_MINSTAR:      case OP_MINSTAR:
2372      case OP_PLUS:      case OP_PLUS:
# Line 1894  for (;;) Line 2379  for (;;)
2379      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2380      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2381    
2382      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2383    
2384      REPEATCHAR:      REPEATCHAR:
2385  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1905  for (;;) Line 2388  for (;;)
2388        length = 1;        length = 1;
2389        charptr = ecode;        charptr = ecode;
2390        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2391        ecode += length;        ecode += length;
2392    
2393        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 1913  for (;;) Line 2395  for (;;)
2395    
2396        if (length > 1)        if (length > 1)
2397          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2398  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2399          int othercase;          unsigned int othercase;
         int chartype;  
2400          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2401               ucp_findchar(fc, &chartype, &othercase) >= 0 &&              (othercase = UCD_OTHERCASE(fc)) != fc)
              othercase > 0)  
2402            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2403            else oclength = 0;
2404  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2405    
2406          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2407            {            {
2408            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2409            /* Need braces because of following else */              memcmp(eptr, charptr, length) == 0) eptr += length;
2410            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2411              else if (oclength > 0 &&
2412                       eptr <= md->end_subject - oclength &&
2413                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2414    #endif  /* SUPPORT_UCP */
2415            else            else
2416              {              {
2417              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2418              eptr += oclength;              RRETURN(MATCH_NOMATCH);
2419              }              }
2420            }            }
2421    
# Line 1943  for (;;) Line 2425  for (;;)
2425            {            {
2426            for (fi = min;; fi++)            for (fi = min;; fi++)
2427              {              {
2428              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2429              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2430              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
2431              if (memcmp(eptr, charptr, length) == 0) eptr += length;                {
2432              /* Need braces because of following else */                CHECK_PARTIAL();
2433              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                RRETURN(MATCH_NOMATCH);
2434                  }
2435                if (eptr <= md->end_subject - length &&
2436                  memcmp(eptr, charptr, length) == 0) eptr += length;
2437    #ifdef SUPPORT_UCP
2438                else if (oclength > 0 &&
2439                         eptr <= md->end_subject - oclength &&
2440                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2441    #endif  /* SUPPORT_UCP */
2442              else              else
2443                {                {
2444                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2445                eptr += oclength;                RRETURN(MATCH_NOMATCH);
2446                }                }
2447              }              }
2448            /* Control never gets here */            /* Control never gets here */
2449            }            }
2450          else  
2451            else  /* Maximize */
2452            {            {
2453            pp = eptr;            pp = eptr;
2454            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2455              {              {
2456              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2457              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2458              else if (oclength == 0) break;  #ifdef SUPPORT_UCP
2459              else              else if (oclength > 0 &&
2460                {                       eptr <= md->end_subject - oclength &&
2461                if (memcmp(eptr, occhars, oclength) != 0) break;                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2462                eptr += oclength;  #endif  /* SUPPORT_UCP */
2463                }              else break;
2464                }
2465    
2466              CHECK_PARTIAL();
2467              if (possessive) continue;
2468    
2469              for(;;)
2470                {
2471                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2472                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2473                if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2474    #ifdef SUPPORT_UCP
2475                eptr--;
2476                BACKCHAR(eptr);
2477    #else   /* without SUPPORT_UCP */
2478                eptr -= length;
2479    #endif  /* SUPPORT_UCP */
2480              }              }
           while (eptr >= pp)  
            {  
            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);  
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
            eptr -= length;  
            }  
           RRETURN(MATCH_NOMATCH);  
2481            }            }
2482          /* Control never gets here */          /* Control never gets here */
2483          }          }
# Line 1990  for (;;) Line 2490  for (;;)
2490  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2491    
2492      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2493        {  
2494        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2495    
2496      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2497      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2011  for (;;) Line 2509  for (;;)
2509        {        {
2510        fc = md->lcc[fc];        fc = md->lcc[fc];
2511        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2512            {
2513            if (eptr >= md->end_subject)
2514              {
2515              SCHECK_PARTIAL();
2516              RRETURN(MATCH_NOMATCH);
2517              }
2518          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2519            }
2520        if (min == max) continue;        if (min == max) continue;
2521        if (minimize)        if (minimize)
2522          {          {
2523          for (fi = min;; fi++)          for (fi = min;; fi++)
2524            {            {
2525            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2526            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2527            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max)
2528                fc != md->lcc[*eptr++])              {
2529                CHECK_PARTIAL();
2530              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2531                }
2532              if (eptr >= md->end_subject)
2533                {
2534                SCHECK_PARTIAL();
2535                RRETURN(MATCH_NOMATCH);
2536                }
2537              if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2538            }            }
2539          /* Control never gets here */          /* Control never gets here */
2540          }          }
2541        else        else  /* Maximize */
2542          {          {
2543          pp = eptr;          pp = eptr;
2544          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2033  for (;;) Line 2546  for (;;)
2546            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2547            eptr++;            eptr++;
2548            }            }
2549    
2550            CHECK_PARTIAL();
2551            if (possessive) continue;
2552    
2553          while (eptr >= pp)          while (eptr >= pp)
2554            {            {
2555            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2556            eptr--;            eptr--;
2557            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2558            }            }
# Line 2048  for (;;) Line 2565  for (;;)
2565    
2566      else      else
2567        {        {
2568        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2569            {
2570            if (eptr >= md->end_subject)
2571              {
2572              SCHECK_PARTIAL();
2573              RRETURN(MATCH_NOMATCH);
2574              }
2575            if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2576            }
2577        if (min == max) continue;        if (min == max) continue;
2578        if (minimize)        if (minimize)
2579          {          {
2580          for (fi = min;; fi++)          for (fi = min;; fi++)
2581            {            {
2582            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2583            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2584            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max)
2585                {
2586                CHECK_PARTIAL();
2587                RRETURN(MATCH_NOMATCH);
2588                }
2589              if (eptr >= md->end_subject)
2590                {
2591                SCHECK_PARTIAL();
2592              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2593                }
2594              if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2595            }            }
2596          /* Control never gets here */          /* Control never gets here */
2597          }          }
2598        else        else  /* Maximize */
2599          {          {
2600          pp = eptr;          pp = eptr;
2601          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2069  for (;;) Line 2603  for (;;)
2603            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2604            eptr++;            eptr++;
2605            }            }
2606            CHECK_PARTIAL();
2607            if (possessive) continue;
2608          while (eptr >= pp)          while (eptr >= pp)
2609            {            {
2610            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2611            eptr--;            eptr--;
2612            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2613            }            }
# Line 2121  for (;;) Line 2657  for (;;)
2657      ecode += 3;      ecode += 3;
2658      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2659    
2660        case OP_NOTPOSSTAR:
2661        possessive = TRUE;
2662        min = 0;
2663        max = INT_MAX;
2664        ecode++;
2665        goto REPEATNOTCHAR;
2666    
2667        case OP_NOTPOSPLUS:
2668        possessive = TRUE;
2669        min = 1;
2670        max = INT_MAX;
2671        ecode++;
2672        goto REPEATNOTCHAR;
2673    
2674        case OP_NOTPOSQUERY:
2675        possessive = TRUE;
2676        min = 0;
2677        max = 1;
2678        ecode++;
2679        goto REPEATNOTCHAR;
2680    
2681        case OP_NOTPOSUPTO:
2682        possessive = TRUE;
2683        min = 0;
2684        max = GET2(ecode, 1);
2685        ecode += 3;
2686        goto REPEATNOTCHAR;
2687    
2688      case OP_NOTSTAR:      case OP_NOTSTAR:
2689      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2690      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2133  for (;;) Line 2697  for (;;)
2697      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2698      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2699    
2700      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
2701    
2702      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2703      fc = *ecode++;      fc = *ecode++;
2704    
2705      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2160  for (;;) Line 2721  for (;;)
2721        /* UTF-8 mode */        /* UTF-8 mode */
2722        if (utf8)        if (utf8)
2723          {          {
2724          register int d;          register unsigned int d;
2725          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2726            {            {
2727              if (eptr >= md->end_subject)
2728                {
2729                SCHECK_PARTIAL();
2730                RRETURN(MATCH_NOMATCH);
2731                }
2732            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
2733            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
2734            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
# Line 2174  for (;;) Line 2740  for (;;)
2740        /* Not UTF-8 mode */        /* Not UTF-8 mode */
2741          {          {
2742          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2743              {
2744              if (eptr >= md->end_subject)
2745                {
2746                SCHECK_PARTIAL();
2747                RRETURN(MATCH_NOMATCH);
2748                }
2749            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2750              }
2751          }          }
2752    
2753        if (min == max) continue;        if (min == max) continue;
# Line 2185  for (;;) Line 2758  for (;;)
2758          /* UTF-8 mode */          /* UTF-8 mode */
2759          if (utf8)          if (utf8)
2760            {            {
2761            register int d;            register unsigned int d;
2762            for (fi = min;; fi++)            for (fi = min;; fi++)
2763              {              {
2764              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2765              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2766                if (fi >= max)
2767                  {
2768                  CHECK_PARTIAL();
2769                  RRETURN(MATCH_NOMATCH);
2770                  }
2771                if (eptr >= md->end_subject)
2772                  {
2773                  SCHECK_PARTIAL();
2774                  RRETURN(MATCH_NOMATCH);
2775                  }
2776              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2777              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2778              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2779              }              }
2780            }            }
2781          else          else
# Line 2202  for (;;) Line 2784  for (;;)
2784            {            {
2785            for (fi = min;; fi++)            for (fi = min;; fi++)
2786              {              {
2787              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2788              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2789              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max)
2790                  {
2791                  CHECK_PARTIAL();
2792                  RRETURN(MATCH_NOMATCH);
2793                  }
2794                if (eptr >= md->end_subject)
2795                  {
2796                  SCHECK_PARTIAL();
2797                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2798                  }
2799                if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2800              }              }
2801            }            }
2802          /* Control never gets here */          /* Control never gets here */
# Line 2221  for (;;) Line 2812  for (;;)
2812          /* UTF-8 mode */          /* UTF-8 mode */
2813          if (utf8)          if (utf8)
2814            {            {
2815            register int d;            register unsigned int d;
2816            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2817              {              {
2818              int len = 1;              int len = 1;
# Line 2231  for (;;) Line 2822  for (;;)
2822              if (fc == d) break;              if (fc == d) break;
2823              eptr += len;              eptr += len;
2824              }              }
2825            for(;;)          CHECK_PARTIAL();
2826            if (possessive) continue;
2827            for(;;)
2828              {              {
2829              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2830              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2831              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2832              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2248  for (;;) Line 2841  for (;;)
2841              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2842              eptr++;              eptr++;
2843              }              }
2844              CHECK_PARTIAL();
2845              if (possessive) continue;
2846            while (eptr >= pp)            while (eptr >= pp)
2847              {              {
2848              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2849              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2850              eptr--;              eptr--;
2851              }              }
# Line 2269  for (;;) Line 2864  for (;;)
2864        /* UTF-8 mode */        /* UTF-8 mode */
2865        if (utf8)        if (utf8)
2866          {          {
2867          register int d;          register unsigned int d;
2868          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2869            {            {
2870              if (eptr >= md->end_subject)
2871                {
2872                SCHECK_PARTIAL();
2873                RRETURN(MATCH_NOMATCH);
2874                }
2875            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
2876            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
2877            }            }
# Line 2281  for (;;) Line 2881  for (;;)
2881        /* Not UTF-8 mode */        /* Not UTF-8 mode */
2882          {          {
2883          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2884              {
2885              if (eptr >= md->end_subject)
2886                {
2887                SCHECK_PARTIAL();
2888                RRETURN(MATCH_NOMATCH);
2889                }
2890            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2891              }
2892          }          }
2893    
2894        if (min == max) continue;        if (min == max) continue;
# Line 2292  for (;;) Line 2899  for (;;)
2899          /* UTF-8 mode */          /* UTF-8 mode */
2900          if (utf8)          if (utf8)
2901            {            {
2902            register int d;            register unsigned int d;
2903            for (fi = min;; fi++)            for (fi = min;; fi++)
2904              {              {
2905              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2906              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2907              GETCHARINC(d, eptr);              if (fi >= max)
2908              if (fi >= max || eptr >= md->end_subject || fc == d)                {
2909                  CHECK_PARTIAL();
2910                  RRETURN(MATCH_NOMATCH);
2911                  }
2912                if (eptr >= md->end_subject)
2913                  {
2914                  SCHECK_PARTIAL();
2915                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2916                  }
2917                GETCHARINC(d, eptr);
2918                if (fc == d) RRETURN(MATCH_NOMATCH);
2919              }              }
2920            }            }
2921          else          else
# Line 2308  for (;;) Line 2924  for (;;)
2924            {            {
2925            for (fi = min;; fi++)            for (fi = min;; fi++)
2926              {              {
2927              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2928              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2929              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max)
2930                  {
2931                  CHECK_PARTIAL();
2932                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2933              }                }
2934            }              if (eptr >= md->end_subject)
2935          /* Control never gets here */                {
2936                  SCHECK_PARTIAL();
2937                  RRETURN(MATCH_NOMATCH);
2938                  }
2939                if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2940                }
2941              }
2942            /* Control never gets here */
2943          }          }
2944    
2945        /* Maximize case */        /* Maximize case */
# Line 2327  for (;;) Line 2952  for (;;)
2952          /* UTF-8 mode */          /* UTF-8 mode */
2953          if (utf8)          if (utf8)
2954            {            {
2955            register int d;            register unsigned int d;
2956            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2957              {              {
2958              int len = 1;              int len = 1;
# Line 2336  for (;;) Line 2961  for (;;)
2961              if (fc == d) break;              if (fc == d) break;
2962              eptr += len;              eptr += len;
2963              }              }
2964              CHECK_PARTIAL();
2965              if (possessive) continue;
2966            for(;;)            for(;;)
2967              {              {
2968              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2969              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2970              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2971              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2353  for (;;) Line 2980  for (;;)
2980              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2981              eptr++;              eptr++;
2982              }              }
2983              CHECK_PARTIAL();
2984              if (possessive) continue;
2985            while (eptr >= pp)            while (eptr >= pp)
2986              {              {
2987              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2988              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2989              eptr--;              eptr--;
2990              }              }
# Line 2384  for (;;) Line 3013  for (;;)
3013      ecode += 3;      ecode += 3;
3014      goto REPEATTYPE;      goto REPEATTYPE;
3015    
3016        case OP_TYPEPOSSTAR:
3017        possessive = TRUE;
3018        min = 0;
3019        max = INT_MAX;
3020        ecode++;
3021        goto REPEATTYPE;
3022    
3023        case OP_TYPEPOSPLUS:
3024        possessive = TRUE;
3025        min = 1;
3026        max = INT_MAX;
3027        ecode++;
3028        goto REPEATTYPE;
3029    
3030        case OP_TYPEPOSQUERY:
3031        possessive = TRUE;
3032        min = 0;
3033        max = 1;
3034        ecode++;
3035        goto REPEATTYPE;
3036    
3037        case OP_TYPEPOSUPTO:
3038        possessive = TRUE;
3039        min = 0;
3040        max = GET2(ecode, 1);
3041        ecode += 3;
3042        goto REPEATTYPE;
3043    
3044      case OP_TYPESTAR:      case OP_TYPESTAR:
3045      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
3046      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2408  for (;;) Line 3065  for (;;)
3065        {        {
3066        prop_fail_result = ctype == OP_NOTPROP;        prop_fail_result = ctype == OP_NOTPROP;
3067        prop_type = *ecode++;        prop_type = *ecode++;
3068        if (prop_type >= 128)        prop_value = *ecode++;
         {  
         prop_test_against = prop_type - 128;  
         prop_test_variable = &prop_category;  
         }  
       else  
         {  
         prop_test_against = prop_type;  
         prop_test_variable = &prop_chartype;  
         }  
3069        }        }
3070      else prop_type = -1;      else prop_type = -1;
3071  #endif  #endif
3072    
3073      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3074      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3075      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3076      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3077      and single-bytes. */      and single-bytes. */
3078    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3079      if (min > 0)      if (min > 0)
3080        {        {
3081  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3082        if (prop_type > 0)        if (prop_type >= 0)
3083          {          {
3084          for (i = 1; i <= min; i++)          switch(prop_type)
3085            {            {
3086            GETCHARINC(c, eptr);            case PT_ANY:
3087            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3088            if ((*prop_test_variable == prop_test_against) == prop_fail_result)            for (i = 1; i <= min; i++)
3089              RRETURN(MATCH_NOMATCH);              {
3090                if (eptr >= md->end_subject)
3091                  {
3092                  SCHECK_PARTIAL();
3093                  RRETURN(MATCH_NOMATCH);
3094                  }
3095                GETCHARINCTEST(c, eptr);
3096                }
3097              break;
3098    
3099              case PT_LAMP:
3100              for (i = 1; i <= min; i++)
3101                {
3102                if (eptr >= md->end_subject)
3103                  {
3104                  SCHECK_PARTIAL();
3105                  RRETURN(MATCH_NOMATCH);
3106                  }
3107                GETCHARINCTEST(c, eptr);
3108                prop_chartype = UCD_CHARTYPE(c);
3109                if ((prop_chartype == ucp_Lu ||
3110                     prop_chartype == ucp_Ll ||
3111                     prop_chartype == ucp_Lt) == prop_fail_result)
3112                  RRETURN(MATCH_NOMATCH);
3113                }
3114              break;
3115    
3116              case PT_GC:
3117              for (i = 1; i <= min; i++)
3118                {
3119                if (eptr >= md->end_subject)
3120                  {
3121                  SCHECK_PARTIAL();
3122                  RRETURN(MATCH_NOMATCH);
3123                  }
3124                GETCHARINCTEST(c, eptr);
3125                prop_category = UCD_CATEGORY(c);
3126                if ((prop_category == prop_value) == prop_fail_result)
3127                  RRETURN(MATCH_NOMATCH);
3128                }
3129              break;
3130    
3131              case PT_PC:
3132              for (i = 1; i <= min; i++)
3133                {
3134                if (eptr >= md->end_subject)
3135                  {
3136                  SCHECK_PARTIAL();
3137                  RRETURN(MATCH_NOMATCH);
3138                  }
3139                GETCHARINCTEST(c, eptr);
3140                prop_chartype = UCD_CHARTYPE(c);
3141                if ((prop_chartype == prop_value) == prop_fail_result)
3142                  RRETURN(MATCH_NOMATCH);
3143                }
3144              break;
3145    
3146              case PT_SC:
3147              for (i = 1; i <= min; i++)
3148                {
3149                if (eptr >= md->end_subject)
3150                  {
3151                  SCHECK_PARTIAL();
3152                  RRETURN(MATCH_NOMATCH);
3153                  }
3154                GETCHARINCTEST(c, eptr);
3155                prop_script = UCD_SCRIPT(c);
3156                if ((prop_script == prop_value) == prop_fail_result)
3157                  RRETURN(MATCH_NOMATCH);
3158                }
3159              break;
3160    
3161              default:
3162              RRETURN(PCRE_ERROR_INTERNAL);
3163            }            }
3164          }          }
3165    
# Line 2452  for (;;) Line 3170  for (;;)
3170          {          {
3171          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3172            {            {
3173              if (eptr >= md->end_subject)
3174                {
3175                SCHECK_PARTIAL();
3176                RRETURN(MATCH_NOMATCH);
3177                }
3178            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3179            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = UCD_CATEGORY(c);
3180            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3181            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3182              {              {
3183              int len = 1;              int len = 1;
3184              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3185                {                else { GETCHARLEN(c, eptr, len); }
3186                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);  
3187              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3188              eptr += len;              eptr += len;
3189              }              }
# Line 2480  for (;;) Line 3201  for (;;)
3201          case OP_ANY:          case OP_ANY:
3202          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3203            {            {
3204            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3205               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))              {
3206                SCHECK_PARTIAL();
3207              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3208                }
3209              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3210              eptr++;
3211              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3212              }
3213            break;
3214    
3215            case OP_ALLANY:
3216            for (i = 1; i <= min; i++)
3217              {
3218              if (eptr >= md->end_subject)
3219                {
3220                SCHECK_PARTIAL();
3221                RRETURN(MATCH_NOMATCH);
3222                }
3223              eptr++;
3224            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3225            }            }
3226          break;          break;
3227    
3228          case OP_ANYBYTE:          case OP_ANYBYTE:
3229            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3230          eptr += min;          eptr += min;
3231          break;          break;
3232    
3233            case OP_ANYNL:
3234            for (i = 1; i <= min; i++)
3235              {
3236              if (eptr >= md->end_subject)
3237                {
3238                SCHECK_PARTIAL();
3239                RRETURN(MATCH_NOMATCH);
3240                }
3241              GETCHARINC(c, eptr);
3242              switch(c)
3243                {
3244                default: RRETURN(MATCH_NOMATCH);
3245                case 0x000d:
3246                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3247                break;
3248    
3249                case 0x000a:
3250                break;
3251    
3252                case 0x000b:
3253                case 0x000c:
3254                case 0x0085:
3255                case 0x2028:
3256                case 0x2029:
3257                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3258                break;
3259                }
3260              }
3261            break;
3262    
3263            case OP_NOT_HSPACE:
3264            for (i = 1; i <= min; i++)
3265              {
3266              if (eptr >= md->end_subject)
3267                {
3268                SCHECK_PARTIAL();
3269                RRETURN(MATCH_NOMATCH);
3270                }
3271              GETCHARINC(c, eptr);
3272              switch(c)
3273                {
3274                default: break;
3275                case 0x09:      /* HT */
3276                case 0x20:      /* SPACE */
3277                case 0xa0:      /* NBSP */
3278                case 0x1680:    /* OGHAM SPACE MARK */
3279                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3280                case 0x2000:    /* EN QUAD */
3281                case 0x2001:    /* EM QUAD */
3282                case 0x2002:    /* EN SPACE */
3283                case 0x2003:    /* EM SPACE */
3284                case 0x2004:    /* THREE-PER-EM SPACE */
3285                case 0x2005:    /* FOUR-PER-EM SPACE */
3286                case 0x2006:    /* SIX-PER-EM SPACE */
3287                case 0x2007:    /* FIGURE SPACE */
3288                case 0x2008:    /* PUNCTUATION SPACE */
3289                case 0x2009:    /* THIN SPACE */
3290                case 0x200A:    /* HAIR SPACE */
3291                case 0x202f:    /* NARROW NO-BREAK SPACE */
3292                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3293                case 0x3000:    /* IDEOGRAPHIC SPACE */
3294                RRETURN(MATCH_NOMATCH);
3295                }
3296              }
3297            break;
3298    
3299            case OP_HSPACE:
3300            for (i = 1; i <= min; i++)
3301              {
3302              if (eptr >= md->end_subject)
3303                {
3304                SCHECK_PARTIAL();
3305                RRETURN(MATCH_NOMATCH);
3306                }
3307              GETCHARINC(c, eptr);
3308              switch(c)
3309                {
3310                default: RRETURN(MATCH_NOMATCH);
3311                case 0x09:      /* HT */
3312                case 0x20:      /* SPACE */
3313                case 0xa0:      /* NBSP */
3314                case 0x1680:    /* OGHAM SPACE MARK */
3315                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3316                case 0x2000:    /* EN QUAD */
3317                case 0x2001:    /* EM QUAD */
3318                case 0x2002:    /* EN SPACE */
3319                case 0x2003:    /* EM SPACE */
3320                case 0x2004:    /* THREE-PER-EM SPACE */
3321                case 0x2005:    /* FOUR-PER-EM SPACE */
3322                case 0x2006:    /* SIX-PER-EM SPACE */
3323                case 0x2007:    /* FIGURE SPACE */
3324                case 0x2008:    /* PUNCTUATION SPACE */
3325                case 0x2009:    /* THIN SPACE */
3326                case 0x200A:    /* HAIR SPACE */
3327                case 0x202f:    /* NARROW NO-BREAK SPACE */
3328                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3329                case 0x3000:    /* IDEOGRAPHIC SPACE */
3330                break;
3331                }
3332              }
3333            break;
3334    
3335            case OP_NOT_VSPACE:
3336            for (i = 1; i <= min; i++)
3337              {
3338              if (eptr >= md->end_subject)
3339                {
3340                SCHECK_PARTIAL();
3341                RRETURN(MATCH_NOMATCH);
3342                }
3343              GETCHARINC(c, eptr);
3344              switch(c)
3345                {
3346                default: break;
3347                case 0x0a:      /* LF */
3348                case 0x0b:      /* VT */
3349                case 0x0c:      /* FF */
3350                case 0x0d:      /* CR */
3351                case 0x85:      /* NEL */
3352                case 0x2028:    /* LINE SEPARATOR */
3353                case 0x2029:    /* PARAGRAPH SEPARATOR */
3354                RRETURN(MATCH_NOMATCH);
3355                }
3356              }
3357            break;
3358    
3359            case OP_VSPACE:
3360            for (i = 1; i <= min; i++)
3361              {
3362              if (eptr >= md->end_subject)
3363                {
3364                SCHECK_PARTIAL();
3365                RRETURN(MATCH_NOMATCH);
3366                }
3367              GETCHARINC(c, eptr);
3368              switch(c)
3369                {
3370                default: RRETURN(MATCH_NOMATCH);
3371                case 0x0a:      /* LF */
3372                case 0x0b:      /* VT */
3373                case 0x0c:      /* FF */
3374                case 0x0d:      /* CR */
3375                case 0x85:      /* NEL */
3376                case 0x2028:    /* LINE SEPARATOR */
3377                case 0x2029:    /* PARAGRAPH SEPARATOR */
3378                break;
3379                }
3380              }
3381            break;
3382    
3383          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3384          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3385            {            {
3386            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3387                {
3388                SCHECK_PARTIAL();
3389                RRETURN(MATCH_NOMATCH);
3390                }
3391            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3392            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3393              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 2504  for (;;) Line 3397  for (;;)
3397          case OP_DIGIT:          case OP_DIGIT:
3398          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3399            {            {
3400            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3401               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3402                SCHECK_PARTIAL();
3403                RRETURN(MATCH_NOMATCH);
3404                }
3405              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3406              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3407            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3408            }            }
# Line 2514  for (;;) Line 3411  for (;;)
3411          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3412          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3413            {            {
3414            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3415               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3416                SCHECK_PARTIAL();
3417              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3418            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              }
3419              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3420                RRETURN(MATCH_NOMATCH);
3421              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3422            }            }
3423          break;          break;
3424    
3425          case OP_WHITESPACE:          case OP_WHITESPACE:
3426          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3427            {            {
3428            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3429               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3430                SCHECK_PARTIAL();
3431                RRETURN(MATCH_NOMATCH);
3432                }
3433              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3434              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3435            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3436            }            }
# Line 2535  for (;;) Line 3440  for (;;)
3440          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3441            {            {
3442            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
3443               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))               (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3444              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3445            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3446            }            }
3447          break;          break;
3448    
3449          case OP_WORDCHAR:          case OP_WORDCHAR:
3450          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3451            {            {
3452            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3453               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3454                SCHECK_PARTIAL();
3455                RRETURN(MATCH_NOMATCH);
3456                }
3457              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3458              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3459            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3460            }            }
# Line 2564  for (;;) Line 3473  for (;;)
3473        switch(ctype)        switch(ctype)
3474          {          {
3475          case OP_ANY:          case OP_ANY:
3476          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3477            {            {
3478            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
3479              if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);              {
3480                SCHECK_PARTIAL();
3481                RRETURN(MATCH_NOMATCH);
3482                }
3483              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3484              eptr++;
3485            }            }
3486          else eptr += min;          break;
3487    
3488            case OP_ALLANY:
3489            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3490            eptr += min;
3491          break;          break;
3492    
3493          case OP_ANYBYTE:          case OP_ANYBYTE:
3494            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3495          eptr += min;          eptr += min;
3496          break;          break;
3497    
3498            case OP_ANYNL:
3499            for (i = 1; i <= min; i++)
3500              {
3501              if (eptr >= md->end_subject)
3502                {
3503                SCHECK_PARTIAL();
3504                RRETURN(MATCH_NOMATCH);
3505                }
3506              switch(*eptr++)
3507                {
3508                default: RRETURN(MATCH_NOMATCH);
3509                case 0x000d:
3510                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3511                break;
3512                case 0x000a:
3513                break;
3514    
3515                case 0x000b:
3516                case 0x000c:
3517                case 0x0085:
3518                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3519                break;
3520                }
3521              }
3522            break;
3523    
3524            case OP_NOT_HSPACE:
3525            for (i = 1; i <= min; i++)
3526              {
3527              if (eptr >= md->end_subject)
3528                {
3529                SCHECK_PARTIAL();
3530                RRETURN(MATCH_NOMATCH);
3531                }
3532              switch(*eptr++)
3533                {
3534                default: break;
3535                case 0x09:      /* HT */
3536                case 0x20:      /* SPACE */
3537                case 0xa0:      /* NBSP */
3538                RRETURN(MATCH_NOMATCH);
3539                }
3540              }
3541            break;
3542    
3543            case OP_HSPACE:
3544            for (i = 1; i <= min; i++)
3545              {
3546              if (eptr >= md->end_subject)
3547                {
3548                SCHECK_PARTIAL();
3549                RRETURN(MATCH_NOMATCH);
3550                }
3551              switch(*eptr++)
3552                {
3553                default: RRETURN(MATCH_NOMATCH);
3554                case 0x09:      /* HT */
3555                case 0x20:      /* SPACE */
3556                case 0xa0:      /* NBSP */
3557                break;
3558                }
3559              }
3560            break;
3561    
3562            case OP_NOT_VSPACE:
3563            for (i = 1; i <= min; i++)
3564              {
3565              if (eptr >= md->end_subject)
3566                {
3567                SCHECK_PARTIAL();
3568                RRETURN(MATCH_NOMATCH);
3569                }
3570              switch(*eptr++)
3571                {
3572                default: break;
3573                case 0x0a:      /* LF */
3574                case 0x0b:      /* VT */
3575                case 0x0c:      /* FF */
3576                case 0x0d:      /* CR */
3577                case 0x85:      /* NEL */
3578                RRETURN(MATCH_NOMATCH);
3579                }
3580              }
3581            break;
3582    
3583            case OP_VSPACE:
3584            for (i = 1; i <= min; i++)
3585              {
3586              if (eptr >= md->end_subject)
3587                {
3588                SCHECK_PARTIAL();
3589                RRETURN(MATCH_NOMATCH);
3590                }
3591              switch(*eptr++)
3592                {
3593                default: RRETURN(MATCH_NOMATCH);
3594                case 0x0a:      /* LF */
3595                case 0x0b:      /* VT */
3596                case 0x0c:      /* FF */
3597                case 0x0d:      /* CR */
3598                case 0x85:      /* NEL */
3599                break;
3600                }
3601              }
3602            break;
3603    
3604          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3605          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3606              {
3607              if (eptr >= md->end_subject)
3608                {
3609                SCHECK_PARTIAL();
3610                RRETURN(MATCH_NOMATCH);
3611                }
3612            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3613              }
3614          break;          break;
3615    
3616          case OP_DIGIT:          case OP_DIGIT:
3617          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3618              {
3619              if (eptr >= md->end_subject)
3620                {
3621                SCHECK_PARTIAL();
3622                RRETURN(MATCH_NOMATCH);
3623                }
3624            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3625              }
3626          break;          break;
3627    
3628          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3629          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3630              {
3631              if (eptr >= md->end_subject)
3632                {
3633                SCHECK_PARTIAL();
3634                RRETURN(MATCH_NOMATCH);
3635                }
3636            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3637              }
3638          break;          break;
3639    
3640          case OP_WHITESPACE:          case OP_WHITESPACE:
3641          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3642              {
3643              if (eptr >= md->end_subject)
3644                {
3645                SCHECK_PARTIAL();
3646                RRETURN(MATCH_NOMATCH);
3647                }
3648            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3649              }
3650          break;          break;
3651    
3652          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3653          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3654              {
3655              if (eptr >= md->end_subject)
3656                {
3657                SCHECK_PARTIAL();
3658                RRETURN(MATCH_NOMATCH);
3659                }
3660            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if ((md->ctypes[*eptr++] & ctype_word) != 0)
3661              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3662              }
3663          break;          break;
3664    
3665          case OP_WORDCHAR:          case OP_WORDCHAR:
3666          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3667              {
3668              if (eptr >= md->end_subject)
3669                {
3670                SCHECK_PARTIAL();
3671                RRETURN(MATCH_NOMATCH);
3672                }
3673            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if ((md->ctypes[*eptr++] & ctype_word) == 0)
3674              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3675              }
3676          break;          break;
3677    
3678          default:          default:
# Line 2624  for (;;) Line 3691  for (;;)
3691      if (minimize)      if (minimize)
3692        {        {
3693  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3694        if (prop_type > 0)        if (prop_type >= 0)
3695          {          {
3696          for (fi = min;; fi++)          switch(prop_type)
3697            {            {
3698            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            case PT_ANY:
3699            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for (fi = min;; fi++)
3700            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              {
3701            GETCHARINC(c, eptr);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3702            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3703            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (fi >= max)
3704              RRETURN(MATCH_NOMATCH);                {
3705                  CHECK_PARTIAL();
3706                  RRETURN(MATCH_NOMATCH);
3707                  }
3708                if (eptr >= md->end_subject)
3709                  {
3710                  SCHECK_PARTIAL();
3711                  RRETURN(MATCH_NOMATCH);
3712                  }
3713                GETCHARINC(c, eptr);
3714                if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3715                }
3716              /* Control never gets here */
3717    
3718              case PT_LAMP:
3719              for (fi = min;; fi++)
3720                {
3721                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3722                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3723                if (fi >= max)
3724                  {
3725                  CHECK_PARTIAL();
3726                  RRETURN(MATCH_NOMATCH);
3727                  }
3728                if (eptr >= md->end_subject)
3729                  {
3730                  SCHECK_PARTIAL();
3731                  RRETURN(MATCH_NOMATCH);
3732                  }
3733                GETCHARINC(c, eptr);
3734                prop_chartype = UCD_CHARTYPE(c);
3735                if ((prop_chartype == ucp_Lu ||
3736                     prop_chartype == ucp_Ll ||
3737                     prop_chartype == ucp_Lt) == prop_fail_result)
3738                  RRETURN(MATCH_NOMATCH);
3739                }
3740              /* Control never gets here */
3741    
3742              case PT_GC:
3743              for (fi = min;; fi++)
3744                {
3745                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3746                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3747                if (fi >= max)
3748                  {
3749                  CHECK_PARTIAL();
3750                  RRETURN(MATCH_NOMATCH);
3751                  }
3752                if (eptr >= md->end_subject)
3753                  {
3754                  SCHECK_PARTIAL();
3755                  RRETURN(MATCH_NOMATCH);
3756                  }
3757                GETCHARINC(c, eptr);
3758                prop_category = UCD_CATEGORY(c);
3759                if ((prop_category == prop_value) == prop_fail_result)
3760                  RRETURN(MATCH_NOMATCH);
3761                }
3762              /* Control never gets here */
3763    
3764              case PT_PC:
3765              for (fi = min;; fi++)
3766                {
3767                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3768                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3769                if (fi >= max)
3770                  {
3771                  CHECK_PARTIAL();
3772                  RRETURN(MATCH_NOMATCH);
3773                  }
3774                if (eptr >= md->end_subject)
3775                  {
3776                  SCHECK_PARTIAL();
3777                  RRETURN(MATCH_NOMATCH);
3778                  }
3779                GETCHARINC(c, eptr);
3780                prop_chartype = UCD_CHARTYPE(c);
3781                if ((prop_chartype == prop_value) == prop_fail_result)
3782                  RRETURN(MATCH_NOMATCH);
3783                }
3784              /* Control never gets here */
3785    
3786              case PT_SC:
3787              for (fi = min;; fi++)
3788                {
3789                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3790                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3791                if (fi >= max)
3792                  {
3793                  CHECK_PARTIAL();
3794                  RRETURN(MATCH_NOMATCH);
3795                  }
3796                if (eptr >= md->end_subject)
3797                  {
3798                  SCHECK_PARTIAL();
3799                  RRETURN(MATCH_NOMATCH);
3800                  }
3801                GETCHARINC(c, eptr);
3802                prop_script = UCD_SCRIPT(c);
3803                if ((prop_script == prop_value) == prop_fail_result)
3804                  RRETURN(MATCH_NOMATCH);
3805                }
3806              /* Control never gets here */
3807    
3808              default:
3809              RRETURN(PCRE_ERROR_INTERNAL);
3810            }            }
3811          }          }
3812    
# Line 2645  for (;;) Line 3817  for (;;)
3817          {          {
3818          for (fi = min;; fi++)          for (fi = min;; fi++)
3819            {            {
3820            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3821            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3822            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max)
3823                {
3824                CHECK_PARTIAL();
3825                RRETURN(MATCH_NOMATCH);
3826                }
3827              if (eptr >= md->end_subject)
3828                {
3829                SCHECK_PARTIAL();
3830                RRETURN(MATCH_NOMATCH);
3831                }
3832            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3833            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = UCD_CATEGORY(c);
3834            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3835            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3836              {              {
3837              int len = 1;              int len = 1;
3838              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3839                {                else { GETCHARLEN(c, eptr, len); }
3840                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);  
3841              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3842              eptr += len;              eptr += len;
3843              }              }
# Line 2674  for (;;) Line 3853  for (;;)
3853          {          {
3854          for (fi = min;; fi++)          for (fi = min;; fi++)
3855            {            {
3856            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3857            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3858            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max)
3859                {
3860                CHECK_PARTIAL();
3861                RRETURN(MATCH_NOMATCH);
3862                }
3863              if (eptr >= md->end_subject)
3864                {
3865                SCHECK_PARTIAL();
3866                RRETURN(MATCH_NOMATCH);
3867                }
3868              if (ctype == OP_ANY && IS_NEWLINE(eptr))
3869                RRETURN(MATCH_NOMATCH);
3870            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3871            switch(ctype)            switch(ctype)
3872              {              {
3873              case OP_ANY:              case OP_ANY:        /* This is the non-NL case */
3874              if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);              case OP_ALLANY:
3875                case OP_ANYBYTE:
3876              break;              break;
3877    
3878              case OP_ANYBYTE:              case OP_ANYNL:
3879                switch(c)
3880                  {
3881                  default: RRETURN(MATCH_NOMATCH);
3882                  case 0x000d:
3883                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3884                  break;
3885                  case 0x000a:
3886                  break;
3887    
3888                  case 0x000b:
3889                  case 0x000c:
3890                  case 0x0085:
3891                  case 0x2028:
3892                  case 0x2029:
3893                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3894                  break;
3895                  }
3896                break;
3897    
3898                case OP_NOT_HSPACE:
3899                switch(c)
3900                  {
3901                  default: break;
3902                  case 0x09:      /* HT */
3903                  case 0x20:      /* SPACE */
3904                  case 0xa0:      /* NBSP */
3905                  case 0x1680:    /* OGHAM SPACE MARK */
3906                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3907                  case 0x2000:    /* EN QUAD */
3908                  case 0x2001:    /* EM QUAD */
3909                  case 0x2002:    /* EN SPACE */
3910                  case 0x2003:    /* EM SPACE */
3911                  case 0x2004:    /* THREE-PER-EM SPACE */
3912                  case 0x2005:    /* FOUR-PER-EM SPACE */
3913                  case 0x2006:    /* SIX-PER-EM SPACE */
3914                  case 0x2007:    /* FIGURE SPACE */
3915                  case 0x2008:    /* PUNCTUATION SPACE */
3916                  case 0x2009:    /* THIN SPACE */
3917                  case 0x200A:    /* HAIR SPACE */
3918                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3919                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3920                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3921                  RRETURN(MATCH_NOMATCH);
3922                  }
3923                break;
3924    
3925                case OP_HSPACE:
3926                switch(c)
3927                  {
3928                  default: RRETURN(MATCH_NOMATCH);
3929                  case 0x09:      /* HT */
3930                  case 0x20:      /* SPACE */
3931                  case 0xa0:      /* NBSP */
3932                  case 0x1680:    /* OGHAM SPACE MARK */
3933                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3934                  case 0x2000:    /* EN QUAD */
3935                  case 0x2001:    /* EM QUAD */
3936                  case 0x2002:    /* EN SPACE */
3937                  case 0x2003:    /* EM SPACE */
3938                  case 0x2004:    /* THREE-PER-EM SPACE */
3939                  case 0x2005:    /* FOUR-PER-EM SPACE */
3940                  case 0x2006:    /* SIX-PER-EM SPACE */
3941                  case 0x2007:    /* FIGURE SPACE */
3942                  case 0x2008:    /* PUNCTUATION SPACE */
3943                  case 0x2009:    /* THIN SPACE */
3944                  case 0x200A:    /* HAIR SPACE */
3945                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3946                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3947                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3948                  break;
3949                  }
3950                break;
3951    
3952                case OP_NOT_VSPACE:
3953                switch(c)
3954                  {
3955                  default: break;
3956                  case 0x0a:      /* LF */
3957                  case 0x0b:      /* VT */
3958                  case 0x0c:      /* FF */
3959                  case 0x0d:      /* CR */
3960                  case 0x85:      /* NEL */
3961                  case 0x2028:    /* LINE SEPARATOR */
3962                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3963                  RRETURN(MATCH_NOMATCH);
3964                  }
3965                break;
3966    
3967                case OP_VSPACE:
3968                switch(c)
3969                  {
3970                  default: RRETURN(MATCH_NOMATCH);
3971                  case 0x0a:      /* LF */
3972                  case 0x0b:      /* VT */
3973                  case 0x0c:      /* FF */
3974                  case 0x0d:      /* CR */
3975                  case 0x85:      /* NEL */
3976                  case 0x2028:    /* LINE SEPARATOR */
3977                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3978                  break;
3979                  }
3980              break;              break;
3981    
3982              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
# Line 2729  for (;;) Line 4020  for (;;)
4020          {          {
4021          for (fi = min;; fi++)          for (fi = min;; fi++)
4022            {            {
4023            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4024            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4025            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max)
4026                {
4027                CHECK_PARTIAL();
4028                RRETURN(MATCH_NOMATCH);
4029                }
4030              if (eptr >= md->end_subject)
4031                {
4032                SCHECK_PARTIAL();
4033                RRETURN(MATCH_NOMATCH);
4034                }
4035              if (ctype == OP_ANY && IS_NEWLINE(eptr))
4036                RRETURN(MATCH_NOMATCH);
4037            c = *eptr++;            c = *eptr++;
4038            switch(ctype)            switch(ctype)
4039              {              {
4040              case OP_ANY:              case OP_ANY:     /* This is the non-NL case */
4041              if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);              case OP_ALLANY:
4042                case OP_ANYBYTE:
4043              break;              break;
4044    
4045              case OP_ANYBYTE:              case OP_ANYNL:
4046                switch(c)
4047                  {
4048                  default: RRETURN(MATCH_NOMATCH);
4049                  case 0x000d:
4050                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4051                  break;
4052    
4053                  case 0x000a:
4054                  break;
4055    
4056                  case 0x000b:
4057                  case 0x000c:
4058                  case 0x0085:
4059                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4060                  break;
4061                  }
4062                break;
4063    
4064                case OP_NOT_HSPACE:
4065                switch(c)
4066                  {
4067                  default: break;
4068                  case 0x09:      /* HT */
4069                  case 0x20:      /* SPACE */
4070                  case 0xa0:      /* NBSP */
4071                  RRETURN(MATCH_NOMATCH);
4072                  }
4073                break;
4074    
4075                case OP_HSPACE:
4076                switch(c)
4077                  {
4078                  default: RRETURN(MATCH_NOMATCH);
4079                  case 0x09:      /* HT */
4080                  case 0x20:      /* SPACE */
4081                  case 0xa0:      /* NBSP */
4082                  break;
4083                  }
4084                break;
4085    
4086                case OP_NOT_VSPACE:
4087                switch(c)
4088                  {
4089                  default: break;
4090                  case 0x0a:      /* LF */
4091                  case 0x0b:      /* VT */
4092                  case 0x0c:      /* FF */
4093                  case 0x0d:      /* CR */
4094                  case 0x85:      /* NEL */
4095                  RRETURN(MATCH_NOMATCH);
4096                  }
4097                break;
4098    
4099                case OP_VSPACE:
4100                switch(c)
4101                  {
4102                  default: RRETURN(MATCH_NOMATCH);
4103                  case 0x0a:      /* LF */
4104                  case 0x0b:      /* VT */
4105                  case 0x0c:      /* FF */
4106                  case 0x0d:      /* CR */
4107                  case 0x85:      /* NEL */
4108                  break;
4109                  }
4110              break;              break;
4111    
4112              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
# Line 2774  for (;;) Line 4141  for (;;)
4141        /* Control never gets here */        /* Control never gets here */
4142        }        }
4143    
4144      /* If maximizing it is worth using inline code for speed, doing the type      /* If maximizing, it is worth using inline code for speed, doing the type
4145      test once at the start (i.e. keep it out of the loop). Again, keep the      test once at the start (i.e. keep it out of the loop). Again, keep the
4146      UTF-8 and UCP stuff separate. */      UTF-8 and UCP stuff separate. */
4147    
# Line 2782  for (;;) Line 4149  for (;;)
4149