/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC revision 426 by ph10, Wed Aug 26 15:38:32 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49    #define NLBLOCK md             /* Block containing newline information */
50    #define PSSTART start_subject  /* Field containing processed string start */
51    #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    /* Undefine some potentially clashing cpp symbols */
56    
57  /* Structure for building a chain of data that actually lives on the  #undef min
58  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   const uschar *epb_saved_eptr;  
 } eptrblock;  
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_THEN         (-996)
78    
79  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
80  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 101  Returns:     nothing Line 109  Returns:     nothing
109  static void  static void
110  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
111  {  {
112  int c;  unsigned int c;
113  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
114  while (length-- > 0)  while (length-- > 0)
115    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 128  Returns:      TRUE if matched Line 136  Returns:      TRUE if matched
136  */  */
137    
138  static BOOL  static BOOL
139  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
140    unsigned long int ims)    unsigned long int ims)
141  {  {
142  const uschar *p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
143    
144  #ifdef DEBUG  #ifdef DEBUG
145  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 150  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 169  return TRUE; Line 203  return TRUE;
203  ****************************************************************************  ****************************************************************************
204                     RECURSION IN THE match() FUNCTION                     RECURSION IN THE match() FUNCTION
205    
206  The match() function is highly recursive. Some regular expressions can cause  The match() function is highly recursive, though not every recursive call
207  it to recurse thousands of times. I was writing for Unix, so I just let it  increases the recursive depth. Nevertheless, some regular expressions can cause
208  call itself recursively. This uses the stack for saving everything that has  it to recurse to a great depth. I was writing for Unix, so I just let it call
209  to be saved for a recursive call. On Unix, the stack can be large, and this  itself recursively. This uses the stack for saving everything that has to be
210  works fine.  saved for a recursive call. On Unix, the stack can be large, and this works
211    fine.
212  It turns out that on non-Unix systems there are problems with programs that  
213  use a lot of stack. (This despite the fact that every last chip has oodles  It turns out that on some non-Unix-like systems there are problems with
214  of memory these days, and techniques for extending the stack have been known  programs that use a lot of stack. (This despite the fact that every last chip
215  for decades.) So....  has oodles of memory these days, and techniques for extending the stack have
216    been known for decades.) So....
217    
218  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
219  calls by keeping local variables that need to be preserved in blocks of memory  calls by keeping local variables that need to be preserved in blocks of memory
220  obtained from malloc instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
221  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
222  always used to.  always used to.
223    
224    The original heap-recursive code used longjmp(). However, it seems that this
225    can be very slow on some operating systems. Following a suggestion from Stan
226    Switzer, the use of longjmp() has been abolished, at the cost of having to
227    provide a unique number for each call to RMATCH. There is no way of generating
228    a sequence of numbers at compile time in C. I have given them names, to make
229    them stand out more clearly.
230    
231    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
232    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
233    tests. Furthermore, not using longjmp() means that local dynamic variables
234    don't have indeterminate values; this has meant that the frame size can be
235    reduced because the result can be "passed back" by straight setting of the
236    variable instead of being passed in the frame.
237  ****************************************************************************  ****************************************************************************
238  ***************************************************************************/  ***************************************************************************/
239    
240    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
241    below must be updated in sync.  */
242    
243  /* These versions of the macros use the stack, as normal */  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
244           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
245           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
246           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
247           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
248           RM51,  RM52, RM53, RM54 };
249    
250    /* These versions of the macros use the stack, as normal. There are debugging
251    versions and production versions. Note that the "rw" argument of RMATCH isn't
252    actually used in this definition. */
253    
254  #ifndef NO_RECURSE  #ifndef NO_RECURSE
255  #define REGISTER register  #define REGISTER register
256  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)  
257    #ifdef DEBUG
258    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
259      { \
260      printf("match() called in line %d\n", __LINE__); \
261      rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
262      printf("to line %d\n", __LINE__); \
263      }
264    #define RRETURN(ra) \
265      { \
266      printf("match() returned %d from line %d ", ra, __LINE__); \
267      return ra; \
268      }
269    #else
270    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
271      rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
272  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
273    #endif
274    
275  #else  #else
276    
277    
278  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
279  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
280  match(), which never changes. */  argument of match(), which never changes. */
281    
282  #define REGISTER  #define REGISTER
283    
284  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
285    {\    {\
286    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
287    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
288      {\    newframe->Xeptr = ra;\
289      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
290      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
291      newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
292      newframe->Xims = re;\    newframe->Xims = re;\
293      newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
294      newframe->Xflags = rg;\    newframe->Xflags = rg;\
295      newframe->Xprevframe = frame;\    newframe->Xrdepth = frame->Xrdepth + 1;\
296      frame = newframe;\    newframe->Xprevframe = frame;\
297      DPRINTF(("restarting from line %d\n", __LINE__));\    frame = newframe;\
298      goto HEAP_RECURSE;\    DPRINTF(("restarting from line %d\n", __LINE__));\
299      }\    goto HEAP_RECURSE;\
300    else\    L_##rw:\
301      {\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
302    }    }
303    
304  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 235  match(), which never changes. */ Line 308  match(), which never changes. */
308    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
309    if (frame != NULL)\    if (frame != NULL)\
310      {\      {\
311      frame->Xresult = ra;\      rrc = ra;\
312      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
313      }\      }\
314    return ra;\    return ra;\
315    }    }
# Line 250  typedef struct heapframe { Line 322  typedef struct heapframe {
322    
323    /* Function arguments that may change */    /* Function arguments that may change */
324    
325    const uschar *Xeptr;    USPTR Xeptr;
326    const uschar *Xecode;    const uschar *Xecode;
327      USPTR Xmstart;
328    int Xoffset_top;    int Xoffset_top;
329    long int Xims;    long int Xims;
330    eptrblock *Xeptrb;    eptrblock *Xeptrb;
331    int Xflags;    int Xflags;
332      unsigned int Xrdepth;
333    
334    /* Function local variables */    /* Function local variables */
335    
336    const uschar *Xcallpat;    USPTR Xcallpat;
337    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
338    const uschar *Xdata;    USPTR Xcharptr;
339    const uschar *Xnext;  #endif
340    const uschar *Xpp;    USPTR Xdata;
341    const uschar *Xprev;    USPTR Xnext;
342    const uschar *Xsaved_eptr;    USPTR Xpp;
343      USPTR Xprev;
344      USPTR Xsaved_eptr;
345    
346    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
347    
348    BOOL Xcur_is_word;    BOOL Xcur_is_word;
349    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
350    BOOL Xprev_is_word;    BOOL Xprev_is_word;
351    
352    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
353    
354  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
355    int Xprop_type;    int Xprop_type;
356      int Xprop_value;
357    int Xprop_fail_result;    int Xprop_fail_result;
358    int Xprop_category;    int Xprop_category;
359    int Xprop_chartype;    int Xprop_chartype;
360    int Xprop_othercase;    int Xprop_script;
361    int Xprop_test_against;    int Xoclength;
362    int *Xprop_test_variable;    uschar Xocchars[8];
363  #endif  #endif
364    
365      int Xcodelink;
366    int Xctype;    int Xctype;
367    int Xfc;    unsigned int Xfc;
368    int Xfi;    int Xfi;
369    int Xlength;    int Xlength;
370    int Xmax;    int Xmax;
# Line 301  typedef struct heapframe { Line 378  typedef struct heapframe {
378    
379    eptrblock Xnewptrb;    eptrblock Xnewptrb;
380    
381    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
382    
383    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
384    
385  } heapframe;  } heapframe;
386    
# Line 320  typedef struct heapframe { Line 396  typedef struct heapframe {
396  *         Match from current position            *  *         Match from current position            *
397  *************************************************/  *************************************************/
398    
399  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
400  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
401  same response.  same response. */
402    
403    /* These macros pack up tests that are used for partial matching, and which
404    appear several times in the code. We set the "hit end" flag if the pointer is
405    at the end of the subject and also past the start of the subject (i.e.
406    something has been matched). The second one is used when we already know we are
407    past the end of the subject. */
408    
409    #define CHECK_PARTIAL()\
410      if (md->partial && eptr >= md->end_subject && eptr > mstart)\
411        md->hitend = TRUE
412    
413  Performance note: It might be tempting to extract commonly used fields from the  #define SCHECK_PARTIAL()\
414  md structure (e.g. utf8, end_subject) into individual variables to improve    if (md->partial && eptr > mstart) md->hitend = TRUE
415    
416    /* Performance note: It might be tempting to extract commonly used fields from
417    the md structure (e.g. utf8, end_subject) into individual variables to improve
418  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
419  made performance worse.  made performance worse.
420    
421  Arguments:  Arguments:
422     eptr        pointer in subject     eptr        pointer to current character in subject
423     ecode       position in code     ecode       pointer to current position in compiled code
424       mstart      pointer to the current match start position (can be modified
425                     by encountering \K)
426     offset_top  current top pointer     offset_top  current top pointer
427     md          pointer to "static" info for the match     md          pointer to "static" info for the match
428     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 342  Arguments: Line 430  Arguments:
430                   brackets - for testing for empty matches                   brackets - for testing for empty matches
431     flags       can contain     flags       can contain
432                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
433                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
434                       group that can match an empty string
435       rdepth      the recursion depth
436    
437  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
438                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
439                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
440                   (e.g. stopped by recursion limit)                   (e.g. stopped by repeated call or recursion limit)
441  */  */
442    
443  static int  static int
444  match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
445    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
446    int flags)    int flags, unsigned int rdepth)
447  {  {
448  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
449  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
450  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
451    
452    register int  rrc;         /* Returns from recursive calls */
453    register int  i;           /* Used for loops not involving calls to RMATCH() */
454    register unsigned int c;   /* Character values not kept over RMATCH() calls */
455    register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
456    
457  register int  rrc;    /* Returns from recursive calls */  BOOL minimize, possessive; /* Quantifier options */
458  register int  i;      /* Used for loops not involving calls to RMATCH() */  int condcode;
 register int  c;      /* Character values not kept over RMATCH() calls */  
 register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  
459    
460  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
461  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 377  frame->Xprevframe = NULL;            /* Line 470  frame->Xprevframe = NULL;            /*
470    
471  frame->Xeptr = eptr;  frame->Xeptr = eptr;
472  frame->Xecode = ecode;  frame->Xecode = ecode;
473    frame->Xmstart = mstart;
474  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
475  frame->Xims = ims;  frame->Xims = ims;
476  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
477  frame->Xflags = flags;  frame->Xflags = flags;
478    frame->Xrdepth = rdepth;
479    
480  /* This is where control jumps back to to effect "recursion" */  /* This is where control jumps back to to effect "recursion" */
481    
# Line 390  HEAP_RECURSE: Line 485  HEAP_RECURSE:
485    
486  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
487  #define ecode              frame->Xecode  #define ecode              frame->Xecode
488    #define mstart             frame->Xmstart
489  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
490  #define ims                frame->Xims  #define ims                frame->Xims
491  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
492  #define flags              frame->Xflags  #define flags              frame->Xflags
493    #define rdepth             frame->Xrdepth
494    
495  /* Ditto for the local variables */  /* Ditto for the local variables */
496    
# Line 401  HEAP_RECURSE: Line 498  HEAP_RECURSE:
498  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
499  #endif  #endif
500  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
501    #define codelink           frame->Xcodelink
502  #define data               frame->Xdata  #define data               frame->Xdata
503  #define next               frame->Xnext  #define next               frame->Xnext
504  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 411  HEAP_RECURSE: Line 509  HEAP_RECURSE:
509    
510  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
511  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
512  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
513    
514  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
515    
516  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
517  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
518    #define prop_value         frame->Xprop_value
519  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
520  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
521  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
522  #define prop_othercase     frame->Xprop_othercase  #define prop_script        frame->Xprop_script
523  #define prop_test_against  frame->Xprop_test_against  #define oclength           frame->Xoclength
524  #define prop_test_variable frame->Xprop_test_variable  #define occhars            frame->Xocchars
525  #endif  #endif
526    
527  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 447  HEAP_RECURSE: Line 545  HEAP_RECURSE:
545  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
546  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
547    
548  #else  #else         /* NO_RECURSE not defined */
549  #define fi i  #define fi i
550  #define fc c  #define fc c
551    
552    
553  #ifdef SUPPORT_UTF8                /* Many of these variables are used ony */  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
554  const uschar *charptr;             /* small blocks of the code. My normal  */  const uschar *charptr;             /* in small blocks of the code. My normal */
555  #endif                             /* style of coding would have declared  */  #endif                             /* style of coding would have declared    */
556  const uschar *callpat;             /* them within each of those blocks.    */  const uschar *callpat;             /* them within each of those blocks.      */
557  const uschar *data;                /* However, in order to accommodate the */  const uschar *data;                /* However, in order to accommodate the   */
558  const uschar *next;                /* version of this code that uses an    */  const uschar *next;                /* version of this code that uses an      */
559  const uschar *pp;                  /* external "stack" implemented on the  */  USPTR         pp;                  /* external "stack" implemented on the    */
560  const uschar *prev;                /* heap, it is easier to declare them   */  const uschar *prev;                /* heap, it is easier to declare them all */
561  const uschar *saved_eptr;          /* all here, so the declarations can    */  USPTR         saved_eptr;          /* here, so the declarations can be cut   */
562                                     /* be cut out in a block. The only      */                                     /* out in a block. The only declarations  */
563  recursion_info new_recursive;      /* declarations within blocks below are */  recursion_info new_recursive;      /* within blocks below are for variables  */
564                                     /* for variables that do not have to    */                                     /* that do not have to be preserved over  */
565  BOOL cur_is_word;                  /* be preserved over a recursive call   */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
566  BOOL condition;                    /* to RMATCH().                         */  BOOL condition;
 BOOL minimize;  
567  BOOL prev_is_word;  BOOL prev_is_word;
568    
569  unsigned long int original_ims;  unsigned long int original_ims;
570    
571  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
572  int prop_type;  int prop_type;
573    int prop_value;
574  int prop_fail_result;  int prop_fail_result;
575  int prop_category;  int prop_category;
576  int prop_chartype;  int prop_chartype;
577  int prop_othercase;  int prop_script;
578  int prop_test_against;  int oclength;
579  int *prop_test_variable;  uschar occhars[8];
580  #endif  #endif
581    
582    int codelink;
583  int ctype;  int ctype;
584  int length;  int length;
585  int max;  int max;
# Line 493  int save_offset1, save_offset2, save_off Line 592  int save_offset1, save_offset2, save_off
592  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
593    
594  eptrblock newptrb;  eptrblock newptrb;
595  #endif  #endif     /* NO_RECURSE */
596    
597  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
598  variables. */  variables. */
599    
600  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
601    prop_value = 0;
602  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_against = 0;  
 prop_test_variable = NULL;  
603  #endif  #endif
604    
605  /* OK, now we can get on with the real code of the function. Recursion is  
606  specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,  /* This label is used for tail recursion, which is used in a few cases even
607  these just turn into a recursive call to match() and a "return", respectively.  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
608  However, RMATCH isn't like a function call because it's quite a complicated  used. Thanks to Ian Taylor for noticing this possibility and sending the
609  macro. It has to be used in one particular way. This shouldn't, however, impact  original patch. */
610  performance when true recursion is being used. */  
611    TAIL_RECURSE:
612    
613    /* OK, now we can get on with the real code of the function. Recursive calls
614    are specified by the macro RMATCH and RRETURN is used to return. When
615    NO_RECURSE is *not* defined, these just turn into a recursive call to match()
616    and a "return", respectively (possibly with some debugging if DEBUG is
617    defined). However, RMATCH isn't like a function call because it's quite a
618    complicated macro. It has to be used in one particular way. This shouldn't,
619    however, impact performance when true recursion is being used. */
620    
621    #ifdef SUPPORT_UTF8
622    utf8 = md->utf8;       /* Local copy of the flag */
623    #else
624    utf8 = FALSE;
625    #endif
626    
627    /* First check that we haven't called match() too many times, or that we
628    haven't exceeded the recursive call limit. */
629    
630  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
631    if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
632    
633  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
 utf8 = md->utf8;       /* Local copy of the flag */  
634    
635  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
636  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
637  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
638  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
639    When match() is called in other circumstances, don't add to the chain. The
640    match_cbegroup flag must NOT be used with tail recursion, because the memory
641    block that is used is on the stack, so a new one may be required for each
642    match(). */
643    
644  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
645    {    {
   newptrb.epb_prev = eptrb;  
646    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
647      newptrb.epb_prev = eptrb;
648    eptrb = &newptrb;    eptrb = &newptrb;
649    }    }
650    
651  /* Now start processing the operations. */  /* Now start processing the opcodes. */
652    
653  for (;;)  for (;;)
654    {    {
655      minimize = possessive = FALSE;
656    op = *ecode;    op = *ecode;
   minimize = FALSE;  
657    
658    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
659    matching at least one subject character. */    matching at least one subject character. This code is now wrapped in a macro
660      because it appears several times below. */
661    
662    if (md->partial &&    CHECK_PARTIAL();
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
   /* Opening capturing bracket. If there is space in the offset vector, save  
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
663    
664    if (op > OP_BRA)    switch(op)
665      {      {
666      number = op - OP_BRA;      case OP_FAIL:
667        RRETURN(MATCH_NOMATCH);
     /* For extended extraction brackets (large number), we have to fish out the  
     number from a dummy opcode at the start. */  
668    
669      if (number > EXTRACT_BASIC_MAX)      case OP_PRUNE:
670        number = GET2(ecode, 2+LINK_SIZE);      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
671          ims, eptrb, flags, RM51);
672        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
673        RRETURN(MATCH_PRUNE);
674    
675        case OP_COMMIT:
676        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
677          ims, eptrb, flags, RM52);
678        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
679        RRETURN(MATCH_COMMIT);
680    
681        case OP_SKIP:
682        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
683          ims, eptrb, flags, RM53);
684        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
685        md->start_match_ptr = eptr;   /* Pass back current position */
686        RRETURN(MATCH_SKIP);
687    
688        case OP_THEN:
689        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
690          ims, eptrb, flags, RM54);
691        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
692        RRETURN(MATCH_THEN);
693    
694        /* Handle a capturing bracket. If there is space in the offset vector, save
695        the current subject position in the working slot at the top of the vector.
696        We mustn't change the current values of the data slot, because they may be
697        set from a previous iteration of this group, and be referred to by a
698        reference inside the group.
699    
700        If the bracket fails to match, we need to restore this value and also the
701        values of the final offsets, in case they were set by a previous iteration
702        of the same bracket.
703    
704        If there isn't enough space in the offset vector, treat this as if it were
705        a non-capturing bracket. Don't worry about setting the flag for the error
706        case here; that is handled in the code for KET. */
707    
708        case OP_CBRA:
709        case OP_SCBRA:
710        number = GET2(ecode, 1+LINK_SIZE);
711      offset = number << 1;      offset = number << 1;
712    
713  #ifdef DEBUG  #ifdef DEBUG
714      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
715        printf("subject=");
716      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
717      printf("\n");      printf("\n");
718  #endif  #endif
# Line 584  for (;;) Line 727  for (;;)
727        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
728        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
729    
730          flags = (op == OP_SCBRA)? match_cbegroup : 0;
731        do        do
732          {          {
733          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
734            match_isgroup);            ims, eptrb, flags, RM1);
735          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
736          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
737          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
738          }          }
# Line 603  for (;;) Line 747  for (;;)
747        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
748        }        }
749    
750      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
751        as a non-capturing bracket. */
752    
753      else op = OP_BRA;      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
754      }      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
755    
756    /* Other types of node can be handled by a switch */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
757    
758    switch(op)      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
759      {      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
760      case OP_BRA:     /* Non-capturing bracket: optimized */  
761      DPRINTF(("start bracket 0\n"));      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
762      do      final alternative within the brackets, we would return the result of a
763        recursive call to match() whatever happened. We can reduce stack usage by
764        turning this into a tail recursion, except in the case when match_cbegroup
765      is set. */
766    
767        case OP_BRA:
768        case OP_SBRA:
769        DPRINTF(("start non-capturing bracket\n"));
770        flags = (op >= OP_SBRA)? match_cbegroup : 0;
771        for (;;)
772        {        {
773        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
774          match_isgroup);          {
775        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (flags == 0)    /* Not a possibly empty group */
776              {
777              ecode += _pcre_OP_lengths[*ecode];
778              DPRINTF(("bracket 0 tail recursion\n"));
779              goto TAIL_RECURSE;
780              }
781    
782            /* Possibly empty group; can't use tail recursion. */
783    
784            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
785              eptrb, flags, RM48);
786            RRETURN(rrc);
787            }
788    
789          /* For non-final alternatives, continue the loop for a NOMATCH result;
790          otherwise return. */
791    
792          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
793            eptrb, flags, RM2);
794          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
795        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
796        }        }
797      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
798    
799      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
800      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
801      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
802      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
803        obeyed, we can use tail recursion to avoid using another stack frame. */
804    
805      case OP_COND:      case OP_COND:
806      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
807        codelink= GET(ecode, 1);
808    
809        /* Because of the way auto-callout works during compile, a callout item is
810        inserted between OP_COND and an assertion condition. */
811    
812        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
813          {
814          if (pcre_callout != NULL)
815            {
816            pcre_callout_block cb;
817            cb.version          = 1;   /* Version 1 of the callout block */
818            cb.callout_number   = ecode[LINK_SIZE+2];
819            cb.offset_vector    = md->offset_vector;
820            cb.subject          = (PCRE_SPTR)md->start_subject;
821            cb.subject_length   = md->end_subject - md->start_subject;
822            cb.start_match      = mstart - md->start_subject;
823            cb.current_position = eptr - md->start_subject;
824            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
825            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
826            cb.capture_top      = offset_top/2;
827            cb.capture_last     = md->capture_last;
828            cb.callout_data     = md->callout_data;
829            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
830            if (rrc < 0) RRETURN(rrc);
831            }
832          ecode += _pcre_OP_lengths[OP_CALLOUT];
833          }
834    
835        condcode = ecode[LINK_SIZE+1];
836    
837        /* Now see what the actual condition is */
838    
839        if (condcode == OP_RREF)         /* Recursion test */
840          {
841          offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
842          condition = md->recursive != NULL &&
843            (offset == RREF_ANY || offset == md->recursive->group_num);
844          ecode += condition? 3 : GET(ecode, 1);
845          }
846    
847        else if (condcode == OP_CREF)    /* Group used test */
848        {        {
849        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
850        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
851          (md->recursive != NULL) :        ecode += condition? 3 : GET(ecode, 1);
852          (offset < offset_top && md->offset_vector[offset] >= 0);        }
853        RMATCH(rrc, eptr, ecode + (condition?  
854          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),      else if (condcode == OP_DEF)     /* DEFINE - always false */
855          offset_top, md, ims, eptrb, match_isgroup);        {
856        RRETURN(rrc);        condition = FALSE;
857          ecode += GET(ecode, 1);
858        }        }
859    
860      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
861      the final argument TRUE causes it to stop at the end of an assertion. */      the flags argument match_condassert causes it to stop at the end of an
862        assertion. */
863    
864      else      else
865        {        {
866        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
867            match_condassert | match_isgroup);            match_condassert, RM3);
868        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
869          {          {
870          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
871            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
872          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
873          }          }
874        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
875          {          {
876          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
877          }          }
878        else ecode += GET(ecode, 1);        else
879        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
880          match_isgroup);          condition = FALSE;
881        RRETURN(rrc);          ecode += codelink;
882            }
883        }        }
     /* Control never reaches here */  
884    
885      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
886      encountered. */      we can use tail recursion to avoid using another stack frame, except when
887        match_cbegroup is required for an unlimited repeat of a possibly empty
888        group. If the second alternative doesn't exist, we can just plough on. */
889    
890      case OP_CREF:      if (condition || *ecode == OP_ALT)
891      case OP_BRANUMBER:        {
892      ecode += 3;        ecode += 1 + LINK_SIZE;
893          if (op == OP_SCOND)        /* Possibly empty group */
894            {
895            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
896            RRETURN(rrc);
897            }
898          else                       /* Group must match something */
899            {
900            flags = 0;
901            goto TAIL_RECURSE;
902            }
903          }
904        else                         /* Condition false & no alternative */
905          {
906          ecode += 1 + LINK_SIZE;
907          }
908      break;      break;
909    
     /* End of the pattern. If we are in a recursion, we should restore the  
     offsets appropriately and continue from after the call. */  
910    
911        /* End of the pattern, either real or forced. If we are in a top-level
912        recursion, we should restore the offsets appropriately and continue from
913        after the call. */
914    
915        case OP_ACCEPT:
916      case OP_END:      case OP_END:
917      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
918        {        {
919        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
920        DPRINTF(("Hit the end in a (?0) recursion\n"));        DPRINTF(("End of pattern in a (?0) recursion\n"));
921        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
922        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
923          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
924        md->start_match = rec->save_start;        mstart = rec->save_start;
925        ims = original_ims;        ims = original_ims;
926        ecode = rec->after_call;        ecode = rec->after_call;
927        break;        break;
# Line 694  for (;;) Line 930  for (;;)
930      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
931      string - backtracking will then try other alternatives, if any. */      string - backtracking will then try other alternatives, if any. */
932    
933      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
934      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
935      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
936        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
937      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
938    
939      /* Change option settings */      /* Change option settings */
# Line 717  for (;;) Line 954  for (;;)
954      case OP_ASSERTBACK:      case OP_ASSERTBACK:
955      do      do
956        {        {
957        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
958          match_isgroup);          RM4);
959        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
960        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
961        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
962        }        }
963      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 744  for (;;) Line 981  for (;;)
981      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
982      do      do
983        {        {
984        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
985          match_isgroup);          RM5);
986        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
987        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
988        ecode += GET(ecode,1);        ecode += GET(ecode,1);
989        }        }
990      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 766  for (;;) Line 1003  for (;;)
1003  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1004      if (utf8)      if (utf8)
1005        {        {
1006        c = GET(ecode,1);        i = GET(ecode, 1);
1007        for (i = 0; i < c; i++)        while (i-- > 0)
1008          {          {
1009          eptr--;          eptr--;
1010          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1011          BACKCHAR(eptr)          BACKCHAR(eptr);
1012          }          }
1013        }        }
1014      else      else
# Line 780  for (;;) Line 1017  for (;;)
1017      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1018    
1019        {        {
1020        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
1021        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1022        }        }
1023    
# Line 800  for (;;) Line 1037  for (;;)
1037        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 1;   /* Version 1 of the callout block */
1038        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1039        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1040        cb.subject          = (const char *)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1041        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
1042        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
1043        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
1044        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1045        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
# Line 837  for (;;) Line 1074  for (;;)
1074      case OP_RECURSE:      case OP_RECURSE:
1075        {        {
1076        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1077        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
1078            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
1079    
1080        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1081    
# Line 869  for (;;) Line 1101  for (;;)
1101    
1102        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1103              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1104        new_recursive.save_start = md->start_match;        new_recursive.save_start = mstart;
1105        md->start_match = eptr;        mstart = eptr;
1106    
1107        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1108        restore the offset and recursion data. */        restore the offset and recursion data. */
1109    
1110        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1111          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1112        do        do
1113          {          {
1114          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1115              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
1116          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
1117            {            {
1118              DPRINTF(("Recursion matched\n"));
1119            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1120            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1121              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1122            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
1123            }            }
1124          else if (rrc != MATCH_NOMATCH) RRETURN(rrc);          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1125              {
1126              DPRINTF(("Recursion gave error %d\n", rrc));
1127              if (new_recursive.offset_save != stacksave)
1128                (pcre_free)(new_recursive.offset_save);
1129              RRETURN(rrc);
1130              }
1131    
1132          md->recursive = &new_recursive;          md->recursive = &new_recursive;
1133          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
# Line 912  for (;;) Line 1152  for (;;)
1152      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer. */
1153    
1154      case OP_ONCE:      case OP_ONCE:
1155        {      prev = ecode;
1156        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1157    
1158        do      do
1159          {        {
1160          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1161            eptrb, match_isgroup);        if (rrc == MATCH_MATCH) break;
1162          if (rrc == MATCH_MATCH) break;        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1163          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += GET(ecode,1);
1164          ecode += GET(ecode,1);        }
1165          }      while (*ecode == OP_ALT);
       while (*ecode == OP_ALT);  
1166    
1167        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1168    
1169        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1170    
1171        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1172        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1173    
1174        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1175    
1176        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1177        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1178    
1179        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1180        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1181        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1182        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1183        course of events. */      course of events. */
1184    
1185        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1186          {        {
1187          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1188          break;        break;
1189          }        }
1190    
1191        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1192        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1193        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1194        opcode. */      any options that changed within the bracket before re-running it, so
1195        check the next opcode. */
1196    
1197        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1198          {        {
1199          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1200          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1201          }        }
1202    
1203        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1204          {        {
1205          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1206          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1207          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1208          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = 0;
1209          }        goto TAIL_RECURSE;
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
1210        }        }
1211      RRETURN(MATCH_NOMATCH);      else  /* OP_KETRMAX */
1212          {
1213          RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1214          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1215          ecode += 1 + LINK_SIZE;
1216          flags = 0;
1217          goto TAIL_RECURSE;
1218          }
1219        /* Control never gets here */
1220    
1221      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1222      bracketed group and go to there. */      bracketed group and go to there. */
# Line 985  for (;;) Line 1225  for (;;)
1225      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1226      break;      break;
1227    
1228      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1229      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1230      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1231      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1232      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1233    
1234      case OP_BRAZERO:      case OP_BRAZERO:
1235        {        {
1236        next = ecode+1;        next = ecode+1;
1237        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1238        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1239        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1240        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1241        }        }
1242      break;      break;
1243    
1244      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1245        {        {
1246        next = ecode+1;        next = ecode+1;
1247        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1248        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1249        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1250        ecode++;        ecode++;
1251        }        }
1252      break;      break;
1253    
1254      /* End of a group, repeated or non-repeating. If we are at the end of      case OP_SKIPZERO:
1255      an assertion "group", stop matching and return MATCH_MATCH, but record the        {
1256      current high water mark for use by positive assertions. Do this also        next = ecode+1;
1257      for the "once" (not-backup up) groups. */        do next += GET(next,1); while (*next == OP_ALT);
1258          ecode = next + 1 + LINK_SIZE;
1259          }
1260        break;
1261    
1262        /* End of a group, repeated or non-repeating. */
1263    
1264      case OP_KET:      case OP_KET:
1265      case OP_KETRMIN:      case OP_KETRMIN:
1266      case OP_KETRMAX:      case OP_KETRMAX:
1267        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
1268    
1269        /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1270        infinite repeats of empty string matches, retrieve the subject start from
1271        the chain. Otherwise, set it NULL. */
1272    
1273        eptrb = eptrb->epb_prev;      if (*prev >= OP_SBRA)
1274          {
1275        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1276            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1277            *prev == OP_ONCE)        }
1278          {      else saved_eptr = NULL;
         md->end_match_ptr = eptr;      /* For ONCE */  
         md->end_offset_top = offset_top;  
         RRETURN(MATCH_MATCH);  
         }  
1279    
1280        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group, stop matching and return
1281        group number back at the start and if necessary complete handling an      MATCH_MATCH, but record the current high water mark for use by positive
1282        extraction by setting the offsets and bumping the high water mark. */      assertions. Do this also for the "once" (atomic) groups. */
1283    
1284        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1285          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1286          number = *prev - OP_BRA;          *prev == OP_ONCE)
1287          {
1288          md->end_match_ptr = eptr;      /* For ONCE */
1289          md->end_offset_top = offset_top;
1290          RRETURN(MATCH_MATCH);
1291          }
1292    
1293          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1294          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1295        bumping the high water mark. Note that whole-pattern recursion is coded as
1296        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1297        when the OP_END is reached. Other recursion is handled here. */
1298    
1299          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1300          offset = number << 1;        {
1301          number = GET2(prev, 1+LINK_SIZE);
1302          offset = number << 1;
1303    
1304  #ifdef DEBUG  #ifdef DEBUG
1305          printf("end bracket %d", number);        printf("end bracket %d", number);
1306          printf("\n");        printf("\n");
1307  #endif  #endif
1308    
1309          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1310          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1311          into group 0, so it won't be picked up here. Instead, we catch it when          {
1312          the OP_END is reached. */          md->offset_vector[offset] =
1313              md->offset_vector[md->offset_end - number];
1314          if (number > 0)          md->offset_vector[offset+1] = eptr - md->start_subject;
1315            {          if (offset_top <= offset) offset_top = offset + 2;
1316            md->capture_last = number;          }
1317            if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
1318              {        /* Handle a recursively called group. Restore the offsets
1319              md->offset_vector[offset] =        appropriately and continue from after the call. */
1320                md->offset_vector[md->offset_end - number];  
1321              md->offset_vector[offset+1] = eptr - md->start_subject;        if (md->recursive != NULL && md->recursive->group_num == number)
1322              if (offset_top <= offset) offset_top = offset + 2;          {
1323              }          recursion_info *rec = md->recursive;
1324            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1325            /* Handle a recursively called group. Restore the offsets          md->recursive = rec->prevrec;
1326            appropriately and continue from after the call. */          mstart = rec->save_start;
1327            memcpy(md->offset_vector, rec->offset_save,
1328            if (md->recursive != NULL && md->recursive->group_num == number)            rec->saved_max * sizeof(int));
1329              {          ecode = rec->after_call;
1330              recursion_info *rec = md->recursive;          ims = original_ims;
1331              DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          break;
             md->recursive = rec->prevrec;  
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
1332          }          }
1333          }
1334    
1335        /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1336        the group. */      flags, in case they got changed during the group. */
1337    
1338        ims = original_ims;      ims = original_ims;
1339        DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
1340    
1341        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1342        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1343        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1344        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1345        course of events. */      course of events. */
1346    
1347        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1348          {        {
1349          ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1350          break;        break;
1351          }        }
1352    
1353        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1354        preceding bracket, in the appropriate order. */      preceding bracket, in the appropriate order. In the second case, we can use
1355        tail recursion to avoid using another stack frame, unless we have an
1356        unlimited repeat of a group that can match an empty string. */
1357    
1358        if (*ecode == OP_KETRMIN)      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1359          {  
1360          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);      if (*ecode == OP_KETRMIN)
1361          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        {
1362          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1363          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1364          }        if (flags != 0)    /* Could match an empty string */
       else  /* OP_KETRMAX */  
1365          {          {
1366          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1367          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          RRETURN(rrc);
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
1368          }          }
1369          ecode = prev;
1370          goto TAIL_RECURSE;
1371        }        }
1372        else  /* OP_KETRMAX */
1373      RRETURN(MATCH_NOMATCH);        {
1374          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1375          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1376          ecode += 1 + LINK_SIZE;
1377          flags = 0;
1378          goto TAIL_RECURSE;
1379          }
1380        /* Control never gets here */
1381    
1382      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1383    
# Line 1135  for (;;) Line 1385  for (;;)
1385      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1386      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1387        {        {
1388        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1389              (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1390          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1391        ecode++;        ecode++;
1392        break;        break;
# Line 1156  for (;;) Line 1407  for (;;)
1407      ecode++;      ecode++;
1408      break;      break;
1409    
1410        /* Reset the start of match point */
1411    
1412        case OP_SET_SOM:
1413        mstart = eptr;
1414        ecode++;
1415        break;
1416    
1417      /* Assert before internal newline if multiline, or before a terminating      /* Assert before internal newline if multiline, or before a terminating
1418      newline unless endonly is set, else end of subject unless noteol is set. */      newline unless endonly is set, else end of subject unless noteol is set. */
1419    
# Line 1163  for (;;) Line 1421  for (;;)
1421      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1422        {        {
1423        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1424          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1425        else        else
1426          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1427        ecode++;        ecode++;
# Line 1174  for (;;) Line 1432  for (;;)
1432        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
1433        if (!md->endonly)        if (!md->endonly)
1434          {          {
1435          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1436             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1437            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1438          ecode++;          ecode++;
1439          break;          break;
1440          }          }
1441        }        }
1442      /* ... else fall through */      /* ... else fall through for endonly */
1443    
1444      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1445    
# Line 1193  for (;;) Line 1451  for (;;)
1451      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1452    
1453      case OP_EODN:      case OP_EODN:
1454      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1455         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1456          RRETURN(MATCH_NOMATCH);
1457      ecode++;      ecode++;
1458      break;      break;
1459    
# Line 1213  for (;;) Line 1472  for (;;)
1472          {          {
1473          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1474            {            {
1475            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1476            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1477            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1478            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1247  for (;;) Line 1506  for (;;)
1506      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1507    
1508      case OP_ANY:      case OP_ANY:
1509      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1510        RRETURN(MATCH_NOMATCH);      /* Fall through */
1511    
1512        case OP_ALLANY:
1513      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1514  #ifdef SUPPORT_UTF8      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
 #endif  
1515      ecode++;      ecode++;
1516      break;      break;
1517    
# Line 1343  for (;;) Line 1601  for (;;)
1601      ecode++;      ecode++;
1602      break;      break;
1603    
1604  #ifdef SUPPORT_UCP      case OP_ANYNL:
     /* Check the next character by Unicode property. We will get here only  
     if the support is in the binary; otherwise a compile-time error occurs. */  
   
     case OP_PROP:  
     case OP_NOTPROP:  
1605      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1606      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1607        switch(c)
1608        {        {
1609        int chartype, rqdtype;        default: RRETURN(MATCH_NOMATCH);
1610        int othercase;        case 0x000d:
1611        int category = _pcre_ucp_findchar(c, &chartype, &othercase);        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1612          break;
1613    
1614        rqdtype = *(++ecode);        case 0x000a:
1615        ecode++;        break;
1616    
1617        if (rqdtype >= 128)        case 0x000b:
1618          {        case 0x000c:
1619          if ((rqdtype - 128 != category) == (op == OP_PROP))        case 0x0085:
1620            RRETURN(MATCH_NOMATCH);        case 0x2028:
1621          }        case 0x2029:
1622        else        if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1623          {        break;
         if ((rqdtype != chartype) == (op == OP_PROP))  
           RRETURN(MATCH_NOMATCH);  
         }  
1624        }        }
1625        ecode++;
1626      break;      break;
1627    
1628      /* Match an extended Unicode sequence. We will get here only if the support      case OP_NOT_HSPACE:
     is in the binary; otherwise a compile-time error occurs. */  
   
     case OP_EXTUNI:  
1629      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1630      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1631        switch(c)
1632        {        {
1633        int chartype;        default: break;
1634        int othercase;        case 0x09:      /* HT */
1635        int category = _pcre_ucp_findchar(c, &chartype, &othercase);        case 0x20:      /* SPACE */
1636        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        case 0xa0:      /* NBSP */
1637        while (eptr < md->end_subject)        case 0x1680:    /* OGHAM SPACE MARK */
1638          {        case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1639          int len = 1;        case 0x2000:    /* EN QUAD */
1640          if (!utf8) c = *eptr; else        case 0x2001:    /* EM QUAD */
1641            {        case 0x2002:    /* EN SPACE */
1642            GETCHARLEN(c, eptr, len);        case 0x2003:    /* EM SPACE */
1643            }        case 0x2004:    /* THREE-PER-EM SPACE */
1644          category = _pcre_ucp_findchar(c, &chartype, &othercase);        case 0x2005:    /* FOUR-PER-EM SPACE */
1645          if (category != ucp_M) break;        case 0x2006:    /* SIX-PER-EM SPACE */
1646          eptr += len;        case 0x2007:    /* FIGURE SPACE */
1647          }        case 0x2008:    /* PUNCTUATION SPACE */
1648          case 0x2009:    /* THIN SPACE */
1649          case 0x200A:    /* HAIR SPACE */
1650          case 0x202f:    /* NARROW NO-BREAK SPACE */
1651          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1652          case 0x3000:    /* IDEOGRAPHIC SPACE */
1653          RRETURN(MATCH_NOMATCH);
1654        }        }
1655      ecode++;      ecode++;
1656      break;      break;
 #endif  
   
   
     /* Match a back reference, possibly repeatedly. Look past the end of the  
     item to see if there is repeat information following. The code is similar  
     to that for character classes, but repeated for efficiency. Then obey  
     similar code to character type repeats - written out again for speed.  
     However, if the referenced string is the empty string, always treat  
     it as matched, any number of times (otherwise there could be infinite  
     loops). */  
1657    
1658      case OP_REF:      case OP_HSPACE:
1659        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1660        GETCHARINCTEST(c, eptr);
1661        switch(c)
1662        {        {
1663        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        default: RRETURN(MATCH_NOMATCH);
1664        ecode += 3;                                 /* Advance past item */        case 0x09:      /* HT */
1665          case 0x20:      /* SPACE */
1666        /* If the reference is unset, set the length to be longer than the amount        case 0xa0:      /* NBSP */
1667        of subject left; this ensures that every attempt at a match fails. We        case 0x1680:    /* OGHAM SPACE MARK */
1668        can't just fail here, because of the possibility of quantifiers with zero        case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1669        minima. */        case 0x2000:    /* EN QUAD */
1670          case 0x2001:    /* EM QUAD */
1671        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        case 0x2002:    /* EN SPACE */
1672          md->end_subject - eptr + 1 :        case 0x2003:    /* EM SPACE */
1673          md->offset_vector[offset+1] - md->offset_vector[offset];        case 0x2004:    /* THREE-PER-EM SPACE */
1674          case 0x2005:    /* FOUR-PER-EM SPACE */
1675          case 0x2006:    /* SIX-PER-EM SPACE */
1676          case 0x2007:    /* FIGURE SPACE */
1677          case 0x2008:    /* PUNCTUATION SPACE */
1678          case 0x2009:    /* THIN SPACE */
1679          case 0x200A:    /* HAIR SPACE */
1680          case 0x202f:    /* NARROW NO-BREAK SPACE */
1681          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1682          case 0x3000:    /* IDEOGRAPHIC SPACE */
1683          break;
1684          }
1685        ecode++;
1686        break;
1687    
1688        case OP_NOT_VSPACE:
1689        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1690        GETCHARINCTEST(c, eptr);
1691        switch(c)
1692          {
1693          default: break;
1694          case 0x0a:      /* LF */
1695          case 0x0b:      /* VT */
1696          case 0x0c:      /* FF */
1697          case 0x0d:      /* CR */
1698          case 0x85:      /* NEL */
1699          case 0x2028:    /* LINE SEPARATOR */
1700          case 0x2029:    /* PARAGRAPH SEPARATOR */
1701          RRETURN(MATCH_NOMATCH);
1702          }
1703        ecode++;
1704        break;
1705    
1706        case OP_VSPACE:
1707        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1708        GETCHARINCTEST(c, eptr);
1709        switch(c)
1710          {
1711          default: RRETURN(MATCH_NOMATCH);
1712          case 0x0a:      /* LF */
1713          case 0x0b:      /* VT */
1714          case 0x0c:      /* FF */
1715          case 0x0d:      /* CR */
1716          case 0x85:      /* NEL */
1717          case 0x2028:    /* LINE SEPARATOR */
1718          case 0x2029:    /* PARAGRAPH SEPARATOR */
1719          break;
1720          }
1721        ecode++;
1722        break;
1723    
1724    #ifdef SUPPORT_UCP
1725        /* Check the next character by Unicode property. We will get here only
1726        if the support is in the binary; otherwise a compile-time error occurs. */
1727    
1728        case OP_PROP:
1729        case OP_NOTPROP:
1730        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1731        GETCHARINCTEST(c, eptr);
1732          {
1733          const ucd_record *prop = GET_UCD(c);
1734    
1735          switch(ecode[1])
1736            {
1737            case PT_ANY:
1738            if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1739            break;
1740    
1741            case PT_LAMP:
1742            if ((prop->chartype == ucp_Lu ||
1743                 prop->chartype == ucp_Ll ||
1744                 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1745              RRETURN(MATCH_NOMATCH);
1746             break;
1747    
1748            case PT_GC:
1749            if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1750              RRETURN(MATCH_NOMATCH);
1751            break;
1752    
1753            case PT_PC:
1754            if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1755              RRETURN(MATCH_NOMATCH);
1756            break;
1757    
1758            case PT_SC:
1759            if ((ecode[2] != prop->script) == (op == OP_PROP))
1760              RRETURN(MATCH_NOMATCH);
1761            break;
1762    
1763            default:
1764            RRETURN(PCRE_ERROR_INTERNAL);
1765            }
1766    
1767          ecode += 3;
1768          }
1769        break;
1770    
1771        /* Match an extended Unicode sequence. We will get here only if the support
1772        is in the binary; otherwise a compile-time error occurs. */
1773    
1774        case OP_EXTUNI:
1775        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1776        GETCHARINCTEST(c, eptr);
1777          {
1778          int category = UCD_CATEGORY(c);
1779          if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1780          while (eptr < md->end_subject)
1781            {
1782            int len = 1;
1783            if (!utf8) c = *eptr; else
1784              {
1785              GETCHARLEN(c, eptr, len);
1786              }
1787            category = UCD_CATEGORY(c);
1788            if (category != ucp_M) break;
1789            eptr += len;
1790            }
1791          }
1792        ecode++;
1793        break;
1794    #endif
1795    
1796    
1797        /* Match a back reference, possibly repeatedly. Look past the end of the
1798        item to see if there is repeat information following. The code is similar
1799        to that for character classes, but repeated for efficiency. Then obey
1800        similar code to character type repeats - written out again for speed.
1801        However, if the referenced string is the empty string, always treat
1802        it as matched, any number of times (otherwise there could be infinite
1803        loops). */
1804    
1805        case OP_REF:
1806          {
1807          offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1808          ecode += 3;
1809    
1810          /* If the reference is unset, there are two possibilities:
1811    
1812          (a) In the default, Perl-compatible state, set the length to be longer
1813          than the amount of subject left; this ensures that every attempt at a
1814          match fails. We can't just fail here, because of the possibility of
1815          quantifiers with zero minima.
1816    
1817          (b) If the JavaScript compatibility flag is set, set the length to zero
1818          so that the back reference matches an empty string.
1819    
1820          Otherwise, set the length to the length of what was matched by the
1821          referenced subpattern. */
1822    
1823          if (offset >= offset_top || md->offset_vector[offset] < 0)
1824            length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1825          else
1826            length = md->offset_vector[offset+1] - md->offset_vector[offset];
1827    
1828        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1829    
# Line 1465  for (;;) Line 1868  for (;;)
1868    
1869        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
1870          {          {
1871          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
1872              {
1873              CHECK_PARTIAL();
1874              RRETURN(MATCH_NOMATCH);
1875              }
1876          eptr += length;          eptr += length;
1877          }          }
1878    
# Line 1480  for (;;) Line 1887  for (;;)
1887          {          {
1888          for (fi = min;; fi++)          for (fi = min;; fi++)
1889            {            {
1890            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1891            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1892            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1893                {
1894                CHECK_PARTIAL();
1895              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
1896                }
1897            eptr += length;            eptr += length;
1898            }            }
1899          /* Control never gets here */          /* Control never gets here */
# Line 1499  for (;;) Line 1909  for (;;)
1909            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims)) break;
1910            eptr += length;            eptr += length;
1911            }            }
1912            CHECK_PARTIAL();
1913          while (eptr >= pp)          while (eptr >= pp)
1914            {            {
1915            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1916            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1917            eptr -= length;            eptr -= length;
1918            }            }
# Line 1566  for (;;) Line 1977  for (;;)
1977          {          {
1978          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
1979            {            {
1980            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
1981                {
1982                CHECK_PARTIAL();
1983                RRETURN(MATCH_NOMATCH);
1984                }
1985            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
1986            if (c > 255)            if (c > 255)
1987              {              {
# Line 1584  for (;;) Line 1999  for (;;)
1999          {          {
2000          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2001            {            {
2002            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2003                {
2004                CHECK_PARTIAL();
2005                RRETURN(MATCH_NOMATCH);
2006                }
2007            c = *eptr++;            c = *eptr++;
2008            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2009            }            }
# Line 1606  for (;;) Line 2025  for (;;)
2025            {            {
2026            for (fi = min;; fi++)            for (fi = min;; fi++)
2027              {              {
2028              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2029              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2030              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
2031                  {
2032                  CHECK_PARTIAL();
2033                  RRETURN(MATCH_NOMATCH);
2034                  }
2035                if (eptr >= md->end_subject)
2036                  {
2037                  SCHECK_PARTIAL();
2038                  RRETURN(MATCH_NOMATCH);
2039                  }
2040              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2041              if (c > 255)              if (c > 255)
2042                {                {
# Line 1626  for (;;) Line 2054  for (;;)
2054            {            {
2055            for (fi = min;; fi++)            for (fi = min;; fi++)
2056              {              {
2057              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2058              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2059              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
2060                  {
2061                  CHECK_PARTIAL();
2062                  RRETURN(MATCH_NOMATCH);
2063                  }
2064                if (eptr >= md->end_subject)
2065                  {
2066                  SCHECK_PARTIAL();
2067                  RRETURN(MATCH_NOMATCH);
2068                  }
2069              c = *eptr++;              c = *eptr++;
2070              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2071              }              }
# Line 1661  for (;;) Line 2098  for (;;)
2098                }                }
2099              eptr += len;              eptr += len;
2100              }              }
2101              CHECK_PARTIAL();
2102            for (;;)            for (;;)
2103              {              {
2104              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2105              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2106              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2107              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1680  for (;;) Line 2118  for (;;)
2118              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2119              eptr++;              eptr++;
2120              }              }
2121              CHECK_PARTIAL();
2122            while (eptr >= pp)            while (eptr >= pp)
2123              {              {
2124              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
             eptr--;  
2125              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2126                eptr--;
2127              }              }
2128            }            }
2129    
# Line 1695  for (;;) Line 2134  for (;;)
2134    
2135    
2136      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2137      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2138        mode, because Unicode properties are supported in non-UTF-8 mode. */
2139    
2140  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2141      case OP_XCLASS:      case OP_XCLASS:
# Line 1736  for (;;) Line 2176  for (;;)
2176    
2177        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2178          {          {
2179          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2180          GETCHARINC(c, eptr);            {
2181              SCHECK_PARTIAL();
2182              RRETURN(MATCH_NOMATCH);
2183              }
2184            GETCHARINCTEST(c, eptr);
2185          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2186          }          }
2187    
# Line 1753  for (;;) Line 2197  for (;;)
2197          {          {
2198          for (fi = min;; fi++)          for (fi = min;; fi++)
2199            {            {
2200            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2201            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2202            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max)
2203            GETCHARINC(c, eptr);              {
2204                CHECK_PARTIAL();
2205                RRETURN(MATCH_NOMATCH);
2206                }
2207              if (eptr >= md->end_subject)
2208                {
2209                SCHECK_PARTIAL();
2210                RRETURN(MATCH_NOMATCH);
2211                }
2212              GETCHARINCTEST(c, eptr);
2213            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2214            }            }
2215          /* Control never gets here */          /* Control never gets here */
# Line 1771  for (;;) Line 2224  for (;;)
2224            {            {
2225            int len = 1;            int len = 1;
2226            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2227            GETCHARLEN(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
2228            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2229            eptr += len;            eptr += len;
2230            }            }
2231            CHECK_PARTIAL();
2232          for(;;)          for(;;)
2233            {            {
2234            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2235            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2236            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2237            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2238            }            }
2239          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2240          }          }
# Line 1836  for (;;) Line 2290  for (;;)
2290    
2291        else        else
2292          {          {
2293          int dc;          unsigned int dc;
2294          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2295          ecode += length;          ecode += length;
2296    
2297          /* If we have Unicode property support, we can use it to test the other          /* If we have Unicode property support, we can use it to test the other
2298          case of the character, if there is one. The result of _pcre_ucp_findchar() is          case of the character, if there is one. */
         < 0 if the char isn't found, and othercase is returned as zero if there  
         isn't one. */  
2299    
2300          if (fc != dc)          if (fc != dc)
2301            {            {
2302  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2303            int chartype;            if (dc != UCD_OTHERCASE(fc))
           int othercase;  
           if (_pcre_ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)  
2304  #endif  #endif
2305              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2306            }            }
# Line 1867  for (;;) Line 2317  for (;;)
2317        }        }
2318      break;      break;
2319    
2320      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2321    
2322      case OP_EXACT:      case OP_EXACT:
2323      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2324      ecode += 3;      ecode += 3;
2325      goto REPEATCHAR;      goto REPEATCHAR;
2326    
2327        case OP_POSUPTO:
2328        possessive = TRUE;
2329        /* Fall through */
2330    
2331      case OP_UPTO:      case OP_UPTO:
2332      case OP_MINUPTO:      case OP_MINUPTO:
2333      min = 0;      min = 0;
# Line 1882  for (;;) Line 2336  for (;;)
2336      ecode += 3;      ecode += 3;
2337      goto REPEATCHAR;      goto REPEATCHAR;
2338    
2339        case OP_POSSTAR:
2340        possessive = TRUE;
2341        min = 0;
2342        max = INT_MAX;
2343        ecode++;
2344        goto REPEATCHAR;
2345    
2346        case OP_POSPLUS:
2347        possessive = TRUE;
2348        min = 1;
2349        max = INT_MAX;
2350        ecode++;
2351        goto REPEATCHAR;
2352    
2353        case OP_POSQUERY:
2354        possessive = TRUE;
2355        min = 0;
2356        max = 1;
2357        ecode++;
2358        goto REPEATCHAR;
2359    
2360      case OP_STAR:      case OP_STAR:
2361      case OP_MINSTAR:      case OP_MINSTAR:
2362      case OP_PLUS:      case OP_PLUS:
# Line 1894  for (;;) Line 2369  for (;;)
2369      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2370      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2371    
2372      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2373    
2374      REPEATCHAR:      REPEATCHAR:
2375  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1905  for (;;) Line 2378  for (;;)
2378        length = 1;        length = 1;
2379        charptr = ecode;        charptr = ecode;
2380        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2381        ecode += length;        ecode += length;
2382    
2383        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 1913  for (;;) Line 2385  for (;;)
2385    
2386        if (length > 1)        if (length > 1)
2387          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2388  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2389          int othercase;          unsigned int othercase;
         int chartype;  
2390          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2391               _pcre_ucp_findchar(fc, &chartype, &othercase) >= 0 &&              (othercase = UCD_OTHERCASE(fc)) != fc)
              othercase > 0)  
2392            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2393            else oclength = 0;
2394  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2395    
2396          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2397            {            {
2398            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2399            /* Need braces because of following else */              memcmp(eptr, charptr, length) == 0) eptr += length;
2400            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2401              else if (oclength > 0 &&
2402                       eptr <= md->end_subject - oclength &&
2403                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2404    #endif  /* SUPPORT_UCP */
2405            else            else
2406              {              {
2407              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2408              eptr += oclength;              RRETURN(MATCH_NOMATCH);
2409              }              }
2410            }            }
2411    
# Line 1943  for (;;) Line 2415  for (;;)
2415            {            {
2416            for (fi = min;; fi++)            for (fi = min;; fi++)
2417              {              {
2418              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2419              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2420              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
2421              if (memcmp(eptr, charptr, length) == 0) eptr += length;                {
2422              /* Need braces because of following else */                CHECK_PARTIAL();
2423              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                RRETURN(MATCH_NOMATCH);
2424                  }
2425                if (eptr <= md->end_subject - length &&
2426                  memcmp(eptr, charptr, length) == 0) eptr += length;
2427    #ifdef SUPPORT_UCP
2428                else if (oclength > 0 &&
2429                         eptr <= md->end_subject - oclength &&
2430                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2431    #endif  /* SUPPORT_UCP */
2432              else              else
2433                {                {
2434                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2435                eptr += oclength;                RRETURN(MATCH_NOMATCH);
2436                }                }
2437              }              }
2438            /* Control never gets here */            /* Control never gets here */
2439            }            }
2440          else  
2441            else  /* Maximize */
2442            {            {
2443            pp = eptr;            pp = eptr;
2444            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2445              {              {
2446              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2447              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2448              else if (oclength == 0) break;  #ifdef SUPPORT_UCP
2449              else              else if (oclength > 0 &&
2450                {                       eptr <= md->end_subject - oclength &&
2451                if (memcmp(eptr, occhars, oclength) != 0) break;                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2452                eptr += oclength;  #endif  /* SUPPORT_UCP */
2453                }              else break;
2454                }
2455    
2456              CHECK_PARTIAL();
2457              if (possessive) continue;
2458    
2459              for(;;)
2460                {
2461                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2462                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2463                if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2464    #ifdef SUPPORT_UCP
2465                eptr--;
2466                BACKCHAR(eptr);
2467    #else   /* without SUPPORT_UCP */
2468                eptr -= length;
2469    #endif  /* SUPPORT_UCP */
2470              }              }
           while (eptr >= pp)  
            {  
            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);  
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
            eptr -= length;  
            }  
           RRETURN(MATCH_NOMATCH);  
2471            }            }
2472          /* Control never gets here */          /* Control never gets here */
2473          }          }
# Line 1990  for (;;) Line 2480  for (;;)
2480  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2481    
2482      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2483        {  
2484        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2485    
2486      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2487      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2011  for (;;) Line 2499  for (;;)
2499        {        {
2500        fc = md->lcc[fc];        fc = md->lcc[fc];
2501        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2502            {
2503            if (eptr >= md->end_subject)
2504              {
2505              SCHECK_PARTIAL();
2506              RRETURN(MATCH_NOMATCH);
2507              }
2508          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2509            }
2510        if (min == max) continue;        if (min == max) continue;
2511        if (minimize)        if (minimize)
2512          {          {
2513          for (fi = min;; fi++)          for (fi = min;; fi++)
2514            {            {
2515            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2516            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2517            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max)
2518                fc != md->lcc[*eptr++])              {
2519                CHECK_PARTIAL();
2520                RRETURN(MATCH_NOMATCH);
2521                }
2522              if (eptr >= md->end_subject)
2523                {
2524                SCHECK_PARTIAL();
2525              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2526                }
2527              if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2528            }            }
2529          /* Control never gets here */          /* Control never gets here */
2530          }          }
2531        else        else  /* Maximize */
2532          {          {
2533          pp = eptr;          pp = eptr;
2534          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2033  for (;;) Line 2536  for (;;)
2536            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2537            eptr++;            eptr++;
2538            }            }
2539    
2540            CHECK_PARTIAL();
2541            if (possessive) continue;
2542    
2543          while (eptr >= pp)          while (eptr >= pp)
2544            {            {
2545            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2546            eptr--;            eptr--;
2547            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2548            }            }
# Line 2048  for (;;) Line 2555  for (;;)
2555    
2556      else      else
2557        {        {
2558        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2559            {
2560            if (eptr >= md->end_subject)
2561              {
2562              SCHECK_PARTIAL();
2563              RRETURN(MATCH_NOMATCH);
2564              }
2565            if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2566            }
2567        if (min == max) continue;        if (min == max) continue;
2568        if (minimize)        if (minimize)
2569          {          {
2570          for (fi = min;; fi++)          for (fi = min;; fi++)
2571            {            {
2572            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2573            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2574            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max)
2575                {
2576                CHECK_PARTIAL();
2577              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2578                }
2579              if (eptr >= md->end_subject)
2580                {
2581                SCHECK_PARTIAL();
2582                RRETURN(MATCH_NOMATCH);
2583                }
2584              if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2585            }            }
2586          /* Control never gets here */          /* Control never gets here */
2587          }          }
2588        else        else  /* Maximize */
2589          {          {
2590          pp = eptr;          pp = eptr;
2591          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2069  for (;;) Line 2593  for (;;)
2593            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2594            eptr++;            eptr++;
2595            }            }
2596            CHECK_PARTIAL();
2597            if (possessive) continue;
2598          while (eptr >= pp)          while (eptr >= pp)
2599            {            {
2600            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2601            eptr--;            eptr--;
2602            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2603            }            }
# Line 2121  for (;;) Line 2647  for (;;)
2647      ecode += 3;      ecode += 3;
2648      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2649    
2650        case OP_NOTPOSSTAR:
2651        possessive = TRUE;
2652        min = 0;
2653        max = INT_MAX;
2654        ecode++;
2655        goto REPEATNOTCHAR;
2656    
2657        case OP_NOTPOSPLUS:
2658        possessive = TRUE;
2659        min = 1;
2660        max = INT_MAX;
2661        ecode++;
2662        goto REPEATNOTCHAR;
2663    
2664        case OP_NOTPOSQUERY:
2665        possessive = TRUE;
2666        min = 0;
2667        max = 1;
2668        ecode++;
2669        goto REPEATNOTCHAR;
2670    
2671        case OP_NOTPOSUPTO:
2672        possessive = TRUE;
2673        min = 0;
2674        max = GET2(ecode, 1);
2675        ecode += 3;
2676        goto REPEATNOTCHAR;
2677    
2678      case OP_NOTSTAR:      case OP_NOTSTAR:
2679      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2680      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2133  for (;;) Line 2687  for (;;)
2687      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2688      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2689    
2690      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
2691    
2692      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2693      fc = *ecode++;      fc = *ecode++;
2694    
2695      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2160  for (;;) Line 2711  for (;;)
2711        /* UTF-8 mode */        /* UTF-8 mode */
2712        if (utf8)        if (utf8)
2713          {          {
2714          register int d;          register unsigned int d;
2715          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2716            {            {
2717              if (eptr >= md->end_subject)
2718                {
2719                SCHECK_PARTIAL();
2720                RRETURN(MATCH_NOMATCH);
2721                }
2722            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
2723            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
2724            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
# Line 2174  for (;;) Line 2730  for (;;)
2730        /* Not UTF-8 mode */        /* Not UTF-8 mode */
2731          {          {
2732          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2733              {
2734              if (eptr >= md->end_subject)
2735                {
2736                SCHECK_PARTIAL();
2737                RRETURN(MATCH_NOMATCH);
2738                }
2739            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2740              }
2741          }          }
2742    
2743        if (min == max) continue;        if (min == max) continue;
# Line 2185  for (;;) Line 2748  for (;;)
2748          /* UTF-8 mode */          /* UTF-8 mode */
2749          if (utf8)          if (utf8)
2750            {            {
2751            register int d;            register unsigned int d;
2752            for (fi = min;; fi++)            for (fi = min;; fi++)
2753              {              {
2754              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2755              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2756                if (fi >= max)
2757                  {
2758                  CHECK_PARTIAL();
2759                  RRETURN(MATCH_NOMATCH);
2760                  }
2761                if (eptr >= md->end_subject)
2762                  {
2763                  SCHECK_PARTIAL();
2764                  RRETURN(MATCH_NOMATCH);
2765                  }
2766              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2767              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2768              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2769              }              }
2770            }            }
2771          else          else
# Line 2202  for (;;) Line 2774  for (;;)
2774            {            {
2775            for (fi = min;; fi++)            for (fi = min;; fi++)
2776              {              {
2777              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2778              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2779              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max)
2780                  {
2781                  CHECK_PARTIAL();
2782                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2783                  }
2784                if (eptr >= md->end_subject)
2785                  {
2786                  SCHECK_PARTIAL();
2787                  RRETURN(MATCH_NOMATCH);
2788                  }
2789                if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2790              }              }
2791            }            }
2792          /* Control never gets here */          /* Control never gets here */
# Line 2221  for (;;) Line 2802  for (;;)
2802          /* UTF-8 mode */          /* UTF-8 mode */
2803          if (utf8)          if (utf8)
2804            {            {
2805            register int d;            register unsigned int d;
2806            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2807              {              {
2808              int len = 1;              int len = 1;
# Line 2231  for (;;) Line 2812  for (;;)
2812              if (fc == d) break;              if (fc == d) break;
2813              eptr += len;              eptr += len;
2814              }              }
2815            for(;;)          CHECK_PARTIAL();
2816            if (possessive) continue;
2817            for(;;)
2818              {              {
2819              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2820              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2821              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2822              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2248  for (;;) Line 2831  for (;;)
2831              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2832              eptr++;              eptr++;
2833              }              }
2834              CHECK_PARTIAL();
2835              if (possessive) continue;
2836            while (eptr >= pp)            while (eptr >= pp)
2837              {              {
2838              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2839              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2840              eptr--;              eptr--;
2841              }              }
# Line 2269  for (;;) Line 2854  for (;;)
2854        /* UTF-8 mode */        /* UTF-8 mode */
2855        if (utf8)        if (utf8)
2856          {          {
2857          register int d;          register unsigned int d;
2858          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2859            {            {
2860              if (eptr >= md->end_subject)
2861                {
2862                SCHECK_PARTIAL();
2863                RRETURN(MATCH_NOMATCH);
2864                }
2865            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
2866            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
2867            }            }
# Line 2281  for (;;) Line 2871  for (;;)
2871        /* Not UTF-8 mode */        /* Not UTF-8 mode */
2872          {          {
2873          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2874              {
2875              if (eptr >= md->end_subject)
2876                {
2877                SCHECK_PARTIAL();
2878                RRETURN(MATCH_NOMATCH);
2879                }
2880            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2881              }
2882          }          }
2883    
2884        if (min == max) continue;        if (min == max) continue;
# Line 2292  for (;;) Line 2889  for (;;)
2889          /* UTF-8 mode */          /* UTF-8 mode */
2890          if (utf8)          if (utf8)
2891            {            {
2892            register int d;            register unsigned int d;
2893            for (fi = min;; fi++)            for (fi = min;; fi++)
2894              {              {
2895              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2896              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2897              GETCHARINC(d, eptr);              if (fi >= max)
2898              if (fi >= max || eptr >= md->end_subject || fc == d)                {
2899                  CHECK_PARTIAL();
2900                  RRETURN(MATCH_NOMATCH);
2901                  }
2902                if (eptr >= md->end_subject)
2903                  {
2904                  SCHECK_PARTIAL();
2905                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2906                  }
2907                GETCHARINC(d, eptr);
2908                if (fc == d) RRETURN(MATCH_NOMATCH);
2909              }              }
2910            }            }
2911          else          else
# Line 2308  for (;;) Line 2914  for (;;)
2914            {            {
2915            for (fi = min;; fi++)            for (fi = min;; fi++)
2916              {              {
2917              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2918              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2919              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max)
2920                  {
2921                  CHECK_PARTIAL();
2922                  RRETURN(MATCH_NOMATCH);
2923                  }
2924                if (eptr >= md->end_subject)
2925                  {
2926                  SCHECK_PARTIAL();
2927                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2928                  }
2929                if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2930              }              }
2931            }            }
2932          /* Control never gets here */          /* Control never gets here */
# Line 2327  for (;;) Line 2942  for (;;)
2942          /* UTF-8 mode */          /* UTF-8 mode */
2943          if (utf8)          if (utf8)
2944            {            {
2945            register int d;            register unsigned int d;
2946            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2947              {              {
2948              int len = 1;              int len = 1;
# Line 2336  for (;;) Line 2951  for (;;)
2951              if (fc == d) break;              if (fc == d) break;
2952              eptr += len;              eptr += len;
2953              }              }
2954              CHECK_PARTIAL();
2955              if (possessive) continue;
2956            for(;;)            for(;;)
2957              {              {
2958              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2959              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2960              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2961              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2353  for (;;) Line 2970  for (;;)
2970              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2971              eptr++;              eptr++;
2972              }              }
2973              CHECK_PARTIAL();
2974              if (possessive) continue;
2975            while (eptr >= pp)            while (eptr >= pp)
2976              {              {
2977              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2978              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2979              eptr--;              eptr--;
2980              }              }
# Line 2384  for (;;) Line 3003  for (;;)
3003      ecode += 3;      ecode += 3;
3004      goto REPEATTYPE;      goto REPEATTYPE;
3005    
3006        case OP_TYPEPOSSTAR:
3007        possessive = TRUE;
3008        min = 0;
3009        max = INT_MAX;
3010        ecode++;
3011        goto REPEATTYPE;
3012    
3013        case OP_TYPEPOSPLUS:
3014        possessive = TRUE;
3015        min = 1;
3016        max = INT_MAX;
3017        ecode++;
3018        goto REPEATTYPE;
3019    
3020        case OP_TYPEPOSQUERY:
3021        possessive = TRUE;
3022        min = 0;
3023        max = 1;
3024        ecode++;
3025        goto REPEATTYPE;
3026    
3027        case OP_TYPEPOSUPTO:
3028        possessive = TRUE;
3029        min = 0;
3030        max = GET2(ecode, 1);
3031        ecode += 3;
3032        goto REPEATTYPE;
3033    
3034      case OP_TYPESTAR:      case OP_TYPESTAR:
3035      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
3036      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2408  for (;;) Line 3055  for (;;)
3055        {        {
3056        prop_fail_result = ctype == OP_NOTPROP;        prop_fail_result = ctype == OP_NOTPROP;
3057        prop_type = *ecode++;        prop_type = *ecode++;
3058        if (prop_type >= 128)        prop_value = *ecode++;
         {  
         prop_test_against = prop_type - 128;  
         prop_test_variable = &prop_category;  
         }  
       else  
         {  
         prop_test_against = prop_type;  
         prop_test_variable = &prop_chartype;  
         }  
3059        }        }
3060      else prop_type = -1;      else prop_type = -1;
3061  #endif  #endif
3062    
3063      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3064      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3065      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3066      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3067      and single-bytes. */      and single-bytes. */
3068    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3069      if (min > 0)      if (min > 0)
3070        {        {
3071  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3072        if (prop_type > 0)        if (prop_type >= 0)
3073          {          {
3074          for (i = 1; i <= min; i++)          switch(prop_type)
3075            {            {
3076            GETCHARINC(c, eptr);            case PT_ANY:
3077            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3078            if ((*prop_test_variable == prop_test_against) == prop_fail_result)            for (i = 1; i <= min; i++)
3079              RRETURN(MATCH_NOMATCH);              {
3080            }              if (eptr >= md->end_subject)
3081          }                {
3082                  SCHECK_PARTIAL();
3083                  RRETURN(MATCH_NOMATCH);
3084                  }
3085                GETCHARINCTEST(c, eptr);
3086                }
3087              break;
3088    
3089        /* Match extended Unicode sequences. We will get here only if the            case PT_LAMP:
3090        support is in the binary; otherwise a compile-time error occurs. */            for (i = 1; i <= min; i++)
3091                {
3092                if (eptr >= md->end_subject)
3093                  {
3094                  SCHECK_PARTIAL();
3095                  RRETURN(MATCH_NOMATCH);
3096                  }
3097                GETCHARINCTEST(c, eptr);
3098                prop_chartype = UCD_CHARTYPE(c);
3099                if ((prop_chartype == ucp_Lu ||
3100                     prop_chartype == ucp_Ll ||
3101                     prop_chartype == ucp_Lt) == prop_fail_result)
3102                  RRETURN(MATCH_NOMATCH);
3103                }
3104              break;
3105    
3106              case PT_GC:
3107              for (i = 1; i <= min; i++)
3108                {
3109                if (eptr >= md->end_subject)
3110                  {
3111                  SCHECK_PARTIAL();
3112                  RRETURN(MATCH_NOMATCH);
3113                  }
3114                GETCHARINCTEST(c, eptr);
3115                prop_category = UCD_CATEGORY(c);
3116                if ((prop_category == prop_value) == prop_fail_result)
3117                  RRETURN(MATCH_NOMATCH);
3118                }
3119              break;
3120    
3121              case PT_PC:
3122              for (i = 1; i <= min; i++)
3123                {
3124                if (eptr >= md->end_subject)
3125                  {
3126                  SCHECK_PARTIAL();
3127                  RRETURN(MATCH_NOMATCH);
3128                  }
3129                GETCHARINCTEST(c, eptr);
3130                prop_chartype = UCD_CHARTYPE(c);
3131                if ((prop_chartype == prop_value) == prop_fail_result)
3132                  RRETURN(MATCH_NOMATCH);
3133                }
3134              break;
3135    
3136              case PT_SC:
3137              for (i = 1; i <= min; i++)
3138                {
3139                if (eptr >= md->end_subject)
3140                  {
3141                  SCHECK_PARTIAL();
3142                  RRETURN(MATCH_NOMATCH);
3143                  }
3144                GETCHARINCTEST(c, eptr);
3145                prop_script = UCD_SCRIPT(c);
3146                if ((prop_script == prop_value) == prop_fail_result)
3147                  RRETURN(MATCH_NOMATCH);
3148                }
3149              break;
3150    
3151              default:
3152              RRETURN(PCRE_ERROR_INTERNAL);
3153              }
3154            }
3155    
3156          /* Match extended Unicode sequences. We will get here only if the
3157          support is in the binary; otherwise a compile-time error occurs. */
3158    
3159        else if (ctype == OP_EXTUNI)        else if (ctype == OP_EXTUNI)
3160          {          {
3161          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3162            {            {
3163              if (eptr >= md->end_subject)
3164                {
3165                SCHECK_PARTIAL();
3166                RRETURN(MATCH_NOMATCH);
3167                }
3168            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3169            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = UCD_CATEGORY(c);
3170            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3171            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3172              {              {
3173              int len = 1;              int len = 1;
3174              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3175                {                else { GETCHARLEN(c, eptr, len); }
3176                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);  
3177              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3178              eptr += len;              eptr += len;
3179              }              }
# Line 2480  for (;;) Line 3191  for (;;)
3191          case OP_ANY:          case OP_ANY:
3192          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3193            {            {
3194            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3195               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))              {
3196                SCHECK_PARTIAL();
3197              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3198                }
3199              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3200              eptr++;
3201              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3202              }
3203            break;
3204    
3205            case OP_ALLANY:
3206            for (i = 1; i <= min; i++)
3207              {
3208              if (eptr >= md->end_subject)
3209                {
3210                SCHECK_PARTIAL();
3211                RRETURN(MATCH_NOMATCH);
3212                }
3213              eptr++;
3214            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3215            }            }
3216          break;          break;
3217    
3218          case OP_ANYBYTE:          case OP_ANYBYTE:
3219            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3220          eptr += min;          eptr += min;
3221          break;          break;
3222    
3223            case OP_ANYNL:
3224            for (i = 1; i <= min; i++)
3225              {
3226              if (eptr >= md->end_subject)
3227                {
3228                SCHECK_PARTIAL();
3229                RRETURN(MATCH_NOMATCH);
3230                }
3231              GETCHARINC(c, eptr);
3232              switch(c)
3233                {
3234                default: RRETURN(MATCH_NOMATCH);
3235                case 0x000d:
3236                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3237                break;
3238    
3239                case 0x000a:
3240                break;
3241    
3242                case 0x000b:
3243                case 0x000c:
3244                case 0x0085:
3245                case 0x2028:
3246                case 0x2029:
3247                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3248                break;
3249                }
3250              }
3251            break;
3252    
3253            case OP_NOT_HSPACE:
3254            for (i = 1; i <= min; i++)
3255              {
3256              if (eptr >= md->end_subject)
3257                {
3258                SCHECK_PARTIAL();
3259                RRETURN(MATCH_NOMATCH);
3260                }
3261              GETCHARINC(c, eptr);
3262              switch(c)
3263                {
3264                default: break;
3265                case 0x09:      /* HT */
3266                case 0x20:      /* SPACE */
3267                case 0xa0:      /* NBSP */
3268                case 0x1680:    /* OGHAM SPACE MARK */
3269                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3270                case 0x2000:    /* EN QUAD */
3271                case 0x2001:    /* EM QUAD */
3272                case 0x2002:    /* EN SPACE */
3273                case 0x2003:    /* EM SPACE */
3274                case 0x2004:    /* THREE-PER-EM SPACE */
3275                case 0x2005:    /* FOUR-PER-EM SPACE */
3276                case 0x2006:    /* SIX-PER-EM SPACE */
3277                case 0x2007:    /* FIGURE SPACE */
3278                case 0x2008:    /* PUNCTUATION SPACE */
3279                case 0x2009:    /* THIN SPACE */
3280                case 0x200A:    /* HAIR SPACE */
3281                case 0x202f:    /* NARROW NO-BREAK SPACE */
3282                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3283                case 0x3000:    /* IDEOGRAPHIC SPACE */
3284                RRETURN(MATCH_NOMATCH);
3285                }
3286              }
3287            break;
3288    
3289            case OP_HSPACE:
3290            for (i = 1; i <= min; i++)
3291              {
3292              if (eptr >= md->end_subject)
3293                {
3294                SCHECK_PARTIAL();
3295                RRETURN(MATCH_NOMATCH);
3296                }
3297              GETCHARINC(c, eptr);
3298              switch(c)
3299                {
3300                default: RRETURN(MATCH_NOMATCH);
3301                case 0x09:      /* HT */
3302                case 0x20:      /* SPACE */
3303                case 0xa0:      /* NBSP */
3304                case 0x1680:    /* OGHAM SPACE MARK */
3305                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3306                case 0x2000:    /* EN QUAD */
3307                case 0x2001:    /* EM QUAD */
3308                case 0x2002:    /* EN SPACE */
3309                case 0x2003:    /* EM SPACE */
3310                case 0x2004:    /* THREE-PER-EM SPACE */
3311                case 0x2005:    /* FOUR-PER-EM SPACE */
3312                case 0x2006:    /* SIX-PER-EM SPACE */
3313                case 0x2007:    /* FIGURE SPACE */
3314                case 0x2008:    /* PUNCTUATION SPACE */
3315                case 0x2009:    /* THIN SPACE */
3316                case 0x200A:    /* HAIR SPACE */
3317                case 0x202f:    /* NARROW NO-BREAK SPACE */
3318                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3319                case 0x3000:    /* IDEOGRAPHIC SPACE */
3320                break;
3321                }
3322              }
3323            break;
3324    
3325            case OP_NOT_VSPACE:
3326            for (i = 1; i <= min; i++)
3327              {
3328              if (eptr >= md->end_subject)
3329                {
3330                SCHECK_PARTIAL();
3331                RRETURN(MATCH_NOMATCH);
3332                }
3333              GETCHARINC(c, eptr);
3334              switch(c)
3335                {
3336                default: break;
3337                case 0x0a:      /* LF */
3338                case 0x0b:      /* VT */
3339                case 0x0c:      /* FF */
3340                case 0x0d:      /* CR */
3341                case 0x85:      /* NEL */
3342                case 0x2028:    /* LINE SEPARATOR */
3343                case 0x2029:    /* PARAGRAPH SEPARATOR */
3344                RRETURN(MATCH_NOMATCH);
3345                }
3346              }
3347            break;
3348    
3349            case OP_VSPACE:
3350            for (i = 1; i <= min; i++)
3351              {
3352              if (eptr >= md->end_subject)
3353                {
3354                SCHECK_PARTIAL();
3355                RRETURN(MATCH_NOMATCH);
3356                }
3357              GETCHARINC(c, eptr);
3358              switch(c)
3359                {
3360                default: RRETURN(MATCH_NOMATCH);
3361                case 0x0a:      /* LF */
3362                case 0x0b:      /* VT */
3363                case 0x0c:      /* FF */
3364                case 0x0d:      /* CR */
3365                case 0x85:      /* NEL */
3366                case 0x2028:    /* LINE SEPARATOR */
3367                case 0x2029:    /* PARAGRAPH SEPARATOR */
3368                break;
3369                }
3370              }
3371            break;
3372    
3373          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3374          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3375            {            {
3376            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3377                {
3378                SCHECK_PARTIAL();
3379                RRETURN(MATCH_NOMATCH);
3380                }
3381            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3382            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3383              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 2504  for (;;) Line 3387  for (;;)
3387          case OP_DIGIT:          case OP_DIGIT:
3388          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3389            {            {
3390            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3391               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3392                SCHECK_PARTIAL();
3393                RRETURN(MATCH_NOMATCH);
3394                }
3395              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3396              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3397            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3398            }            }
# Line 2514  for (;;) Line 3401  for (;;)
3401          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3402          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3403            {            {
3404            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3405               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3406                SCHECK_PARTIAL();
3407              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3408            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              }
3409              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3410                RRETURN(MATCH_NOMATCH);
3411              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3412            }            }
3413          break;          break;
3414    
3415          case OP_WHITESPACE:          case OP_WHITESPACE:
3416          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3417            {            {
3418            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3419               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3420                SCHECK_PARTIAL();
3421                RRETURN(MATCH_NOMATCH);
3422                }
3423              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3424              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3425            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3426            }            }
# Line 2535  for (;;) Line 3430  for (;;)
3430          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3431            {            {
3432            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
3433               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))               (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3434              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3435            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3436            }            }
3437          break;          break;
3438    
3439          case OP_WORDCHAR:          case OP_WORDCHAR:
3440          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3441            {            {
3442            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3443               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3444                SCHECK_PARTIAL();
3445                RRETURN(MATCH_NOMATCH);
3446                }
3447              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3448              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3449            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3450            }            }
# Line 2564  for (;;) Line 3463  for (;;)
3463        switch(ctype)        switch(ctype)
3464          {          {
3465          case OP_ANY:          case OP_ANY:
3466          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3467            {            {
3468            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
3469              if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);              {
3470                SCHECK_PARTIAL();
3471                RRETURN(MATCH_NOMATCH);
3472                }
3473              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3474              eptr++;
3475            }            }
3476          else eptr += min;          break;
3477    
3478            case OP_ALLANY:
3479            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3480            eptr += min;
3481          break;          break;
3482    
3483          case OP_ANYBYTE:          case OP_ANYBYTE:
3484            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3485          eptr += min;          eptr += min;
3486          break;          break;
3487    
3488            case OP_ANYNL:
3489            for (i = 1; i <= min; i++)
3490              {
3491              if (eptr >= md->end_subject)
3492                {
3493                SCHECK_PARTIAL();
3494                RRETURN(MATCH_NOMATCH);
3495                }
3496              switch(*eptr++)
3497                {
3498                default: RRETURN(MATCH_NOMATCH);
3499                case 0x000d:
3500                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3501                break;
3502                case 0x000a:
3503                break;
3504    
3505                case 0x000b:
3506                case 0x000c:
3507                case 0x0085:
3508                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3509                break;
3510                }
3511              }
3512            break;
3513    
3514            case OP_NOT_HSPACE:
3515            for (i = 1; i <= min; i++)
3516              {
3517              if (eptr >= md->end_subject)
3518                {
3519                SCHECK_PARTIAL();
3520                RRETURN(MATCH_NOMATCH);
3521                }
3522              switch(*eptr++)
3523                {
3524                default: break;
3525                case 0x09:      /* HT */
3526                case 0x20:      /* SPACE */
3527                case 0xa0:      /* NBSP */
3528                RRETURN(MATCH_NOMATCH);
3529                }
3530              }
3531            break;
3532    
3533            case OP_HSPACE:
3534            for (i = 1; i <= min; i++)
3535              {
3536              if (eptr >= md->end_subject)
3537                {
3538                SCHECK_PARTIAL();
3539                RRETURN(MATCH_NOMATCH);
3540                }
3541              switch(*eptr++)
3542                {
3543                default: RRETURN(MATCH_NOMATCH);
3544                case 0x09:      /* HT */
3545                case 0x20:      /* SPACE */
3546                case 0xa0:      /* NBSP */
3547                break;
3548                }
3549              }
3550            break;
3551    
3552            case OP_NOT_VSPACE:
3553            for (i = 1; i <= min; i++)
3554              {
3555              if (eptr >= md->end_subject)
3556                {
3557                SCHECK_PARTIAL();
3558                RRETURN(MATCH_NOMATCH);
3559                }
3560              switch(*eptr++)
3561                {
3562                default: break;
3563                case 0x0a:      /* LF */
3564                case 0x0b:      /* VT */
3565                case 0x0c:      /* FF */
3566                case 0x0d:      /* CR */
3567                case 0x85:      /* NEL */
3568                RRETURN(MATCH_NOMATCH);
3569                }
3570              }
3571            break;
3572    
3573            case OP_VSPACE:
3574            for (i = 1; i <= min; i++)
3575              {
3576              if (eptr >= md->end_subject)
3577                {
3578                SCHECK_PARTIAL();
3579                RRETURN(MATCH_NOMATCH);
3580                }
3581              switch(*eptr++)
3582                {
3583                default: RRETURN(MATCH_NOMATCH);
3584                case 0x0a:      /* LF */
3585                case 0x0b:      /* VT */
3586                case 0x0c:      /* FF */
3587                case 0x0d:      /* CR */
3588                case 0x85:      /* NEL */
3589                break;
3590                }
3591              }
3592            break;
3593    
3594          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3595          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3596              {
3597              if (eptr >= md->end_subject)
3598                {
3599                SCHECK_PARTIAL();
3600                RRETURN(MATCH_NOMATCH);
3601                }
3602            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3603              }
3604          break;          break;
3605    
3606          case OP_DIGIT:          case OP_DIGIT:
3607          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3608              {
3609              if (eptr >= md->end_subject)
3610                {
3611                SCHECK_PARTIAL();
3612                RRETURN(MATCH_NOMATCH);
3613                }
3614            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3615              }
3616          break;          break;
3617    
3618          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3619          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3620              {
3621              if (eptr >= md->end_subject)
3622                {
3623                SCHECK_PARTIAL();
3624                RRETURN(MATCH_NOMATCH);
3625                }
3626            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3627              }
3628          break;          break;
3629    
3630          case OP_WHITESPACE:          case OP_WHITESPACE:
3631          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3632              {
3633              if (eptr >= md->end_subject)
3634                {
3635                SCHECK_PARTIAL();
3636                RRETURN(MATCH_NOMATCH);
3637                }
3638            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3639              }
3640          break;          break;
3641    
3642          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3643          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3644              {
3645              if (eptr >= md->end_subject)
3646                {
3647                SCHECK_PARTIAL();
3648                RRETURN(MATCH_NOMATCH);
3649                }
3650            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if ((md->ctypes[*eptr++] & ctype_word) != 0)
3651              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3652              }
3653          break;          break;
3654    
3655          case OP_WORDCHAR:          case OP_WORDCHAR:
3656          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3657              {
3658              if (eptr >= md->end_subject)
3659                {
3660                SCHECK_PARTIAL();
3661                RRETURN(MATCH_NOMATCH);
3662                }
3663            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if ((md->ctypes[*eptr++] & ctype_word) == 0)
3664              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3665              }
3666          break;          break;
3667    
3668          default:          default:
# Line 2624  for (;;) Line 3681  for (;;)
3681      if (minimize)      if (minimize)
3682        {        {
3683  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3684        if (prop_type > 0)        if (prop_type >= 0)
3685          {          {
3686          for (fi = min;; fi++)          switch(prop_type)
3687            {            {
3688            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            case PT_ANY:
3689            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for (fi = min;; fi++)
3690            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              {
3691            GETCHARINC(c, eptr);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3692            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3693            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (fi >= max)
3694              RRETURN(MATCH_NOMATCH);                {
3695                  CHECK_PARTIAL();
3696                  RRETURN(MATCH_NOMATCH);
3697                  }
3698                if (eptr >= md->end_subject)
3699                  {
3700                  SCHECK_PARTIAL();
3701                  RRETURN(MATCH_NOMATCH);
3702                  }
3703                GETCHARINC(c, eptr);
3704                if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3705                }
3706              /* Control never gets here */
3707    
3708              case PT_LAMP:
3709              for (fi = min;; fi++)
3710                {
3711                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3712                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3713                if (fi >= max)
3714                  {
3715                  CHECK_PARTIAL();
3716                  RRETURN(MATCH_NOMATCH);
3717                  }
3718                if (eptr >= md->end_subject)
3719                  {
3720                  SCHECK_PARTIAL();
3721                  RRETURN(MATCH_NOMATCH);
3722                  }
3723                GETCHARINC(c, eptr);
3724                prop_chartype = UCD_CHARTYPE(c);
3725                if ((prop_chartype == ucp_Lu ||
3726                     prop_chartype == ucp_Ll ||
3727                     prop_chartype == ucp_Lt) == prop_fail_result)
3728                  RRETURN(MATCH_NOMATCH);
3729                }
3730              /* Control never gets here */
3731    
3732              case PT_GC:
3733              for (fi = min;; fi++)
3734                {
3735                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3736                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3737                if (fi >= max)
3738                  {
3739                  CHECK_PARTIAL();
3740                  RRETURN(MATCH_NOMATCH);
3741                  }
3742                if (eptr >= md->end_subject)
3743                  {
3744                  SCHECK_PARTIAL();
3745                  RRETURN(MATCH_NOMATCH);
3746                  }
3747                GETCHARINC(c, eptr);
3748                prop_category = UCD_CATEGORY(c);
3749                if ((prop_category == prop_value) == prop_fail_result)
3750                  RRETURN(MATCH_NOMATCH);
3751                }
3752              /* Control never gets here */
3753    
3754              case PT_PC:
3755              for (fi = min;; fi++)
3756                {
3757                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3758                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3759                if (fi >= max)
3760                  {
3761                  CHECK_PARTIAL();
3762                  RRETURN(MATCH_NOMATCH);
3763                  }
3764                if (eptr >= md->end_subject)
3765                  {
3766                  SCHECK_PARTIAL();
3767                  RRETURN(MATCH_NOMATCH);
3768                  }
3769                GETCHARINC(c, eptr);
3770                prop_chartype = UCD_CHARTYPE(c);
3771                if ((prop_chartype == prop_value) == prop_fail_result)
3772                  RRETURN(MATCH_NOMATCH);
3773                }
3774              /* Control never gets here */
3775    
3776              case PT_SC:
3777              for (fi = min;; fi++)
3778                {
3779                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3780                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3781                if (fi >= max)
3782                  {
3783                  CHECK_PARTIAL();
3784                  RRETURN(MATCH_NOMATCH);
3785                  }
3786                if (eptr >= md->end_subject)
3787                  {
3788                  SCHECK_PARTIAL();
3789                  RRETURN(MATCH_NOMATCH);
3790                  }
3791                GETCHARINC(c, eptr);
3792                prop_script = UCD_SCRIPT(c);
3793                if ((prop_script == prop_value) == prop_fail_result)
3794                  RRETURN(MATCH_NOMATCH);
3795                }
3796              /* Control never gets here */
3797    
3798              default:
3799              RRETURN(PCRE_ERROR_INTERNAL);
3800            }            }
3801          }          }
3802    
# Line 2645  for (;;) Line 3807  for (;;)
3807          {          {
3808          for (fi = min;; fi++)          for (fi = min;; fi++)
3809            {            {
3810            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3811            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3812            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max)
3813                {
3814                CHECK_PARTIAL();
3815                RRETURN(MATCH_NOMATCH);
3816                }
3817              if (eptr >= md->end_subject)
3818                {
3819                SCHECK_PARTIAL();
3820                RRETURN(MATCH_NOMATCH);
3821                }
3822            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3823            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = UCD_CATEGORY(c);
3824            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3825            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3826              {              {
3827              int len = 1;              int len = 1;
3828              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3829                {                else { GETCHARLEN(c, eptr, len); }
3830                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);  
3831              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3832              eptr += len;              eptr += len;
3833              }              }
# Line 2674  for (;;) Line 3843  for (;;)
3843          {          {
3844          for (fi = min;; fi++)          for (fi = min;; fi++)
3845            {            {
3846            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3847            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3848            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max)
3849                {
3850                CHECK_PARTIAL();
3851                RRETURN(MATCH_NOMATCH);
3852                }
3853              if (eptr >= md->end_subject)
3854                {
3855                SCHECK_PARTIAL();
3856                RRETURN(MATCH_NOMATCH);
3857                }
3858              if (ctype == OP_ANY && IS_NEWLINE(eptr))
3859                RRETURN(MATCH_NOMATCH);
3860            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3861            switch(ctype)            switch(ctype)
3862              {              {
3863              case OP_ANY:              case OP_ANY:        /* This is the non-NL case */
3864              if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);              case OP_ALLANY:
3865                case OP_ANYBYTE:
3866              break;              break;
3867    
3868              case OP_ANYBYTE:              case OP_ANYNL:
3869                switch(c)
3870                  {
3871                  default: RRETURN(MATCH_NOMATCH);
3872                  case 0x000d:
3873                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3874                  break;
3875                  case 0x000a:
3876                  break;
3877    
3878                  case 0x000b:
3879                  case 0x000c:
3880                  case 0x0085:
3881                  case 0x2028:
3882                  case 0x2029:
3883                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3884                  break;
3885                  }
3886                break;
3887    
3888                case OP_NOT_HSPACE:
3889                switch(c)
3890                  {
3891                  default: break;
3892                  case 0x09:      /* HT */
3893                  case 0x20:      /* SPACE */
3894                  case 0xa0:      /* NBSP */
3895                  case 0x1680:    /* OGHAM SPACE MARK */
3896                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3897                  case 0x2000:    /* EN QUAD */
3898                  case 0x2001:    /* EM QUAD */
3899                  case 0x2002:    /* EN SPACE */
3900                  case 0x2003:    /* EM SPACE */
3901                  case 0x2004:    /* THREE-PER-EM SPACE */
3902                  case 0x2005:    /* FOUR-PER-EM SPACE */
3903                  case 0x2006:    /* SIX-PER-EM SPACE */
3904                  case 0x2007:    /* FIGURE SPACE */
3905                  case 0x2008:    /* PUNCTUATION SPACE */
3906                  case 0x2009:    /* THIN SPACE */
3907                  case 0x200A:    /* HAIR SPACE */
3908                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3909                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3910                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3911                  RRETURN(MATCH_NOMATCH);
3912                  }
3913                break;
3914    
3915                case OP_HSPACE:
3916                switch(c)
3917                  {
3918                  default: RRETURN(MATCH_NOMATCH);
3919                  case 0x09:      /* HT */
3920                  case 0x20:      /* SPACE */
3921                  case 0xa0:      /* NBSP */
3922                  case 0x1680:    /* OGHAM SPACE MARK */
3923                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3924                  case 0x2000:    /* EN QUAD */
3925                  case 0x2001:    /* EM QUAD */
3926                  case 0x2002:    /* EN SPACE */
3927                  case 0x2003:    /* EM SPACE */
3928                  case 0x2004:    /* THREE-PER-EM SPACE */
3929                  case 0x2005:    /* FOUR-PER-EM SPACE */
3930                  case 0x2006:    /* SIX-PER-EM SPACE */
3931                  case 0x2007:    /* FIGURE SPACE */
3932                  case 0x2008:    /* PUNCTUATION SPACE */
3933                  case 0x2009:    /* THIN SPACE */
3934                  case 0x200A:    /* HAIR SPACE */
3935                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3936                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3937                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3938                  break;
3939                  }
3940                break;
3941    
3942                case OP_NOT_VSPACE:
3943                switch(c)
3944                  {
3945                  default: break;
3946                  case 0x0a:      /* LF */
3947                  case 0x0b:      /* VT */
3948                  case 0x0c:      /* FF */
3949                  case 0x0d:      /* CR */
3950                  case 0x85:      /* NEL */
3951                  case 0x2028:    /* LINE SEPARATOR */
3952                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3953                  RRETURN(MATCH_NOMATCH);
3954                  }
3955                break;
3956    
3957                case OP_VSPACE:
3958                switch(c)
3959                  {
3960                  default: RRETURN(MATCH_NOMATCH);
3961                  case 0x0a:      /* LF */
3962                  case 0x0b:      /* VT */
3963                  case 0x0c:      /* FF */
3964                  case 0x0d:      /* CR */
3965                  case 0x85:      /* NEL */
3966                  case 0x2028:    /* LINE SEPARATOR */
3967                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3968                  break;
3969                  }
3970              break;              break;
3971    
3972              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
# Line 2729  for (;;) Line 4010  for (;;)
4010          {          {
4011          for (fi = min;; fi++)          for (fi = min;; fi++)
4012            {            {
4013            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4014            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4015            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max)
4016                {
4017                CHECK_PARTIAL();
4018                RRETURN(MATCH_NOMATCH);
4019                }
4020              if (eptr >= md->end_subject)
4021                {
4022                SCHECK_PARTIAL();
4023                RRETURN(MATCH_NOMATCH);
4024                }
4025              if (ctype == OP_ANY && IS_NEWLINE(eptr))
4026                RRETURN(MATCH_NOMATCH);
4027            c = *eptr++;            c = *eptr++;
4028            switch(ctype)            switch(ctype)
4029              {              {
4030              case OP_ANY:              case OP_ANY:     /* This is the non-NL case */
4031              if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);              case OP_ALLANY:
4032                case OP_ANYBYTE:
4033              break;              break;
4034    
4035              case OP_ANYBYTE:              case OP_ANYNL:
4036                switch(c)
4037                  {
4038                  default: RRETURN(MATCH_NOMATCH);
4039                  case 0x000d:
4040                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4041                  break;
4042    
4043                  case 0x000a:
4044                  break;
4045    
4046                  case 0x000b:
4047                  case 0x000c:
4048                  case 0x0085:
4049                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4050                  break;
4051                  }
4052                break;
4053    
4054                case OP_NOT_HSPACE:
4055                switch(c)
4056                  {
4057                  default: break;
4058                  case 0x09:      /* HT */
4059                  case 0x20:      /* SPACE */
4060                  case 0xa0:      /* NBSP */
4061                  RRETURN(MATCH_NOMATCH);
4062                  }
4063                break;
4064    
4065                case OP_HSPACE:
4066                switch(c)
4067                  {
4068                  default: RRETURN(MATCH_NOMATCH);
4069                  case 0x09:      /* HT */
4070                  case 0x20:      /* SPACE */
4071                  case 0xa0:      /* NBSP */
4072                  break;
4073                  }
4074                break;
4075    
4076                case OP_NOT_VSPACE:
4077                switch(c)
4078                  {
4079                  default: break;
4080                  case 0x0a:      /* LF */
4081                  case 0x0b:      /* VT */
4082                  case 0x0c:      /* FF */
4083                  case 0x0d:      /* CR */
4084                  case 0x85:      /* NEL */
4085                  RRETURN(MATCH_NOMATCH);
4086                  }
4087                break;
4088    
4089                case OP_VSPACE:
4090                switch(c)
4091                  {
4092                  default: RRETURN(MATCH_NOMATCH);
4093                  case 0x0a:      /* LF */
4094                  case 0x0b:      /* VT */
4095                  case 0x0c:      /* FF */
4096                  case 0x0d:      /* CR */
4097                  case 0x85:      /* NEL */
4098                  break;
4099                  }
4100              break;              break;
4101    
4102              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
# Line 2774  for (;;) Line 4131  for (;;)
4131        /* Control never gets here */        /* Control never gets here */
4132        }        }
4133    
4134      /* If maximizing it is worth using inline code for speed, doing the type      /* If maximizing, it is worth using inline code for speed, doing the type
4135      test once at the start (i.e. keep it out of the loop). Again, keep the      test once at the start (i.e. keep it out of the loop). Again, keep the
4136      UTF-8 and UCP stuff separate. */      UTF-8 and UCP stuff separate. */
4137    
# Line 2782  for (;;) Line 4139  for (;;)
4139        {        {
4140        pp = eptr;  /* Remember where we started */        pp = eptr;  /* Remember where we started */
4141    
4142  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4143        if (prop_type > 0)        if (prop_type >= 0)
4144          {          {
4145          for (i = min; i < max; i++)          switch(prop_type)
4146            {            {
4147            int len = 1;            case PT_ANY:
4148            if (eptr >= md->end_subject) break;            for (i = min; i < max; i++)
4149            GETCHARLEN(c, eptr, len);              {
4150            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              int len = 1;
4151            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (eptr >= md->end_subject) break;
4152              break;              GETCHARLEN(c, eptr, len);
4153            eptr+= len;              if (prop_fail_result) break;
4154                eptr+= len;
4155                }
4156              break;
4157    
4158              case PT_LAMP:
4159              for (i = min; i < max; i++)
4160                {
4161                int len = 1;
4162                if (eptr >= md->end_subject) break;
4163                GETCHARLEN(c, eptr, len);
4164                prop_chartype = UCD_CHARTYPE(c);
4165                if ((prop_chartype == ucp_Lu ||
4166                     prop_chartype == ucp_Ll ||
4167                     prop_chartype == ucp_Lt) == prop_fail_result)
4168                  break;
4169                eptr+= len;
4170                }
4171              break;
4172    
4173              case PT_GC:
4174              for (i = min; i < max; i++)
4175                {
4176                int len = 1;
4177                if (eptr >= md->end_subject) break;
4178                GETCHARLEN(c, eptr, len);
4179                prop_category = UCD_CATEGORY(c);
4180                if ((prop_category == prop_value) == prop_fail_result)
4181                  break;
4182                eptr+= len;
4183                }
4184              break;
4185    
4186              case PT_PC:
4187              for (i = min; i < max; i++)
4188                {
4189                int len = 1;
4190                if (eptr >= md->end_subject) break;
4191                GETCHARLEN(c, eptr, len);
4192                prop_chartype = UCD_CHARTYPE(c);
4193                if ((prop_chartype == prop_value) == prop_fail_result)
4194                  break;
4195                eptr+= len;
4196                }
4197              break;
4198    
4199              case PT_SC:
4200              for (i = min; i < max; i++)
4201                {
4202                int len = 1;
4203                if (eptr >= md->end_subject) break;
4204                GETCHARLEN(c, eptr, len);
4205                prop_script = UCD_SCRIPT(c);
4206                if ((prop_script == prop_value) == prop_fail_result)
4207                  break;
4208                eptr+= len;
4209                }
4210              break;
4211            }            }
4212    
4213          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4214    
4215            CHECK_PARTIAL();
4216            if (possessive) continue;
4217          for(;;)          for(;;)
4218            {            {
4219            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
4220            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4221            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
4222            BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
4223            }            }
4224          }          }
4225    
# Line 2816  for (;;) Line 4232  for (;;)
4232            {            {
4233            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
4234            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4235            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = UCD_CATEGORY(c);
4236            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
4237            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4238              {              {
# Line 2825  for (;;) Line 4241  for (;;)
4241                {                {
4242                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
4243                }                }
4244              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = UCD_CATEGORY(c);
4245              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
4246              eptr += len;              eptr += len;
4247              }              }
# Line 2833  for (;;) Line 4249  for (;;)
4249    
4250          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4251    
4252            CHECK_PARTIAL();
4253            if (possessive) continue;
4254          for(;;)          for(;;)
4255            {            {
4256            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
4257            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4258            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
4259            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
4260              {              {
4261              int len = 1;              int len = 1;
             BACKCHAR(eptr);  
4262              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr; else
4263                {                {
4264                  BACKCHAR(eptr);
4265                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
4266                }                }
4267              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = UCD_CATEGORY(c);
4268              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
4269              eptr--;              eptr--;
4270              }              }
# Line 2864  for (;;) Line 4282  for (;;)
4282          switch(ctype)          switch(ctype)
4283            {            {
4284            case OP_ANY:            case OP_ANY:
   
           /* Special code is required for UTF8, but when the maximum is unlimited  
           we don't need it, so we repeat the non-UTF8 code. This is probably  
           worth it, because .* is quite a common idiom. */  
   
4285            if (max < INT_MAX)            if (max < INT_MAX)
4286              {              {
4287              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject || *eptr == NEWLINE) break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
4288                {                {
4289                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4290                  {                eptr++;
4291                  eptr++;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
4292                }                }
4293              }              }
4294