/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 531 by ph10, Tue Jun 1 13:53:26 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49    #define NLBLOCK md             /* Block containing newline information */
50    #define PSSTART start_subject  /* Field containing processed string start */
51    #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    /* Undefine some potentially clashing cpp symbols */
56    
57  /* Structure for building a chain of data that actually lives on the  #undef min
58  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   const uschar *epb_saved_eptr;  
 } eptrblock;  
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_ACCEPT       (-999)
75    #define MATCH_COMMIT       (-998)
76    #define MATCH_PRUNE        (-997)
77    #define MATCH_SKIP         (-996)
78    #define MATCH_SKIP_ARG     (-995)
79    #define MATCH_THEN         (-994)
80    
81    /* This is a convenience macro for code that occurs many times. */
82    
83    #define MRRETURN(ra) \
84      { \
85      md->mark = markptr; \
86      RRETURN(ra); \
87      }
88    
89  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
90  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 81  static const char rep_max[] = { 0, 0, 0, Line 99  static const char rep_max[] = { 0, 0, 0,
99    
100    
101    
102  #ifdef DEBUG  #ifdef PCRE_DEBUG
103  /*************************************************  /*************************************************
104  *        Debugging function to print chars       *  *        Debugging function to print chars       *
105  *************************************************/  *************************************************/
# Line 101  Returns:     nothing Line 119  Returns:     nothing
119  static void  static void
120  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121  {  {
122  int c;  unsigned int c;
123  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124  while (length-- > 0)  while (length-- > 0)
125    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 128  Returns:      TRUE if matched Line 146  Returns:      TRUE if matched
146  */  */
147    
148  static BOOL  static BOOL
149  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
150    unsigned long int ims)    unsigned long int ims)
151  {  {
152  const uschar *p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
153    
154  #ifdef DEBUG  #ifdef PCRE_DEBUG
155  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
156    printf("matching subject <null>");    printf("matching subject <null>");
157  else  else
# Line 150  printf("\n"); Line 168  printf("\n");
168    
169  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
170    
171  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172    properly if Unicode properties are supported. Otherwise, we can check only
173    ASCII characters. */
174    
175  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
176    {    {
177    #ifdef SUPPORT_UTF8
178    #ifdef SUPPORT_UCP
179      if (md->utf8)
180        {
181        USPTR endptr = eptr + length;
182        while (eptr < endptr)
183          {
184          int c, d;
185          GETCHARINC(c, eptr);
186          GETCHARINC(d, p);
187          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188          }
189        }
190      else
191    #endif
192    #endif
193    
194      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195      is no UCP support. */
196    
197    while (length-- > 0)    while (length-- > 0)
198      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199    }    }
200    
201    /* In the caseful case, we can just compare the bytes, whether or not we
202    are in UTF-8 mode. */
203    
204  else  else
205    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
# Line 169  return TRUE; Line 213  return TRUE;
213  ****************************************************************************  ****************************************************************************
214                     RECURSION IN THE match() FUNCTION                     RECURSION IN THE match() FUNCTION
215    
216  The match() function is highly recursive. Some regular expressions can cause  The match() function is highly recursive, though not every recursive call
217  it to recurse thousands of times. I was writing for Unix, so I just let it  increases the recursive depth. Nevertheless, some regular expressions can cause
218  call itself recursively. This uses the stack for saving everything that has  it to recurse to a great depth. I was writing for Unix, so I just let it call
219  to be saved for a recursive call. On Unix, the stack can be large, and this  itself recursively. This uses the stack for saving everything that has to be
220  works fine.  saved for a recursive call. On Unix, the stack can be large, and this works
221    fine.
222  It turns out that on non-Unix systems there are problems with programs that  
223  use a lot of stack. (This despite the fact that every last chip has oodles  It turns out that on some non-Unix-like systems there are problems with
224  of memory these days, and techniques for extending the stack have been known  programs that use a lot of stack. (This despite the fact that every last chip
225  for decades.) So....  has oodles of memory these days, and techniques for extending the stack have
226    been known for decades.) So....
227    
228  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
229  calls by keeping local variables that need to be preserved in blocks of memory  calls by keeping local variables that need to be preserved in blocks of memory
230  obtained from malloc instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
231  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
232  always used to.  always used to.
233    
234    The original heap-recursive code used longjmp(). However, it seems that this
235    can be very slow on some operating systems. Following a suggestion from Stan
236    Switzer, the use of longjmp() has been abolished, at the cost of having to
237    provide a unique number for each call to RMATCH. There is no way of generating
238    a sequence of numbers at compile time in C. I have given them names, to make
239    them stand out more clearly.
240    
241    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243    tests. Furthermore, not using longjmp() means that local dynamic variables
244    don't have indeterminate values; this has meant that the frame size can be
245    reduced because the result can be "passed back" by straight setting of the
246    variable instead of being passed in the frame.
247  ****************************************************************************  ****************************************************************************
248  ***************************************************************************/  ***************************************************************************/
249    
250    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251    below must be updated in sync.  */
252    
253  /* These versions of the macros use the stack, as normal */  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
254           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
255           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258           RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
259           RM61,  RM62 };
260    
261    /* These versions of the macros use the stack, as normal. There are debugging
262    versions and production versions. Note that the "rw" argument of RMATCH isn't
263    actually used in this definition. */
264    
265  #ifndef NO_RECURSE  #ifndef NO_RECURSE
266  #define REGISTER register  #define REGISTER register
267  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)  
268    #ifdef PCRE_DEBUG
269    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
270      { \
271      printf("match() called in line %d\n", __LINE__); \
272      rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
273      printf("to line %d\n", __LINE__); \
274      }
275    #define RRETURN(ra) \
276      { \
277      printf("match() returned %d from line %d ", ra, __LINE__); \
278      return ra; \
279      }
280    #else
281    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
282      rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
283  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
284    #endif
285    
286  #else  #else
287    
288    
289  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
290  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
291  match(), which never changes. */  argument of match(), which never changes. */
292    
293  #define REGISTER  #define REGISTER
294    
295  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
296    {\    {\
297    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
298    if (setjmp(frame->Xwhere) == 0)\    if (heapframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
299      {\    frame->Xwhere = rw; \
300      newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
301      newframe->Xecode = rb;\    newframe->Xecode = rb;\
302      newframe->Xoffset_top = rc;\    newframe->Xmstart = mstart;\
303      newframe->Xims = re;\    newframe->Xmarkptr = markptr;\
304      newframe->Xeptrb = rf;\    newframe->Xoffset_top = rc;\
305      newframe->Xflags = rg;\    newframe->Xims = re;\
306      newframe->Xprevframe = frame;\    newframe->Xeptrb = rf;\
307      frame = newframe;\    newframe->Xflags = rg;\
308      DPRINTF(("restarting from line %d\n", __LINE__));\    newframe->Xrdepth = frame->Xrdepth + 1;\
309      goto HEAP_RECURSE;\    newframe->Xprevframe = frame;\
310      }\    frame = newframe;\
311    else\    DPRINTF(("restarting from line %d\n", __LINE__));\
312      {\    goto HEAP_RECURSE;\
313      DPRINTF(("longjumped back to line %d\n", __LINE__));\    L_##rw:\
314      frame = md->thisframe;\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     rx = frame->Xresult;\  
     }\  
315    }    }
316    
317  #define RRETURN(ra)\  #define RRETURN(ra)\
318    {\    {\
319    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
320    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
321    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
322    if (frame != NULL)\    if (frame != NULL)\
323      {\      {\
324      frame->Xresult = ra;\      rrc = ra;\
325      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
326      }\      }\
327    return ra;\    return ra;\
328    }    }
# Line 250  typedef struct heapframe { Line 335  typedef struct heapframe {
335    
336    /* Function arguments that may change */    /* Function arguments that may change */
337    
338    const uschar *Xeptr;    USPTR Xeptr;
339    const uschar *Xecode;    const uschar *Xecode;
340      USPTR Xmstart;
341      USPTR Xmarkptr;
342    int Xoffset_top;    int Xoffset_top;
343    long int Xims;    long int Xims;
344    eptrblock *Xeptrb;    eptrblock *Xeptrb;
345    int Xflags;    int Xflags;
346      unsigned int Xrdepth;
347    
348    /* Function local variables */    /* Function local variables */
349    
350    const uschar *Xcallpat;    USPTR Xcallpat;
351    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
352    const uschar *Xdata;    USPTR Xcharptr;
353    const uschar *Xnext;  #endif
354    const uschar *Xpp;    USPTR Xdata;
355    const uschar *Xprev;    USPTR Xnext;
356    const uschar *Xsaved_eptr;    USPTR Xpp;
357      USPTR Xprev;
358      USPTR Xsaved_eptr;
359    
360    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
361    
362    BOOL Xcur_is_word;    BOOL Xcur_is_word;
363    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
364    BOOL Xprev_is_word;    BOOL Xprev_is_word;
365    
366    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
367    
368  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
369    int Xprop_type;    int Xprop_type;
370      int Xprop_value;
371    int Xprop_fail_result;    int Xprop_fail_result;
372    int Xprop_category;    int Xprop_category;
373    int Xprop_chartype;    int Xprop_chartype;
374    int Xprop_othercase;    int Xprop_script;
375    int Xprop_test_against;    int Xoclength;
376    int *Xprop_test_variable;    uschar Xocchars[8];
377  #endif  #endif
378    
379      int Xcodelink;
380    int Xctype;    int Xctype;
381    int Xfc;    unsigned int Xfc;
382    int Xfi;    int Xfi;
383    int Xlength;    int Xlength;
384    int Xmax;    int Xmax;
# Line 301  typedef struct heapframe { Line 392  typedef struct heapframe {
392    
393    eptrblock Xnewptrb;    eptrblock Xnewptrb;
394    
395    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
396    
397    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
398    
399  } heapframe;  } heapframe;
400    
# Line 320  typedef struct heapframe { Line 410  typedef struct heapframe {
410  *         Match from current position            *  *         Match from current position            *
411  *************************************************/  *************************************************/
412    
413  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
414  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
415  same response.  same response. */
416    
417    /* These macros pack up tests that are used for partial matching, and which
418    appear several times in the code. We set the "hit end" flag if the pointer is
419    at the end of the subject and also past the start of the subject (i.e.
420    something has been matched). For hard partial matching, we then return
421    immediately. The second one is used when we already know we are past the end of
422    the subject. */
423    
424    #define CHECK_PARTIAL()\
425      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
426        {\
427        md->hitend = TRUE;\
428        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
429        }
430    
431    #define SCHECK_PARTIAL()\
432      if (md->partial != 0 && eptr > mstart)\
433        {\
434        md->hitend = TRUE;\
435        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
436        }
437    
438  Performance note: It might be tempting to extract commonly used fields from the  
439  md structure (e.g. utf8, end_subject) into individual variables to improve  /* Performance note: It might be tempting to extract commonly used fields from
440    the md structure (e.g. utf8, end_subject) into individual variables to improve
441  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
442  made performance worse.  made performance worse.
443    
444  Arguments:  Arguments:
445     eptr        pointer in subject     eptr        pointer to current character in subject
446     ecode       position in code     ecode       pointer to current position in compiled code
447       mstart      pointer to the current match start position (can be modified
448                     by encountering \K)
449       markptr     pointer to the most recent MARK name, or NULL
450     offset_top  current top pointer     offset_top  current top pointer
451     md          pointer to "static" info for the match     md          pointer to "static" info for the match
452     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 342  Arguments: Line 454  Arguments:
454                   brackets - for testing for empty matches                   brackets - for testing for empty matches
455     flags       can contain     flags       can contain
456                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
457                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
458                       group that can match an empty string
459       rdepth      the recursion depth
460    
461  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
462                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
463                   a negative MATCH_xxx value for PRUNE, SKIP, etc
464                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
465                   (e.g. stopped by recursion limit)                   (e.g. stopped by repeated call or recursion limit)
466  */  */
467    
468  static int  static int
469  match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
470    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
471    int flags)    eptrblock *eptrb, int flags, unsigned int rdepth)
472  {  {
473  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
474  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
475  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
476    
477    register int  rrc;         /* Returns from recursive calls */
478    register int  i;           /* Used for loops not involving calls to RMATCH() */
479    register unsigned int c;   /* Character values not kept over RMATCH() calls */
480    register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
481    
482  register int  rrc;    /* Returns from recursive calls */  BOOL minimize, possessive; /* Quantifier options */
483  register int  i;      /* Used for loops not involving calls to RMATCH() */  int condcode;
 register int  c;      /* Character values not kept over RMATCH() calls */  
 register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  
484    
485  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
486  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 371  heap whenever RMATCH() does a "recursion Line 489  heap whenever RMATCH() does a "recursion
489    
490  #ifdef NO_RECURSE  #ifdef NO_RECURSE
491  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
492    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
493  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
494    
495  /* Copy in the original argument variables */  /* Copy in the original argument variables */
496    
497  frame->Xeptr = eptr;  frame->Xeptr = eptr;
498  frame->Xecode = ecode;  frame->Xecode = ecode;
499    frame->Xmstart = mstart;
500    frame->Xmarkptr = markptr;
501  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
502  frame->Xims = ims;  frame->Xims = ims;
503  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
504  frame->Xflags = flags;  frame->Xflags = flags;
505    frame->Xrdepth = rdepth;
506    
507  /* This is where control jumps back to to effect "recursion" */  /* This is where control jumps back to to effect "recursion" */
508    
# Line 390  HEAP_RECURSE: Line 512  HEAP_RECURSE:
512    
513  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
514  #define ecode              frame->Xecode  #define ecode              frame->Xecode
515    #define mstart             frame->Xmstart
516    #define markptr            frame->Xmarkptr
517  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
518  #define ims                frame->Xims  #define ims                frame->Xims
519  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
520  #define flags              frame->Xflags  #define flags              frame->Xflags
521    #define rdepth             frame->Xrdepth
522    
523  /* Ditto for the local variables */  /* Ditto for the local variables */
524    
# Line 401  HEAP_RECURSE: Line 526  HEAP_RECURSE:
526  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
527  #endif  #endif
528  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
529    #define codelink           frame->Xcodelink
530  #define data               frame->Xdata  #define data               frame->Xdata
531  #define next               frame->Xnext  #define next               frame->Xnext
532  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 411  HEAP_RECURSE: Line 537  HEAP_RECURSE:
537    
538  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
539  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
540  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
541    
542  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
543    
544  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
545  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
546    #define prop_value         frame->Xprop_value
547  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
548  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
549  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
550  #define prop_othercase     frame->Xprop_othercase  #define prop_script        frame->Xprop_script
551  #define prop_test_against  frame->Xprop_test_against  #define oclength           frame->Xoclength
552  #define prop_test_variable frame->Xprop_test_variable  #define occhars            frame->Xocchars
553  #endif  #endif
554    
555  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 447  HEAP_RECURSE: Line 573  HEAP_RECURSE:
573  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
574  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
575    
576  #else  #else         /* NO_RECURSE not defined */
577  #define fi i  #define fi i
578  #define fc c  #define fc c
579    
580    
581  #ifdef SUPPORT_UTF8                /* Many of these variables are used ony */  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
582  const uschar *charptr;             /* small blocks of the code. My normal  */  const uschar *charptr;             /* in small blocks of the code. My normal */
583  #endif                             /* style of coding would have declared  */  #endif                             /* style of coding would have declared    */
584  const uschar *callpat;             /* them within each of those blocks.    */  const uschar *callpat;             /* them within each of those blocks.      */
585  const uschar *data;                /* However, in order to accommodate the */  const uschar *data;                /* However, in order to accommodate the   */
586  const uschar *next;                /* version of this code that uses an    */  const uschar *next;                /* version of this code that uses an      */
587  const uschar *pp;                  /* external "stack" implemented on the  */  USPTR         pp;                  /* external "stack" implemented on the    */
588  const uschar *prev;                /* heap, it is easier to declare them   */  const uschar *prev;                /* heap, it is easier to declare them all */
589  const uschar *saved_eptr;          /* all here, so the declarations can    */  USPTR         saved_eptr;          /* here, so the declarations can be cut   */
590                                     /* be cut out in a block. The only      */                                     /* out in a block. The only declarations  */
591  recursion_info new_recursive;      /* declarations within blocks below are */  recursion_info new_recursive;      /* within blocks below are for variables  */
592                                     /* for variables that do not have to    */                                     /* that do not have to be preserved over  */
593  BOOL cur_is_word;                  /* be preserved over a recursive call   */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
594  BOOL condition;                    /* to RMATCH().                         */  BOOL condition;
 BOOL minimize;  
595  BOOL prev_is_word;  BOOL prev_is_word;
596    
597  unsigned long int original_ims;  unsigned long int original_ims;
598    
599  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
600  int prop_type;  int prop_type;
601    int prop_value;
602  int prop_fail_result;  int prop_fail_result;
603  int prop_category;  int prop_category;
604  int prop_chartype;  int prop_chartype;
605  int prop_othercase;  int prop_script;
606  int prop_test_against;  int oclength;
607  int *prop_test_variable;  uschar occhars[8];
608  #endif  #endif
609    
610    int codelink;
611  int ctype;  int ctype;
612  int length;  int length;
613  int max;  int max;
# Line 493  int save_offset1, save_offset2, save_off Line 620  int save_offset1, save_offset2, save_off
620  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
621    
622  eptrblock newptrb;  eptrblock newptrb;
623  #endif  #endif     /* NO_RECURSE */
624    
625  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
626  variables. */  variables. */
627    
628  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
629    prop_value = 0;
630  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_against = 0;  
 prop_test_variable = NULL;  
631  #endif  #endif
632    
633  /* OK, now we can get on with the real code of the function. Recursion is  
634  specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,  /* This label is used for tail recursion, which is used in a few cases even
635  these just turn into a recursive call to match() and a "return", respectively.  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
636  However, RMATCH isn't like a function call because it's quite a complicated  used. Thanks to Ian Taylor for noticing this possibility and sending the
637  macro. It has to be used in one particular way. This shouldn't, however, impact  original patch. */
638  performance when true recursion is being used. */  
639    TAIL_RECURSE:
640    
641    /* OK, now we can get on with the real code of the function. Recursive calls
642    are specified by the macro RMATCH and RRETURN is used to return. When
643    NO_RECURSE is *not* defined, these just turn into a recursive call to match()
644    and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
645    defined). However, RMATCH isn't like a function call because it's quite a
646    complicated macro. It has to be used in one particular way. This shouldn't,
647    however, impact performance when true recursion is being used. */
648    
649    #ifdef SUPPORT_UTF8
650    utf8 = md->utf8;       /* Local copy of the flag */
651    #else
652    utf8 = FALSE;
653    #endif
654    
655    /* First check that we haven't called match() too many times, or that we
656    haven't exceeded the recursive call limit. */
657    
658  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
659    if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
660    
661  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
 utf8 = md->utf8;       /* Local copy of the flag */  
662    
663  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
664  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
665  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
666  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
667    When match() is called in other circumstances, don't add to the chain. The
668    match_cbegroup flag must NOT be used with tail recursion, because the memory
669    block that is used is on the stack, so a new one may be required for each
670    match(). */
671    
672  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
673    {    {
   newptrb.epb_prev = eptrb;  
674    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
675      newptrb.epb_prev = eptrb;
676    eptrb = &newptrb;    eptrb = &newptrb;
677    }    }
678    
679  /* Now start processing the operations. */  /* Now start processing the opcodes. */
680    
681  for (;;)  for (;;)
682    {    {
683      minimize = possessive = FALSE;
684    op = *ecode;    op = *ecode;
   minimize = FALSE;  
   
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
   /* Opening capturing bracket. If there is space in the offset vector, save  
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
685    
686    if (op > OP_BRA)    switch(op)
687      {      {
688      number = op - OP_BRA;      case OP_MARK:
689        markptr = ecode + 2;
690      /* For extended extraction brackets (large number), we have to fish out the      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
691      number from a dummy opcode at the start. */        ims, eptrb, flags, RM55);
692    
693      if (number > EXTRACT_BASIC_MAX)      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
694        number = GET2(ecode, 2+LINK_SIZE);      argument, and we must check whether that argument matches this MARK's
695        argument. It is passed back in md->start_match_ptr (an overloading of that
696        variable). If it does match, we reset that variable to the current subject
697        position and return MATCH_SKIP. Otherwise, pass back the return code
698        unaltered. */
699    
700        if (rrc == MATCH_SKIP_ARG &&
701            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
702          {
703          md->start_match_ptr = eptr;
704          RRETURN(MATCH_SKIP);
705          }
706    
707        if (md->mark == NULL) md->mark = markptr;
708        RRETURN(rrc);
709    
710        case OP_FAIL:
711        MRRETURN(MATCH_NOMATCH);
712    
713        case OP_COMMIT:
714        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
715          ims, eptrb, flags, RM52);
716        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
717        MRRETURN(MATCH_COMMIT);
718    
719        case OP_PRUNE:
720        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
721          ims, eptrb, flags, RM51);
722        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
723        MRRETURN(MATCH_PRUNE);
724    
725        case OP_PRUNE_ARG:
726        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
727          ims, eptrb, flags, RM56);
728        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
729        md->mark = ecode + 2;
730        RRETURN(MATCH_PRUNE);
731    
732        case OP_SKIP:
733        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
734          ims, eptrb, flags, RM53);
735        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
736        md->start_match_ptr = eptr;   /* Pass back current position */
737        MRRETURN(MATCH_SKIP);
738    
739        case OP_SKIP_ARG:
740        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
741          ims, eptrb, flags, RM57);
742        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
743    
744        /* Pass back the current skip name by overloading md->start_match_ptr and
745        returning the special MATCH_SKIP_ARG return code. This will either be
746        caught by a matching MARK, or get to the top, where it is treated the same
747        as PRUNE. */
748    
749        md->start_match_ptr = ecode + 2;
750        RRETURN(MATCH_SKIP_ARG);
751    
752        case OP_THEN:
753        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
754          ims, eptrb, flags, RM54);
755        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
756        MRRETURN(MATCH_THEN);
757    
758        case OP_THEN_ARG:
759        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
760          ims, eptrb, flags, RM58);
761        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
762        md->mark = ecode + 2;
763        RRETURN(MATCH_THEN);
764    
765        /* Handle a capturing bracket. If there is space in the offset vector, save
766        the current subject position in the working slot at the top of the vector.
767        We mustn't change the current values of the data slot, because they may be
768        set from a previous iteration of this group, and be referred to by a
769        reference inside the group.
770    
771        If the bracket fails to match, we need to restore this value and also the
772        values of the final offsets, in case they were set by a previous iteration
773        of the same bracket.
774    
775        If there isn't enough space in the offset vector, treat this as if it were
776        a non-capturing bracket. Don't worry about setting the flag for the error
777        case here; that is handled in the code for KET. */
778    
779        case OP_CBRA:
780        case OP_SCBRA:
781        number = GET2(ecode, 1+LINK_SIZE);
782      offset = number << 1;      offset = number << 1;
783    
784  #ifdef DEBUG  #ifdef PCRE_DEBUG
785      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
786        printf("subject=");
787      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
788      printf("\n");      printf("\n");
789  #endif  #endif
# Line 582  for (;;) Line 796  for (;;)
796        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
797    
798        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
799        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
800            (int)(eptr - md->start_subject);
801    
802          flags = (op == OP_SCBRA)? match_cbegroup : 0;
803        do        do
804          {          {
805          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
806            match_isgroup);            ims, eptrb, flags, RM1);
807          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
808          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
809          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
810          }          }
# Line 600  for (;;) Line 816  for (;;)
816        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
817        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
818    
819          if (rrc != MATCH_THEN) md->mark = markptr;
820        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
821        }        }
822    
823      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
824        as a non-capturing bracket. */
825    
826      else op = OP_BRA;      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
827      }      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
828    
829    /* Other types of node can be handled by a switch */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
830    
831    switch(op)      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
832      {      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
833      case OP_BRA:     /* Non-capturing bracket: optimized */  
834      DPRINTF(("start bracket 0\n"));      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
835      do      final alternative within the brackets, we would return the result of a
836        recursive call to match() whatever happened. We can reduce stack usage by
837        turning this into a tail recursion, except in the case when match_cbegroup
838        is set.*/
839    
840        case OP_BRA:
841        case OP_SBRA:
842        DPRINTF(("start non-capturing bracket\n"));
843        flags = (op >= OP_SBRA)? match_cbegroup : 0;
844        for (;;)
845        {        {
846        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
847          match_isgroup);          {
848        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (flags == 0)    /* Not a possibly empty group */
849              {
850              ecode += _pcre_OP_lengths[*ecode];
851              DPRINTF(("bracket 0 tail recursion\n"));
852              goto TAIL_RECURSE;
853              }
854    
855            /* Possibly empty group; can't use tail recursion. */
856    
857            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
858              eptrb, flags, RM48);
859            if (rrc == MATCH_NOMATCH) md->mark = markptr;
860            RRETURN(rrc);
861            }
862    
863          /* For non-final alternatives, continue the loop for a NOMATCH result;
864          otherwise return. */
865    
866          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
867            eptrb, flags, RM2);
868          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
869        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
870        }        }
871      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
872    
873      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
874      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
875      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
876      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
877        obeyed, we can use tail recursion to avoid using another stack frame. */
878    
879      case OP_COND:      case OP_COND:
880      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
881        codelink= GET(ecode, 1);
882    
883        /* Because of the way auto-callout works during compile, a callout item is
884        inserted between OP_COND and an assertion condition. */
885    
886        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
887          {
888          if (pcre_callout != NULL)
889            {
890            pcre_callout_block cb;
891            cb.version          = 1;   /* Version 1 of the callout block */
892            cb.callout_number   = ecode[LINK_SIZE+2];
893            cb.offset_vector    = md->offset_vector;
894            cb.subject          = (PCRE_SPTR)md->start_subject;
895            cb.subject_length   = (int)(md->end_subject - md->start_subject);
896            cb.start_match      = (int)(mstart - md->start_subject);
897            cb.current_position = (int)(eptr - md->start_subject);
898            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
899            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
900            cb.capture_top      = offset_top/2;
901            cb.capture_last     = md->capture_last;
902            cb.callout_data     = md->callout_data;
903            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
904            if (rrc < 0) RRETURN(rrc);
905            }
906          ecode += _pcre_OP_lengths[OP_CALLOUT];
907          }
908    
909        condcode = ecode[LINK_SIZE+1];
910    
911        /* Now see what the actual condition is */
912    
913        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
914          {
915          if (md->recursive == NULL)                /* Not recursing => FALSE */
916            {
917            condition = FALSE;
918            ecode += GET(ecode, 1);
919            }
920          else
921            {
922            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
923            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
924    
925            /* If the test is for recursion into a specific subpattern, and it is
926            false, but the test was set up by name, scan the table to see if the
927            name refers to any other numbers, and test them. The condition is true
928            if any one is set. */
929    
930            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
931              {
932              uschar *slotA = md->name_table;
933              for (i = 0; i < md->name_count; i++)
934                {
935                if (GET2(slotA, 0) == recno) break;
936                slotA += md->name_entry_size;
937                }
938    
939              /* Found a name for the number - there can be only one; duplicate
940              names for different numbers are allowed, but not vice versa. First
941              scan down for duplicates. */
942    
943              if (i < md->name_count)
944                {
945                uschar *slotB = slotA;
946                while (slotB > md->name_table)
947                  {
948                  slotB -= md->name_entry_size;
949                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
950                    {
951                    condition = GET2(slotB, 0) == md->recursive->group_num;
952                    if (condition) break;
953                    }
954                  else break;
955                  }
956    
957                /* Scan up for duplicates */
958    
959                if (!condition)
960                  {
961                  slotB = slotA;
962                  for (i++; i < md->name_count; i++)
963                    {
964                    slotB += md->name_entry_size;
965                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
966                      {
967                      condition = GET2(slotB, 0) == md->recursive->group_num;
968                      if (condition) break;
969                      }
970                    else break;
971                    }
972                  }
973                }
974              }
975    
976            /* Choose branch according to the condition */
977    
978            ecode += condition? 3 : GET(ecode, 1);
979            }
980          }
981    
982        else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
983        {        {
984        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
985        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
986          (md->recursive != NULL) :  
987          (offset < offset_top && md->offset_vector[offset] >= 0);        /* If the numbered capture is unset, but the reference was by name,
988        RMATCH(rrc, eptr, ecode + (condition?        scan the table to see if the name refers to any other numbers, and test
989          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),        them. The condition is true if any one is set. This is tediously similar
990          offset_top, md, ims, eptrb, match_isgroup);        to the code above, but not close enough to try to amalgamate. */
991        RRETURN(rrc);  
992          if (!condition && condcode == OP_NCREF)
993            {
994            int refno = offset >> 1;
995            uschar *slotA = md->name_table;
996    
997            for (i = 0; i < md->name_count; i++)
998              {
999              if (GET2(slotA, 0) == refno) break;
1000              slotA += md->name_entry_size;
1001              }
1002    
1003            /* Found a name for the number - there can be only one; duplicate names
1004            for different numbers are allowed, but not vice versa. First scan down
1005            for duplicates. */
1006    
1007            if (i < md->name_count)
1008              {
1009              uschar *slotB = slotA;
1010              while (slotB > md->name_table)
1011                {
1012                slotB -= md->name_entry_size;
1013                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1014                  {
1015                  offset = GET2(slotB, 0) << 1;
1016                  condition = offset < offset_top &&
1017                    md->offset_vector[offset] >= 0;
1018                  if (condition) break;
1019                  }
1020                else break;
1021                }
1022    
1023              /* Scan up for duplicates */
1024    
1025              if (!condition)
1026                {
1027                slotB = slotA;
1028                for (i++; i < md->name_count; i++)
1029                  {
1030                  slotB += md->name_entry_size;
1031                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1032                    {
1033                    offset = GET2(slotB, 0) << 1;
1034                    condition = offset < offset_top &&
1035                      md->offset_vector[offset] >= 0;
1036                    if (condition) break;
1037                    }
1038                  else break;
1039                  }
1040                }
1041              }
1042            }
1043    
1044          /* Choose branch according to the condition */
1045    
1046          ecode += condition? 3 : GET(ecode, 1);
1047          }
1048    
1049        else if (condcode == OP_DEF)     /* DEFINE - always false */
1050          {
1051          condition = FALSE;
1052          ecode += GET(ecode, 1);
1053        }        }
1054    
1055      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
1056      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
1057        assertion. */
1058    
1059      else      else
1060        {        {
1061        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1062            match_condassert | match_isgroup);            match_condassert, RM3);
1063        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1064          {          {
1065          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
1066            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1067          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1068          }          }
1069        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1070          {          {
1071          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1072          }          }
1073        else ecode += GET(ecode, 1);        else
1074        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
1075          match_isgroup);          condition = FALSE;
1076        RRETURN(rrc);          ecode += codelink;
1077            }
1078        }        }
     /* Control never reaches here */  
1079    
1080      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
1081      encountered. */      we can use tail recursion to avoid using another stack frame, except when
1082        match_cbegroup is required for an unlimited repeat of a possibly empty
1083        group. If the second alternative doesn't exist, we can just plough on. */
1084    
1085        if (condition || *ecode == OP_ALT)
1086          {
1087          ecode += 1 + LINK_SIZE;
1088          if (op == OP_SCOND)        /* Possibly empty group */
1089            {
1090            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1091            RRETURN(rrc);
1092            }
1093          else                       /* Group must match something */
1094            {
1095            flags = 0;
1096            goto TAIL_RECURSE;
1097            }
1098          }
1099        else                         /* Condition false & no alternative */
1100          {
1101          ecode += 1 + LINK_SIZE;
1102          }
1103        break;
1104    
1105    
1106        /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1107        to close any currently open capturing brackets. */
1108    
1109        case OP_CLOSE:
1110        number = GET2(ecode, 1);
1111        offset = number << 1;
1112    
1113    #ifdef PCRE_DEBUG
1114          printf("end bracket %d at *ACCEPT", number);
1115          printf("\n");
1116    #endif
1117    
1118      case OP_CREF:      md->capture_last = number;
1119      case OP_BRANUMBER:      if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1120          {
1121          md->offset_vector[offset] =
1122            md->offset_vector[md->offset_end - number];
1123          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1124          if (offset_top <= offset) offset_top = offset + 2;
1125          }
1126      ecode += 3;      ecode += 3;
1127      break;      break;
1128    
     /* End of the pattern. If we are in a recursion, we should restore the  
     offsets appropriately and continue from after the call. */  
1129    
1130        /* End of the pattern, either real or forced. If we are in a top-level
1131        recursion, we should restore the offsets appropriately and continue from
1132        after the call. */
1133    
1134        case OP_ACCEPT:
1135      case OP_END:      case OP_END:
1136      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1137        {        {
1138        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
1139        DPRINTF(("Hit the end in a (?0) recursion\n"));        DPRINTF(("End of pattern in a (?0) recursion\n"));
1140        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1141        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1142          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1143        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1144        ims = original_ims;        ims = original_ims;
1145        ecode = rec->after_call;        ecode = rec->after_call;
1146        break;        break;
1147        }        }
1148    
1149      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1150      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1151        the subject. In both cases, backtracking will then try other alternatives,
1152        if any. */
1153    
1154        if (eptr == mstart &&
1155            (md->notempty ||
1156              (md->notempty_atstart &&
1157                mstart == md->start_subject + md->start_offset)))
1158          MRRETURN(MATCH_NOMATCH);
1159    
1160        /* Otherwise, we have a match. */
1161    
1162        md->end_match_ptr = eptr;           /* Record where we ended */
1163        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1164        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1165    
1166        /* For some reason, the macros don't work properly if an expression is
1167        given as the argument to MRRETURN when the heap is in use. */
1168    
1169      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1170      md->end_match_ptr = eptr;          /* Record where we ended */      MRRETURN(rrc);
     md->end_offset_top = offset_top;   /* and how many extracts were taken */  
     RRETURN(MATCH_MATCH);  
1171    
1172      /* Change option settings */      /* Change option settings */
1173    
# Line 717  for (;;) Line 1187  for (;;)
1187      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1188      do      do
1189        {        {
1190        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1191          match_isgroup);          RM4);
1192        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1193        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          {
1194            mstart = md->start_match_ptr;   /* In case \K reset it */
1195            break;
1196            }
1197          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1198        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1199        }        }
1200      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1201      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1202    
1203      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1204    
# Line 738  for (;;) Line 1212  for (;;)
1212      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1213      continue;      continue;
1214    
1215      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1216        PRUNE, or COMMIT means we must assume failure without checking subsequent
1217        branches. */
1218    
1219      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1220      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1221      do      do
1222        {        {
1223        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1224          match_isgroup);          RM5);
1225        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1226        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1227            {
1228            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1229            break;
1230            }
1231          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1232        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1233        }        }
1234      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 766  for (;;) Line 1247  for (;;)
1247  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1248      if (utf8)      if (utf8)
1249        {        {
1250        c = GET(ecode,1);        i = GET(ecode, 1);
1251        for (i = 0; i < c; i++)        while (i-- > 0)
1252          {          {
1253          eptr--;          eptr--;
1254          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1255          BACKCHAR(eptr)          BACKCHAR(eptr);
1256          }          }
1257        }        }
1258      else      else
# Line 780  for (;;) Line 1261  for (;;)
1261      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1262    
1263        {        {
1264        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
1265        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1266        }        }
1267    
1268      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1269    
1270        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1271      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1272      break;      break;
1273    
# Line 800  for (;;) Line 1282  for (;;)
1282        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 1;   /* Version 1 of the callout block */
1283        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1284        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1285        cb.subject          = (const char *)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1286        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1287        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1288        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1289        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1290        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1291        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1292        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1293        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1294        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1295        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1296        }        }
1297      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 837  for (;;) Line 1319  for (;;)
1319      case OP_RECURSE:      case OP_RECURSE:
1320        {        {
1321        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1322        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
1323            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
1324    
1325        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1326    
# Line 869  for (;;) Line 1346  for (;;)
1346    
1347        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1348              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1349        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1350    
1351        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1352        restore the offset and recursion data. */        restore the offset and recursion data. */
1353    
1354        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1355          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1356        do        do
1357          {          {
1358          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1359              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
1360          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1361            {            {
1362              DPRINTF(("Recursion matched\n"));
1363            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1364            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1365              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1366            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1367              }
1368            else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1369              {
1370              DPRINTF(("Recursion gave error %d\n", rrc));
1371              if (new_recursive.offset_save != stacksave)
1372                (pcre_free)(new_recursive.offset_save);
1373              RRETURN(rrc);
1374            }            }
         else if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
1375    
1376          md->recursive = &new_recursive;          md->recursive = &new_recursive;
1377          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
# Line 900  for (;;) Line 1384  for (;;)
1384        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1385        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1386          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1387        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1388        }        }
1389      /* Control never reaches here */      /* Control never reaches here */
1390    
# Line 909  for (;;) Line 1393  for (;;)
1393      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1394      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1395      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1396      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1397        the start-of-match value in case it was changed by \K. */
1398    
1399      case OP_ONCE:      case OP_ONCE:
1400        {      prev = ecode;
1401        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1402    
1403        do      do
1404          {
1405          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1406          if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1407          {          {
1408          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,          mstart = md->start_match_ptr;
1409            eptrb, match_isgroup);          break;
         if (rrc == MATCH_MATCH) break;  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         ecode += GET(ecode,1);  
1410          }          }
1411        while (*ecode == OP_ALT);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1412          ecode += GET(ecode,1);
1413          }
1414        while (*ecode == OP_ALT);
1415    
1416        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1417    
1418        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1419    
1420        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1421        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1422    
1423        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1424    
1425        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1426        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1427    
1428        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1429        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1430        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1431        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1432        course of events. */      course of events. */
1433    
1434        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1435          {        {
1436          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1437          break;        break;
1438          }        }
1439    
1440        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1441        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1442        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1443        opcode. */      any options that changed within the bracket before re-running it, so
1444        check the next opcode. */
1445    
1446        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1447          {        {
1448          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1449          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1450          }        }
1451    
1452        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1453          {        {
1454          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1455          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1456          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1457          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = 0;
1458          }        goto TAIL_RECURSE;
1459        else  /* OP_KETRMAX */        }
1460          {      else  /* OP_KETRMAX */
1461          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        {
1462          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1463          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1464          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += 1 + LINK_SIZE;
1465          }        flags = 0;
1466          goto TAIL_RECURSE;
1467        }        }
1468      RRETURN(MATCH_NOMATCH);      /* Control never gets here */
1469    
1470      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1471      bracketed group and go to there. */      bracketed group and go to there. */
# Line 985  for (;;) Line 1474  for (;;)
1474      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1475      break;      break;
1476    
1477      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1478      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1479      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1480      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1481      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1482    
1483      case OP_BRAZERO:      case OP_BRAZERO:
1484        {        {
1485        next = ecode+1;        next = ecode+1;
1486        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1487        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1488        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1489        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1490        }        }
1491      break;      break;
1492    
1493      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1494        {        {
1495        next = ecode+1;        next = ecode+1;
1496        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1497        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1498        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1499        ecode++;        ecode++;
1500        }        }
1501      break;      break;
1502    
1503      /* End of a group, repeated or non-repeating. If we are at the end of      case OP_SKIPZERO:
1504      an assertion "group", stop matching and return MATCH_MATCH, but record the        {
1505      current high water mark for use by positive assertions. Do this also        next = ecode+1;
1506      for the "once" (not-backup up) groups. */        do next += GET(next,1); while (*next == OP_ALT);
1507          ecode = next + 1 + LINK_SIZE;
1508          }
1509        break;
1510    
1511        /* End of a group, repeated or non-repeating. */
1512    
1513      case OP_KET:      case OP_KET:
1514      case OP_KETRMIN:      case OP_KETRMIN:
1515      case OP_KETRMAX:      case OP_KETRMAX:
1516        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
1517    
1518        /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1519        infinite repeats of empty string matches, retrieve the subject start from
1520        the chain. Otherwise, set it NULL. */
1521    
1522        eptrb = eptrb->epb_prev;      if (*prev >= OP_SBRA)
1523          {
1524        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1525            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1526            *prev == OP_ONCE)        }
1527          {      else saved_eptr = NULL;
         md->end_match_ptr = eptr;      /* For ONCE */  
         md->end_offset_top = offset_top;  
         RRETURN(MATCH_MATCH);  
         }  
1528    
1529        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group or an atomic group, stop
1530        group number back at the start and if necessary complete handling an      matching and return MATCH_MATCH, but record the current high water mark for
1531        extraction by setting the offsets and bumping the high water mark. */      use by positive assertions. We also need to record the match start in case
1532        it was changed by \K. */
1533    
1534        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1535          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1536          number = *prev - OP_BRA;          *prev == OP_ONCE)
1537          {
1538          md->end_match_ptr = eptr;      /* For ONCE */
1539          md->end_offset_top = offset_top;
1540          md->start_match_ptr = mstart;
1541          MRRETURN(MATCH_MATCH);
1542          }
1543    
1544          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1545          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1546        bumping the high water mark. Note that whole-pattern recursion is coded as
1547        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1548        when the OP_END is reached. Other recursion is handled here. */
1549    
1550          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1551          offset = number << 1;        {
1552          number = GET2(prev, 1+LINK_SIZE);
1553          offset = number << 1;
1554    
1555  #ifdef DEBUG  #ifdef PCRE_DEBUG
1556          printf("end bracket %d", number);        printf("end bracket %d", number);
1557          printf("\n");        printf("\n");
1558  #endif  #endif
1559    
1560          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1561          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1562          into group 0, so it won't be picked up here. Instead, we catch it when          {
1563          the OP_END is reached. */          md->offset_vector[offset] =
1564              md->offset_vector[md->offset_end - number];
1565            md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1566            if (offset_top <= offset) offset_top = offset + 2;
1567            }
1568    
1569          /* Handle a recursively called group. Restore the offsets
1570          appropriately and continue from after the call. */
1571    
1572          if (md->recursive != NULL && md->recursive->group_num == number)
1573            {
1574            recursion_info *rec = md->recursive;
1575            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1576            md->recursive = rec->prevrec;
1577            memcpy(md->offset_vector, rec->offset_save,
1578              rec->saved_max * sizeof(int));
1579            offset_top = rec->save_offset_top;
1580            ecode = rec->after_call;
1581            ims = original_ims;
1582            break;
1583            }
1584          }
1585    
1586          if (number > 0)      /* For both capturing and non-capturing groups, reset the value of the ims
1587            {      flags, in case they got changed during the group. */
           md->capture_last = number;  
           if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
             {  
             md->offset_vector[offset] =  
               md->offset_vector[md->offset_end - number];  
             md->offset_vector[offset+1] = eptr - md->start_subject;  
             if (offset_top <= offset) offset_top = offset + 2;  
             }  
1588    
1589            /* Handle a recursively called group. Restore the offsets      ims = original_ims;
1590            appropriately and continue from after the call. */      DPRINTF(("ims reset to %02lx\n", ims));
1591    
1592            if (md->recursive != NULL && md->recursive->group_num == number)      /* For a non-repeating ket, just continue at this level. This also
1593              {      happens for a repeating ket if no characters were matched in the group.
1594              recursion_info *rec = md->recursive;      This is the forcible breaking of infinite loops as implemented in Perl
1595              DPRINTF(("Recursion (%d) succeeded - continuing\n", number));      5.005. If there is an options reset, it will get obeyed in the normal
1596              md->recursive = rec->prevrec;      course of events. */
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
         }  
1597    
1598        /* Reset the value of the ims flags, in case they got changed during      if (*ecode == OP_KET || eptr == saved_eptr)
1599        the group. */        {
1600          ecode += 1 + LINK_SIZE;
1601          break;
1602          }
1603    
1604        ims = original_ims;      /* The repeating kets try the rest of the pattern or restart from the
1605        DPRINTF(("ims reset to %02lx\n", ims));      preceding bracket, in the appropriate order. In the second case, we can use
1606        tail recursion to avoid using another stack frame, unless we have an
1607        unlimited repeat of a group that can match an empty string. */
1608    
1609        /* For a non-repeating ket, just continue at this level. This also      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
       happens for a repeating ket if no characters were matched in the group.  
       This is the forcible breaking of infinite loops as implemented in Perl  
       5.005. If there is an options reset, it will get obeyed in the normal  
       course of events. */  
1610    
1611        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KETRMIN)
1612          {
1613          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1614          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1615          if (flags != 0)    /* Could match an empty string */
1616          {          {
1617          ecode += 1 + LINK_SIZE;          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1618          break;          RRETURN(rrc);
         }  
   
       /* The repeating kets try the rest of the pattern or restart from the  
       preceding bracket, in the appropriate order. */  
   
       if (*ecode == OP_KETRMIN)  
         {  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
1619          }          }
1620          ecode = prev;
1621          goto TAIL_RECURSE;
1622        }        }
1623        else  /* OP_KETRMAX */
1624      RRETURN(MATCH_NOMATCH);        {
1625          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1626          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1627          ecode += 1 + LINK_SIZE;
1628          flags = 0;
1629          goto TAIL_RECURSE;
1630          }
1631        /* Control never gets here */
1632    
1633      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1634    
1635      case OP_CIRC:      case OP_CIRC:
1636      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1637      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1638        {        {
1639        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1640          RRETURN(MATCH_NOMATCH);            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1641            MRRETURN(MATCH_NOMATCH);
1642        ecode++;        ecode++;
1643        break;        break;
1644        }        }
# Line 1145  for (;;) Line 1647  for (;;)
1647      /* Start of subject assertion */      /* Start of subject assertion */
1648    
1649      case OP_SOD:      case OP_SOD:
1650      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1651      ecode++;      ecode++;
1652      break;      break;
1653    
1654      /* Start of match assertion */      /* Start of match assertion */
1655    
1656      case OP_SOM:      case OP_SOM:
1657      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1658        ecode++;
1659        break;
1660    
1661        /* Reset the start of match point */
1662    
1663        case OP_SET_SOM:
1664        mstart = eptr;
1665      ecode++;      ecode++;
1666      break;      break;
1667    
# Line 1163  for (;;) Line 1672  for (;;)
1672      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1673        {        {
1674        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1675          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1676        else        else
1677          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1678        ecode++;        ecode++;
1679        break;        break;
1680        }        }
1681      else      else
1682        {        {
1683        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1684        if (!md->endonly)        if (!md->endonly)
1685          {          {
1686          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1687             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1688            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
1689          ecode++;          ecode++;
1690          break;          break;
1691          }          }
1692        }        }
1693      /* ... else fall through */      /* ... else fall through for endonly */
1694    
1695      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1696    
1697      case OP_EOD:      case OP_EOD:
1698      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1699      ecode++;      ecode++;
1700      break;      break;
1701    
1702      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1703    
1704      case OP_EODN:      case OP_EODN:
1705      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1706         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1707          MRRETURN(MATCH_NOMATCH);
1708      ecode++;      ecode++;
1709      break;      break;
1710    
# Line 1206  for (;;) Line 1716  for (;;)
1716    
1717        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1718        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1719        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1720          partial matching. */
1721    
1722  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1723        if (utf8)        if (utf8)
1724          {          {
1725            /* Get status of previous character */
1726    
1727          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1728            {            {
1729            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1730            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1731              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1732            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1733    #ifdef SUPPORT_UCP
1734              if (md->use_ucp)
1735                {
1736                if (c == '_') prev_is_word = TRUE; else
1737                  {
1738                  int cat = UCD_CATEGORY(c);
1739                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1740                  }
1741                }
1742              else
1743    #endif
1744            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1745            }            }
1746          if (eptr >= md->end_subject) cur_is_word = FALSE; else  
1747            /* Get status of next character */
1748    
1749            if (eptr >= md->end_subject)
1750              {
1751              SCHECK_PARTIAL();
1752              cur_is_word = FALSE;
1753              }
1754            else
1755            {            {
1756            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1757    #ifdef SUPPORT_UCP
1758              if (md->use_ucp)
1759                {
1760                if (c == '_') cur_is_word = TRUE; else
1761                  {
1762                  int cat = UCD_CATEGORY(c);
1763                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1764                  }
1765                }
1766              else
1767    #endif
1768            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1769            }            }
1770          }          }
1771        else        else
1772  #endif  #endif
1773    
1774        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1775          consistency with the behaviour of \w we do use it in this case. */
1776    
1777          {          {
1778          prev_is_word = (eptr != md->start_subject) &&          /* Get status of previous character */
1779            ((md->ctypes[eptr[-1]] & ctype_word) != 0);  
1780          cur_is_word = (eptr < md->end_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1781            ((md->ctypes[*eptr] & ctype_word) != 0);            {
1782              if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1783    #ifdef SUPPORT_UCP
1784              if (md->use_ucp)
1785                {
1786                c = eptr[-1];
1787                if (c == '_') prev_is_word = TRUE; else
1788                  {
1789                  int cat = UCD_CATEGORY(c);
1790                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1791                  }
1792                }
1793              else
1794    #endif
1795              prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1796              }
1797    
1798            /* Get status of next character */
1799    
1800            if (eptr >= md->end_subject)
1801              {
1802              SCHECK_PARTIAL();
1803              cur_is_word = FALSE;
1804              }
1805            else
1806    #ifdef SUPPORT_UCP
1807            if (md->use_ucp)
1808              {
1809              c = *eptr;
1810              if (c == '_') cur_is_word = TRUE; else
1811                {
1812                int cat = UCD_CATEGORY(c);
1813                cur_is_word = (cat == ucp_L || cat == ucp_N);
1814                }
1815              }
1816            else
1817    #endif
1818            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1819          }          }
1820    
1821        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
1822    
1823        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
1824             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1825          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1826        }        }
1827      break;      break;
1828    
1829      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1830    
1831      case OP_ANY:      case OP_ANY:
1832      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1833        RRETURN(MATCH_NOMATCH);      /* Fall through */
1834      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);  
1835  #ifdef SUPPORT_UTF8      case OP_ALLANY:
1836      if (utf8)      if (eptr++ >= md->end_subject)
1837        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        {
1838  #endif        SCHECK_PARTIAL();
1839          MRRETURN(MATCH_NOMATCH);
1840          }
1841        if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1842      ecode++;      ecode++;
1843      break;      break;
1844    
# Line 1261  for (;;) Line 1846  for (;;)
1846      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1847    
1848      case OP_ANYBYTE:      case OP_ANYBYTE:
1849      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1850          {
1851          SCHECK_PARTIAL();
1852          MRRETURN(MATCH_NOMATCH);
1853          }
1854      ecode++;      ecode++;
1855      break;      break;
1856    
1857      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1858      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1859          {
1860          SCHECK_PARTIAL();
1861          MRRETURN(MATCH_NOMATCH);
1862          }
1863      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1864      if (      if (
1865  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1274  for (;;) Line 1867  for (;;)
1867  #endif  #endif
1868         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1869         )         )
1870        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1871      ecode++;      ecode++;
1872      break;      break;
1873    
1874      case OP_DIGIT:      case OP_DIGIT:
1875      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1876          {
1877          SCHECK_PARTIAL();
1878          MRRETURN(MATCH_NOMATCH);
1879          }
1880      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1881      if (      if (
1882  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1287  for (;;) Line 1884  for (;;)
1884  #endif  #endif
1885         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1886         )         )
1887        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1888      ecode++;      ecode++;
1889      break;      break;
1890    
1891      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1892      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1893          {
1894          SCHECK_PARTIAL();
1895          MRRETURN(MATCH_NOMATCH);
1896          }
1897      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1898      if (      if (
1899  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1300  for (;;) Line 1901  for (;;)
1901  #endif  #endif
1902         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1903         )         )
1904        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1905      ecode++;      ecode++;
1906      break;      break;
1907    
1908      case OP_WHITESPACE:      case OP_WHITESPACE:
1909      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1910          {
1911          SCHECK_PARTIAL();
1912          MRRETURN(MATCH_NOMATCH);
1913          }
1914      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1915      if (      if (
1916  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1313  for (;;) Line 1918  for (;;)
1918  #endif  #endif
1919         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1920         )         )
1921        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1922      ecode++;      ecode++;
1923      break;      break;
1924    
1925      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1926      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1927          {
1928          SCHECK_PARTIAL();
1929          MRRETURN(MATCH_NOMATCH);
1930          }
1931      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1932      if (      if (
1933  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1326  for (;;) Line 1935  for (;;)
1935  #endif  #endif
1936         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1937         )         )
1938        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1939      ecode++;      ecode++;
1940      break;      break;
1941    
1942      case OP_WORDCHAR:      case OP_WORDCHAR:
1943      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1944          {
1945          SCHECK_PARTIAL();
1946          MRRETURN(MATCH_NOMATCH);
1947          }
1948      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1949      if (      if (
1950  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1339  for (;;) Line 1952  for (;;)
1952  #endif  #endif
1953         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1954         )         )
1955        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1956        ecode++;
1957        break;
1958    
1959        case OP_ANYNL:
1960        if (eptr >= md->end_subject)
1961          {
1962          SCHECK_PARTIAL();
1963          MRRETURN(MATCH_NOMATCH);
1964          }
1965        GETCHARINCTEST(c, eptr);
1966        switch(c)
1967          {
1968          default: MRRETURN(MATCH_NOMATCH);
1969          case 0x000d:
1970          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1971          break;
1972    
1973          case 0x000a:
1974          break;
1975    
1976          case 0x000b:
1977          case 0x000c:
1978          case 0x0085:
1979          case 0x2028:
1980          case 0x2029:
1981          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1982          break;
1983          }
1984        ecode++;
1985        break;
1986    
1987        case OP_NOT_HSPACE:
1988        if (eptr >= md->end_subject)
1989          {
1990          SCHECK_PARTIAL();
1991          MRRETURN(MATCH_NOMATCH);
1992          }
1993        GETCHARINCTEST(c, eptr);
1994        switch(c)
1995          {
1996          default: break;
1997          case 0x09:      /* HT */
1998          case 0x20:      /* SPACE */
1999          case 0xa0:      /* NBSP */
2000          case 0x1680:    /* OGHAM SPACE MARK */
2001          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2002          case 0x2000:    /* EN QUAD */
2003          case 0x2001:    /* EM QUAD */
2004          case 0x2002:    /* EN SPACE */
2005          case 0x2003:    /* EM SPACE */
2006          case 0x2004:    /* THREE-PER-EM SPACE */
2007          case 0x2005:    /* FOUR-PER-EM SPACE */
2008          case 0x2006:    /* SIX-PER-EM SPACE */
2009          case 0x2007:    /* FIGURE SPACE */
2010          case 0x2008:    /* PUNCTUATION SPACE */
2011          case 0x2009:    /* THIN SPACE */
2012          case 0x200A:    /* HAIR SPACE */
2013          case 0x202f:    /* NARROW NO-BREAK SPACE */
2014          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2015          case 0x3000:    /* IDEOGRAPHIC SPACE */
2016          MRRETURN(MATCH_NOMATCH);
2017          }
2018        ecode++;
2019        break;
2020    
2021        case OP_HSPACE:
2022        if (eptr >= md->end_subject)
2023          {
2024          SCHECK_PARTIAL();
2025          MRRETURN(MATCH_NOMATCH);
2026          }
2027        GETCHARINCTEST(c, eptr);
2028        switch(c)
2029          {
2030          default: MRRETURN(MATCH_NOMATCH);
2031          case 0x09:      /* HT */
2032          case 0x20:      /* SPACE */
2033          case 0xa0:      /* NBSP */
2034          case 0x1680:    /* OGHAM SPACE MARK */
2035          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2036          case 0x2000:    /* EN QUAD */
2037          case 0x2001:    /* EM QUAD */
2038          case 0x2002:    /* EN SPACE */
2039          case 0x2003:    /* EM SPACE */
2040          case 0x2004:    /* THREE-PER-EM SPACE */
2041          case 0x2005:    /* FOUR-PER-EM SPACE */
2042          case 0x2006:    /* SIX-PER-EM SPACE */
2043          case 0x2007:    /* FIGURE SPACE */
2044          case 0x2008:    /* PUNCTUATION SPACE */
2045          case 0x2009:    /* THIN SPACE */
2046          case 0x200A:    /* HAIR SPACE */
2047          case 0x202f:    /* NARROW NO-BREAK SPACE */
2048          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2049          case 0x3000:    /* IDEOGRAPHIC SPACE */
2050          break;
2051          }
2052        ecode++;
2053        break;
2054    
2055        case OP_NOT_VSPACE:
2056        if (eptr >= md->end_subject)
2057          {
2058          SCHECK_PARTIAL();
2059          MRRETURN(MATCH_NOMATCH);
2060          }
2061        GETCHARINCTEST(c, eptr);
2062        switch(c)
2063          {
2064          default: break;
2065          case 0x0a:      /* LF */
2066          case 0x0b:      /* VT */
2067          case 0x0c:      /* FF */
2068          case 0x0d:      /* CR */
2069          case 0x85:      /* NEL */
2070          case 0x2028:    /* LINE SEPARATOR */
2071          case 0x2029:    /* PARAGRAPH SEPARATOR */
2072          MRRETURN(MATCH_NOMATCH);
2073          }
2074        ecode++;
2075        break;
2076    
2077        case OP_VSPACE:
2078        if (eptr >= md->end_subject)
2079          {
2080          SCHECK_PARTIAL();
2081          MRRETURN(MATCH_NOMATCH);
2082          }
2083        GETCHARINCTEST(c, eptr);
2084        switch(c)
2085          {
2086          default: MRRETURN(MATCH_NOMATCH);
2087          case 0x0a:      /* LF */
2088          case 0x0b:      /* VT */
2089          case 0x0c:      /* FF */
2090          case 0x0d:      /* CR */
2091          case 0x85:      /* NEL */
2092          case 0x2028:    /* LINE SEPARATOR */
2093          case 0x2029:    /* PARAGRAPH SEPARATOR */
2094          break;
2095          }
2096      ecode++;      ecode++;
2097      break;      break;
2098    
# Line 1349  for (;;) Line 2102  for (;;)
2102    
2103      case OP_PROP:      case OP_PROP:
2104      case OP_NOTPROP:      case OP_NOTPROP:
2105      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2106          {
2107          SCHECK_PARTIAL();
2108          MRRETURN(MATCH_NOMATCH);
2109          }
2110      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2111        {        {
2112        int chartype, rqdtype;        const ucd_record *prop = GET_UCD(c);
       int othercase;  
       int category = ucp_findchar(c, &chartype, &othercase);  
   
       rqdtype = *(++ecode);  
       ecode++;  
2113    
2114        if (rqdtype >= 128)        switch(ecode[1])
         {  
         if ((rqdtype - 128 != category) == (op == OP_PROP))  
           RRETURN(MATCH_NOMATCH);  
         }  
       else  
2115          {          {
2116          if ((rqdtype != chartype) == (op == OP_PROP))          case PT_ANY:
2117            RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2118            break;
2119    
2120            case PT_LAMP:
2121            if ((prop->chartype == ucp_Lu ||
2122                 prop->chartype == ucp_Ll ||
2123                 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2124              MRRETURN(MATCH_NOMATCH);
2125            break;
2126    
2127            case PT_GC:
2128            if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2129              MRRETURN(MATCH_NOMATCH);
2130            break;
2131    
2132            case PT_PC:
2133            if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2134              MRRETURN(MATCH_NOMATCH);
2135            break;
2136    
2137            case PT_SC:
2138            if ((ecode[2] != prop->script) == (op == OP_PROP))
2139              MRRETURN(MATCH_NOMATCH);
2140            break;
2141    
2142            /* These are specials */
2143    
2144            case PT_ALNUM:
2145            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2146                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2147              MRRETURN(MATCH_NOMATCH);
2148            break;
2149    
2150            case PT_SPACE:    /* Perl space */
2151            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2152                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2153                   == (op == OP_NOTPROP))
2154              MRRETURN(MATCH_NOMATCH);
2155            break;
2156    
2157            case PT_PXSPACE:  /* POSIX space */
2158            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2159                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2160                 c == CHAR_FF || c == CHAR_CR)
2161                   == (op == OP_NOTPROP))
2162              MRRETURN(MATCH_NOMATCH);
2163            break;
2164    
2165            case PT_WORD:
2166            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2167                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2168                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2169              MRRETURN(MATCH_NOMATCH);
2170            break;
2171    
2172            /* This should never occur */
2173    
2174            default:
2175            RRETURN(PCRE_ERROR_INTERNAL);
2176          }          }
2177    
2178          ecode += 3;
2179        }        }
2180      break;      break;
2181    
# Line 1376  for (;;) Line 2183  for (;;)
2183      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2184    
2185      case OP_EXTUNI:      case OP_EXTUNI:
2186      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2187          {
2188          SCHECK_PARTIAL();
2189          MRRETURN(MATCH_NOMATCH);
2190          }
2191      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2192        {        {
2193        int chartype;        int category = UCD_CATEGORY(c);
2194        int othercase;        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       int category = ucp_findchar(c, &chartype, &othercase);  
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2195        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2196          {          {
2197          int len = 1;          int len = 1;
# Line 1390  for (;;) Line 2199  for (;;)
2199            {            {
2200            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2201            }            }
2202          category = ucp_findchar(c, &chartype, &othercase);          category = UCD_CATEGORY(c);
2203          if (category != ucp_M) break;          if (category != ucp_M) break;
2204          eptr += len;          eptr += len;
2205          }          }
# Line 1411  for (;;) Line 2220  for (;;)
2220      case OP_REF:      case OP_REF:
2221        {        {
2222        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2223        ecode += 3;                                 /* Advance past item */        ecode += 3;
2224    
2225          /* If the reference is unset, there are two possibilities:
2226    
2227        /* If the reference is unset, set the length to be longer than the amount        (a) In the default, Perl-compatible state, set the length to be longer
2228        of subject left; this ensures that every attempt at a match fails. We        than the amount of subject left; this ensures that every attempt at a
2229        can't just fail here, because of the possibility of quantifiers with zero        match fails. We can't just fail here, because of the possibility of
2230        minima. */        quantifiers with zero minima.
2231    
2232        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        (b) If the JavaScript compatibility flag is set, set the length to zero
2233          md->end_subject - eptr + 1 :        so that the back reference matches an empty string.
2234          md->offset_vector[offset+1] - md->offset_vector[offset];  
2235          Otherwise, set the length to the length of what was matched by the
2236          referenced subpattern. */
2237    
2238          if (offset >= offset_top || md->offset_vector[offset] < 0)
2239            length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2240          else
2241            length = md->offset_vector[offset+1] - md->offset_vector[offset];
2242    
2243        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2244    
# Line 1449  for (;;) Line 2267  for (;;)
2267          break;          break;
2268    
2269          default:               /* No repeat follows */          default:               /* No repeat follows */
2270          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2271              {
2272              CHECK_PARTIAL();
2273              MRRETURN(MATCH_NOMATCH);
2274              }
2275          eptr += length;          eptr += length;
2276          continue;              /* With the main loop */          continue;              /* With the main loop */
2277          }          }
# Line 1465  for (;;) Line 2287  for (;;)
2287    
2288        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2289          {          {
2290          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2291              {
2292              CHECK_PARTIAL();
2293              MRRETURN(MATCH_NOMATCH);
2294              }
2295          eptr += length;          eptr += length;
2296          }          }
2297    
# Line 1480  for (;;) Line 2306  for (;;)
2306          {          {
2307          for (fi = min;; fi++)          for (fi = min;; fi++)
2308            {            {
2309            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2310            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2311            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2312              RRETURN(MATCH_NOMATCH);            if (!match_ref(offset, eptr, length, md, ims))
2313                {
2314                CHECK_PARTIAL();
2315                MRRETURN(MATCH_NOMATCH);
2316                }
2317            eptr += length;            eptr += length;
2318            }            }
2319          /* Control never gets here */          /* Control never gets here */
# Line 1496  for (;;) Line 2326  for (;;)
2326          pp = eptr;          pp = eptr;
2327          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2328            {            {
2329            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2330                {
2331                CHECK_PARTIAL();
2332                break;
2333                }
2334            eptr += length;            eptr += length;
2335            }            }
2336          while (eptr >= pp)          while (eptr >= pp)
2337            {            {
2338            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2339            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2340            eptr -= length;            eptr -= length;
2341            }            }
2342          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2343          }          }
2344        }        }
2345      /* Control never gets here */      /* Control never gets here */
2346    
   
   
2347      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2348      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2349      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1566  for (;;) Line 2398  for (;;)
2398          {          {
2399          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2400            {            {
2401            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2402                {
2403                SCHECK_PARTIAL();
2404                MRRETURN(MATCH_NOMATCH);
2405                }
2406            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2407            if (c > 255)            if (c > 255)
2408              {              {
2409              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2410              }              }
2411            else            else
2412              {              {
2413              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2414              }              }
2415            }            }
2416          }          }
# Line 1584  for (;;) Line 2420  for (;;)
2420          {          {
2421          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2422            {            {
2423            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2424                {
2425                SCHECK_PARTIAL();
2426                MRRETURN(MATCH_NOMATCH);
2427                }
2428            c = *eptr++;            c = *eptr++;
2429            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2430            }            }
2431          }          }
2432    
# Line 1606  for (;;) Line 2446  for (;;)
2446            {            {
2447            for (fi = min;; fi++)            for (fi = min;; fi++)
2448              {              {
2449              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2450              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2451              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2452                if (eptr >= md->end_subject)
2453                  {
2454                  SCHECK_PARTIAL();
2455                  MRRETURN(MATCH_NOMATCH);
2456                  }
2457              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2458              if (c > 255)              if (c > 255)
2459                {                {
2460                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2461                }                }
2462              else              else
2463                {                {
2464                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2465                }                }
2466              }              }
2467            }            }
# Line 1626  for (;;) Line 2471  for (;;)
2471            {            {
2472            for (fi = min;; fi++)            for (fi = min;; fi++)
2473              {              {
2474              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2475              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2476              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2477                if (eptr >= md->end_subject)
2478                  {
2479                  SCHECK_PARTIAL();
2480                  MRRETURN(MATCH_NOMATCH);
2481                  }
2482              c = *eptr++;              c = *eptr++;
2483              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2484              }              }
2485            }            }
2486          /* Control never gets here */          /* Control never gets here */
# Line 1649  for (;;) Line 2499  for (;;)
2499            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2500              {              {
2501              int len = 1;              int len = 1;
2502              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2503                  {
2504                  SCHECK_PARTIAL();
2505                  break;
2506                  }
2507              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2508              if (c > 255)              if (c > 255)
2509                {                {
# Line 1663  for (;;) Line 2517  for (;;)
2517              }              }
2518            for (;;)            for (;;)
2519              {              {
2520              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2521              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2522              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2523              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1675  for (;;) Line 2529  for (;;)
2529            {            {
2530            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2531              {              {
2532              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2533                  {
2534                  SCHECK_PARTIAL();
2535                  break;
2536                  }
2537              c = *eptr;              c = *eptr;
2538              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2539              eptr++;              eptr++;
2540              }              }
2541            while (eptr >= pp)            while (eptr >= pp)
2542              {              {
2543              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
             eptr--;  
2544              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2545                eptr--;
2546              }              }
2547            }            }
2548    
2549          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2550          }          }
2551        }        }
2552      /* Control never gets here */      /* Control never gets here */
2553    
2554    
2555      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2556      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2557        mode, because Unicode properties are supported in non-UTF-8 mode. */
2558    
2559  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2560      case OP_XCLASS:      case OP_XCLASS:
# Line 1736  for (;;) Line 2595  for (;;)
2595    
2596        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2597          {          {
2598          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2599          GETCHARINC(c, eptr);            {
2600          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2601              MRRETURN(MATCH_NOMATCH);
2602              }
2603            GETCHARINCTEST(c, eptr);
2604            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2605          }          }
2606    
2607        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1753  for (;;) Line 2616  for (;;)
2616          {          {
2617          for (fi = min;; fi++)          for (fi = min;; fi++)
2618            {            {
2619            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2620            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2621            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2622            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2623            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2624                SCHECK_PARTIAL();
2625                MRRETURN(MATCH_NOMATCH);
2626                }
2627              GETCHARINCTEST(c, eptr);
2628              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2629            }            }
2630          /* Control never gets here */          /* Control never gets here */
2631          }          }
# Line 1770  for (;;) Line 2638  for (;;)
2638          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2639            {            {
2640            int len = 1;            int len = 1;
2641            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2642            GETCHARLEN(c, eptr, len);              {
2643                SCHECK_PARTIAL();
2644                break;
2645                }
2646              GETCHARLENTEST(c, eptr, len);
2647            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2648            eptr += len;            eptr += len;
2649            }            }
2650          for(;;)          for(;;)
2651            {            {
2652            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2653            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2654            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2655            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2656            }            }
2657          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2658          }          }
2659    
2660        /* Control never gets here */        /* Control never gets here */
# Line 1798  for (;;) Line 2670  for (;;)
2670        length = 1;        length = 1;
2671        ecode++;        ecode++;
2672        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2673        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2674        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2675            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2676            MRRETURN(MATCH_NOMATCH);
2677            }
2678          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2679        }        }
2680      else      else
2681  #endif  #endif
2682    
2683      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2684        {        {
2685        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2686        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2687            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2688            MRRETURN(MATCH_NOMATCH);
2689            }
2690          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2691        ecode += 2;        ecode += 2;
2692        }        }
2693      break;      break;
# Line 1822  for (;;) Line 2702  for (;;)
2702        ecode++;        ecode++;
2703        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2704    
2705        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2706            {
2707            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2708            MRRETURN(MATCH_NOMATCH);
2709            }
2710    
2711        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2712        can use the fast lookup table. */        can use the fast lookup table. */
2713    
2714        if (fc < 128)        if (fc < 128)
2715          {          {
2716          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2717          }          }
2718    
2719        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
2720    
2721        else        else
2722          {          {
2723          int dc;          unsigned int dc;
2724          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2725          ecode += length;          ecode += length;
2726    
2727          /* If we have Unicode property support, we can use it to test the other          /* If we have Unicode property support, we can use it to test the other
2728          case of the character, if there is one. The result of ucp_findchar() is          case of the character, if there is one. */
         < 0 if the char isn't found, and othercase is returned as zero if there  
         isn't one. */  
2729    
2730          if (fc != dc)          if (fc != dc)
2731            {            {
2732  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2733            int chartype;            if (dc != UCD_OTHERCASE(fc))
           int othercase;  
           if (ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)  
2734  #endif  #endif
2735              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2736            }            }
2737          }          }
2738        }        }
# Line 1861  for (;;) Line 2741  for (;;)
2741    
2742      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2743        {        {
2744        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2745        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2746            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2747            MRRETURN(MATCH_NOMATCH);
2748            }
2749          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2750        ecode += 2;        ecode += 2;
2751        }        }
2752      break;      break;
2753    
2754      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2755    
2756      case OP_EXACT:      case OP_EXACT:
2757      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2758      ecode += 3;      ecode += 3;
2759      goto REPEATCHAR;      goto REPEATCHAR;
2760    
2761        case OP_POSUPTO:
2762        possessive = TRUE;
2763        /* Fall through */
2764    
2765      case OP_UPTO:      case OP_UPTO:
2766      case OP_MINUPTO:      case OP_MINUPTO:
2767      min = 0;      min = 0;
# Line 1882  for (;;) Line 2770  for (;;)
2770      ecode += 3;      ecode += 3;
2771      goto REPEATCHAR;      goto REPEATCHAR;
2772    
2773        case OP_POSSTAR:
2774        possessive = TRUE;
2775        min = 0;
2776        max = INT_MAX;
2777        ecode++;
2778        goto REPEATCHAR;
2779    
2780        case OP_POSPLUS:
2781        possessive = TRUE;
2782        min = 1;
2783        max = INT_MAX;
2784        ecode++;
2785        goto REPEATCHAR;
2786    
2787        case OP_POSQUERY:
2788        possessive = TRUE;
2789        min = 0;
2790        max = 1;
2791        ecode++;
2792        goto REPEATCHAR;
2793    
2794      case OP_STAR:      case OP_STAR:
2795      case OP_MINSTAR:      case OP_MINSTAR:
2796      case OP_PLUS:      case OP_PLUS:
# Line 1890  for (;;) Line 2799  for (;;)
2799      case OP_MINQUERY:      case OP_MINQUERY:
2800      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2801      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2802    
2803      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2804      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2805      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2806    
2807      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2808    
2809      REPEATCHAR:      REPEATCHAR:
2810  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1905  for (;;) Line 2813  for (;;)
2813        length = 1;        length = 1;
2814        charptr = ecode;        charptr = ecode;
2815        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2816        ecode += length;        ecode += length;
2817    
2818        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 1913  for (;;) Line 2820  for (;;)
2820    
2821        if (length > 1)        if (length > 1)
2822          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2823  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2824          int othercase;          unsigned int othercase;
         int chartype;  
2825          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2826               ucp_findchar(fc, &chartype, &othercase) >= 0 &&              (othercase = UCD_OTHERCASE(fc)) != fc)
              othercase > 0)  
2827            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2828            else oclength = 0;
2829  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2830    
2831          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2832            {            {
2833            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2834            /* Need braces because of following else */              memcmp(eptr, charptr, length) == 0) eptr += length;
2835            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2836              else if (oclength > 0 &&
2837                       eptr <= md->end_subject - oclength &&
2838                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2839    #endif  /* SUPPORT_UCP */
2840            else            else
2841              {              {
2842              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2843              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2844              }              }
2845            }            }
2846    
# Line 1943  for (;;) Line 2850  for (;;)
2850            {            {
2851            for (fi = min;; fi++)            for (fi = min;; fi++)
2852              {              {
2853              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2854              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2855              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2856              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2857              /* Need braces because of following else */                memcmp(eptr, charptr, length) == 0) eptr += length;
2858              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2859                else if (oclength > 0 &&
2860                         eptr <= md->end_subject - oclength &&
2861                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2862    #endif  /* SUPPORT_UCP */
2863              else              else
2864                {                {
2865                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2866                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2867                }                }
2868              }              }
2869            /* Control never gets here */            /* Control never gets here */
2870            }            }
2871          else  
2872            else  /* Maximize */
2873            {            {
2874            pp = eptr;            pp = eptr;
2875            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2876              {              {
2877              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2878              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2879              else if (oclength == 0) break;  #ifdef SUPPORT_UCP
2880                else if (oclength > 0 &&
2881                         eptr <= md->end_subject - oclength &&
2882                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2883    #endif  /* SUPPORT_UCP */
2884              else              else
2885                {                {
2886                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2887                eptr += oclength;                break;
2888                }                }
2889              }              }
2890            while (eptr >= pp)  
2891             {            if (possessive) continue;
2892             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);  
2893             if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for(;;)
2894             eptr -= length;              {
2895             }              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2896            RRETURN(MATCH_NOMATCH);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2897                if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2898    #ifdef SUPPORT_UCP
2899                eptr--;
2900                BACKCHAR(eptr);
2901    #else   /* without SUPPORT_UCP */
2902                eptr -= length;
2903    #endif  /* SUPPORT_UCP */
2904                }
2905            }            }
2906          /* Control never gets here */          /* Control never gets here */
2907          }          }
# Line 1990  for (;;) Line 2914  for (;;)
2914  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2915    
2916      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2917        {  
2918        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2919    
2920      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2921      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2011  for (;;) Line 2933  for (;;)
2933        {        {
2934        fc = md->lcc[fc];        fc = md->lcc[fc];
2935        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2936          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2937            if (eptr >= md->end_subject)
2938              {
2939              SCHECK_PARTIAL();
2940              MRRETURN(MATCH_NOMATCH);
2941              }
2942            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2943            }
2944        if (min == max) continue;        if (min == max) continue;
2945        if (minimize)        if (minimize)
2946          {          {
2947          for (fi = min;; fi++)          for (fi = min;; fi++)
2948            {            {
2949            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2950            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2951            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2952                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2953              RRETURN(MATCH_NOMATCH);              {
2954            }              SCHECK_PARTIAL();
2955                MRRETURN(MATCH_NOMATCH);
2956                }
2957              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2958              }
2959          /* Control never gets here */          /* Control never gets here */
2960          }          }
2961        else        else  /* Maximize */
2962          {          {
2963          pp = eptr;          pp = eptr;
2964          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2965            {            {
2966            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2967                {
2968                SCHECK_PARTIAL();
2969                break;
2970                }
2971              if (fc != md->lcc[*eptr]) break;
2972            eptr++;            eptr++;
2973            }            }
2974    
2975            if (possessive) continue;
2976    
2977          while (eptr >= pp)          while (eptr >= pp)
2978            {            {
2979            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2980            eptr--;            eptr--;
2981            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2982            }            }
2983          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2984          }          }
2985        /* Control never gets here */        /* Control never gets here */
2986        }        }
# Line 2048  for (;;) Line 2989  for (;;)
2989    
2990      else      else
2991        {        {
2992        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2993            {
2994            if (eptr >= md->end_subject)
2995              {
2996              SCHECK_PARTIAL();
2997              MRRETURN(MATCH_NOMATCH);
2998              }
2999            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3000            }
3001    
3002        if (min == max) continue;        if (min == max) continue;
3003    
3004        if (minimize)        if (minimize)
3005          {          {
3006          for (fi = min;; fi++)          for (fi = min;; fi++)
3007            {            {
3008            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3009            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3010            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3011              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3012                {
3013                SCHECK_PARTIAL();
3014                MRRETURN(MATCH_NOMATCH);
3015                }
3016              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3017            }            }
3018          /* Control never gets here */          /* Control never gets here */
3019          }          }
3020        else        else  /* Maximize */
3021          {          {
3022          pp = eptr;          pp = eptr;
3023          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3024            {            {
3025            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
3026                {
3027                SCHECK_PARTIAL();
3028                break;
3029                }
3030              if (fc != *eptr) break;
3031            eptr++;            eptr++;
3032            }            }
3033            if (possessive) continue;
3034    
3035          while (eptr >= pp)          while (eptr >= pp)
3036            {            {
3037            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3038            eptr--;            eptr--;
3039            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3040            }            }
3041          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3042          }          }
3043        }        }
3044      /* Control never gets here */      /* Control never gets here */
# Line 2084  for (;;) Line 3047  for (;;)
3047      checking can be multibyte. */      checking can be multibyte. */
3048    
3049      case OP_NOT:      case OP_NOT:
3050      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
3051          {
3052          SCHECK_PARTIAL();
3053          MRRETURN(MATCH_NOMATCH);
3054          }
3055      ecode++;      ecode++;
3056      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3057      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2093  for (;;) Line 3060  for (;;)
3060        if (c < 256)        if (c < 256)
3061  #endif  #endif
3062        c = md->lcc[c];        c = md->lcc[c];
3063        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3064        }        }
3065      else      else
3066        {        {
3067        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3068        }        }
3069      break;      break;
3070    
# Line 2121  for (;;) Line 3088  for (;;)
3088      ecode += 3;      ecode += 3;
3089      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3090    
3091        case OP_NOTPOSSTAR:
3092        possessive = TRUE;
3093        min = 0;
3094        max = INT_MAX;
3095        ecode++;
3096        goto REPEATNOTCHAR;
3097    
3098        case OP_NOTPOSPLUS:
3099        possessive = TRUE;
3100        min = 1;
3101        max = INT_MAX;
3102        ecode++;
3103        goto REPEATNOTCHAR;
3104    
3105        case OP_NOTPOSQUERY:
3106        possessive = TRUE;
3107        min = 0;
3108        max = 1;
3109        ecode++;
3110        goto REPEATNOTCHAR;
3111    
3112        case OP_NOTPOSUPTO:
3113        possessive = TRUE;
3114        min = 0;
3115        max = GET2(ecode, 1);
3116        ecode += 3;
3117        goto REPEATNOTCHAR;
3118    
3119      case OP_NOTSTAR:      case OP_NOTSTAR:
3120      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
3121      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2133  for (;;) Line 3128  for (;;)
3128      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3129      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3130    
3131      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3132    
3133      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3134      fc = *ecode++;      fc = *ecode++;
3135    
3136      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2160  for (;;) Line 3152  for (;;)
3152        /* UTF-8 mode */        /* UTF-8 mode */
3153        if (utf8)        if (utf8)
3154          {          {
3155          register int d;          register unsigned int d;
3156          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3157            {            {
3158              if (eptr >= md->end_subject)
3159                {
3160                SCHECK_PARTIAL();
3161                MRRETURN(MATCH_NOMATCH);
3162                }
3163            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3164            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3165            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3166            }            }
3167          }          }
3168        else        else
# Line 2174  for (;;) Line 3171  for (;;)
3171        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3172          {          {
3173          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3174            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3175              if (eptr >= md->end_subject)
3176                {
3177                SCHECK_PARTIAL();
3178                MRRETURN(MATCH_NOMATCH);
3179                }
3180              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3181              }
3182          }          }
3183    
3184        if (min == max) continue;        if (min == max) continue;
# Line 2185  for (;;) Line 3189  for (;;)
3189          /* UTF-8 mode */          /* UTF-8 mode */
3190          if (utf8)          if (utf8)
3191            {            {
3192            register int d;            register unsigned int d;
3193            for (fi = min;; fi++)            for (fi = min;; fi++)
3194              {              {
3195              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3196              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3197                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3198                if (eptr >= md->end_subject)
3199                  {
3200                  SCHECK_PARTIAL();
3201                  MRRETURN(MATCH_NOMATCH);
3202                  }
3203              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3204              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3205              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3206              }              }
3207            }            }
3208          else          else
# Line 2202  for (;;) Line 3211  for (;;)
3211            {            {
3212            for (fi = min;; fi++)            for (fi = min;; fi++)
3213              {              {
3214              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3215              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3216              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3217                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3218                  {
3219                  SCHECK_PARTIAL();
3220                  MRRETURN(MATCH_NOMATCH);
3221                  }
3222                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3223              }              }
3224            }            }
3225          /* Control never gets here */          /* Control never gets here */
# Line 2221  for (;;) Line 3235  for (;;)
3235          /* UTF-8 mode */          /* UTF-8 mode */
3236          if (utf8)          if (utf8)
3237            {            {
3238            register int d;            register unsigned int d;
3239            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3240              {              {
3241              int len = 1;              int len = 1;
3242              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3243                  {
3244                  SCHECK_PARTIAL();
3245                  break;
3246                  }
3247              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3248              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3249              if (fc == d) break;              if (fc == d) break;
3250              eptr += len;              eptr += len;
3251              }              }
3252            for(;;)          if (possessive) continue;
3253            for(;;)
3254              {              {
3255              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3256              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3257              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3258              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2245  for (;;) Line 3264  for (;;)
3264            {            {
3265            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3266              {              {
3267              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3268                  {
3269                  SCHECK_PARTIAL();
3270                  break;
3271                  }
3272                if (fc == md->lcc[*eptr]) break;
3273              eptr++;              eptr++;
3274              }              }
3275              if (possessive) continue;
3276            while (eptr >= pp)            while (eptr >= pp)
3277              {              {
3278              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3279              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3280              eptr--;              eptr--;
3281              }              }
3282            }            }
3283    
3284          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3285          }          }
3286        /* Control never gets here */        /* Control never gets here */
3287        }        }
# Line 2269  for (;;) Line 3294  for (;;)
3294        /* UTF-8 mode */        /* UTF-8 mode */
3295        if (utf8)        if (utf8)
3296          {          {
3297          register int d;          register unsigned int d;
3298          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3299            {            {
3300              if (eptr >= md->end_subject)
3301                {
3302                SCHECK_PARTIAL();
3303                MRRETURN(MATCH_NOMATCH);
3304                }
3305            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3306            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3307            }            }
3308          }          }
3309        else        else
# Line 2281  for (;;) Line 3311  for (;;)
3311        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3312          {          {
3313          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3314            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3315              if (eptr >= md->end_subject)
3316                {
3317                SCHECK_PARTIAL();
3318                MRRETURN(MATCH_NOMATCH);
3319                }
3320              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3321              }
3322          }          }
3323    
3324        if (min == max) continue;        if (min == max) continue;
# Line 2292  for (;;) Line 3329  for (;;)
3329          /* UTF-8 mode */          /* UTF-8 mode */
3330          if (utf8)          if (utf8)
3331            {            {
3332            register int d;            register unsigned int d;
3333            for (fi = min;; fi++)            for (fi = min;; fi++)
3334              {              {
3335              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3336              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3337                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3338                if (eptr >= md->end_subject)
3339                  {
3340                  SCHECK_PARTIAL();
3341                  MRRETURN(MATCH_NOMATCH);
3342                  }
3343              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3344              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3345              }              }
3346            }            }
3347          else          else
# Line 2308  for (;;) Line 3350  for (;;)
3350            {            {
3351            for (fi = min;; fi++)            for (fi = min;; fi++)
3352              {              {
3353              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3354              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3355              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3356                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3357                  {
3358                  SCHECK_PARTIAL();
3359                  MRRETURN(MATCH_NOMATCH);
3360                  }
3361                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3362              }              }
3363            }            }
3364          /* Control never gets here */          /* Control never gets here */
# Line 2327  for (;;) Line 3374  for (;;)
3374          /* UTF-8 mode */          /* UTF-8 mode */
3375          if (utf8)          if (utf8)
3376            {            {
3377            register int d;            register unsigned int d;
3378            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3379              {              {
3380              int len = 1;              int len = 1;
3381              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3382                  {
3383                  SCHECK_PARTIAL();
3384                  break;
3385                  }
3386              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3387              if (fc == d) break;              if (fc == d) break;
3388              eptr += len;              eptr += len;
3389              }              }
3390              if (possessive) continue;
3391            for(;;)            for(;;)
3392              {              {
3393              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3394              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3395              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3396              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2350  for (;;) Line 3402  for (;;)
3402            {            {
3403            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3404              {              {
3405              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3406                  {
3407                  SCHECK_PARTIAL();
3408                  break;
3409                  }
3410                if (fc == *eptr) break;
3411              eptr++;              eptr++;
3412              }              }
3413              if (possessive) continue;
3414            while (eptr >= pp)            while (eptr >= pp)
3415              {              {
3416              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3417              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3418              eptr--;              eptr--;
3419              }              }
3420            }            }
3421    
3422          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3423          }          }
3424        }        }
3425      /* Control never gets here */      /* Control never gets here */
# Line 2384  for (;;) Line 3442  for (;;)
3442      ecode += 3;      ecode += 3;
3443      goto REPEATTYPE;      goto REPEATTYPE;
3444    
3445        case OP_TYPEPOSSTAR:
3446        possessive = TRUE;
3447        min = 0;
3448        max = INT_MAX;
3449        ecode++;
3450        goto REPEATTYPE;
3451    
3452        case OP_TYPEPOSPLUS:
3453        possessive = TRUE;
3454        min = 1;
3455        max = INT_MAX;
3456        ecode++;
3457        goto REPEATTYPE;
3458    
3459        case OP_TYPEPOSQUERY:
3460        possessive = TRUE;
3461        min = 0;
3462        max = 1;
3463        ecode++;
3464        goto REPEATTYPE;
3465    
3466        case OP_TYPEPOSUPTO:
3467        possessive = TRUE;
3468        min = 0;
3469        max = GET2(ecode, 1);
3470        ecode += 3;
3471        goto REPEATTYPE;
3472    
3473      case OP_TYPESTAR:      case OP_TYPESTAR:
3474      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
3475      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2408  for (;;) Line 3494  for (;;)
3494        {        {
3495        prop_fail_result = ctype == OP_NOTPROP;        prop_fail_result = ctype == OP_NOTPROP;
3496        prop_type = *ecode++;        prop_type = *ecode++;
3497        if (prop_type >= 128)        prop_value = *ecode++;
         {  
         prop_test_against = prop_type - 128;  
         prop_test_variable = &prop_category;  
         }  
       else  
         {  
         prop_test_against = prop_type;  
         prop_test_variable = &prop_chartype;  
         }  
3498        }        }
3499      else prop_type = -1;      else prop_type = -1;
3500  #endif  #endif
3501    
3502      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3503      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3504      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3505      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3506      and single-bytes. */      and single-bytes. */
3507    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3508      if (min > 0)      if (min > 0)
3509        {        {
3510  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3511        if (prop_type > 0)        if (prop_type >= 0)
3512          {          {
3513          for (i = 1; i <= min; i++)          switch(prop_type)
3514            {            {
3515            GETCHARINC(c, eptr);            case PT_ANY:
3516            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3517            if ((*prop_test_variable == prop_test_against) == prop_fail_result)            for (i = 1; i <= min; i++)
3518              RRETURN(MATCH_NOMATCH);              {
3519                if (eptr >= md->end_subject)
3520                  {
3521                  SCHECK_PARTIAL();
3522                  MRRETURN(MATCH_NOMATCH);
3523                  }
3524                GETCHARINCTEST(c, eptr);
3525                }
3526              break;
3527    
3528              case PT_LAMP:
3529              for (i = 1; i <= min; i++)
3530                {
3531                if (eptr >= md->end_subject)
3532                  {
3533                  SCHECK_PARTIAL();
3534                  MRRETURN(MATCH_NOMATCH);
3535                  }
3536                GETCHARINCTEST(c, eptr);
3537                prop_chartype = UCD_CHARTYPE(c);
3538                if ((prop_chartype == ucp_Lu ||
3539                     prop_chartype == ucp_Ll ||
3540                     prop_chartype == ucp_Lt) == prop_fail_result)
3541                  MRRETURN(MATCH_NOMATCH);
3542                }
3543              break;
3544    
3545              case PT_GC:
3546              for (i = 1; i <= min; i++)
3547                {
3548                if (eptr >= md->end_subject)
3549                  {
3550                  SCHECK_PARTIAL();
3551                  MRRETURN(MATCH_NOMATCH);
3552                  }
3553                GETCHARINCTEST(c, eptr);
3554                prop_category = UCD_CATEGORY(c);
3555                if ((prop_category == prop_value) == prop_fail_result)
3556                  MRRETURN(MATCH_NOMATCH);
3557                }
3558              break;
3559    
3560              case PT_PC:
3561              for (i = 1; i <= min; i++)
3562                {
3563                if (eptr >= md->end_subject)
3564                  {
3565                  SCHECK_PARTIAL();
3566                  MRRETURN(MATCH_NOMATCH);
3567                  }
3568                GETCHARINCTEST(c, eptr);
3569                prop_chartype = UCD_CHARTYPE(c);
3570                if ((prop_chartype == prop_value) == prop_fail_result)
3571                  MRRETURN(MATCH_NOMATCH);
3572                }
3573              break;
3574    
3575              case PT_SC:
3576              for (i = 1; i <= min; i++)
3577                {
3578                if (eptr >= md->end_subject)
3579                  {
3580                  SCHECK_PARTIAL();
3581                  MRRETURN(MATCH_NOMATCH);
3582                  }
3583                GETCHARINCTEST(c, eptr);
3584                prop_script = UCD_SCRIPT(c);
3585                if ((prop_script == prop_value) == prop_fail_result)
3586                  MRRETURN(MATCH_NOMATCH);
3587                }
3588              break;
3589    
3590              case PT_ALNUM:
3591              for (i = 1; i <= min; i++)
3592                {
3593                if (eptr >= md->end_subject)
3594                  {
3595                  SCHECK_PARTIAL();
3596                  MRRETURN(MATCH_NOMATCH);
3597                  }
3598                GETCHARINCTEST(c, eptr);
3599                prop_category = UCD_CATEGORY(c);
3600                if ((prop_category == ucp_L || prop_category == ucp_N)
3601                       == prop_fail_result)
3602                  MRRETURN(MATCH_NOMATCH);
3603                }
3604              break;
3605    
3606              case PT_SPACE:    /* Perl space */
3607              for (i = 1; i <= min; i++)
3608                {
3609                if (eptr >= md->end_subject)
3610                  {
3611                  SCHECK_PARTIAL();
3612                  MRRETURN(MATCH_NOMATCH);
3613                  }
3614                GETCHARINCTEST(c, eptr);
3615                prop_category = UCD_CATEGORY(c);
3616                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3617                     c == CHAR_FF || c == CHAR_CR)
3618                       == prop_fail_result)
3619                  MRRETURN(MATCH_NOMATCH);
3620                }
3621              break;
3622    
3623              case PT_PXSPACE:  /* POSIX space */
3624              for (i = 1; i <= min; i++)
3625                {
3626                if (eptr >= md->end_subject)
3627                  {
3628                  SCHECK_PARTIAL();
3629                  MRRETURN(MATCH_NOMATCH);
3630                  }
3631                GETCHARINCTEST(c, eptr);
3632                prop_category = UCD_CATEGORY(c);
3633                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3634                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3635                       == prop_fail_result)
3636                  MRRETURN(MATCH_NOMATCH);
3637                }
3638              break;
3639    
3640              case PT_WORD:
3641              for (i = 1; i <= min; i++)
3642                {
3643                if (eptr >= md->end_subject)
3644                  {
3645                  SCHECK_PARTIAL();
3646                  MRRETURN(MATCH_NOMATCH);
3647                  }
3648                GETCHARINCTEST(c, eptr);
3649                prop_category = UCD_CATEGORY(c);
3650                if ((prop_category == ucp_L || prop_category == ucp_N ||
3651                     c == CHAR_UNDERSCORE)
3652                       == prop_fail_result)
3653                  MRRETURN(MATCH_NOMATCH);
3654                }
3655              break;
3656    
3657              /* This should not occur */
3658    
3659              default:
3660              RRETURN(PCRE_ERROR_INTERNAL);
3661            }            }
3662          }          }
3663    
# Line 2452  for (;;) Line 3668  for (;;)
3668          {          {
3669          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3670            {            {
3671              if (eptr >= md->end_subject)
3672                {
3673                SCHECK_PARTIAL();
3674                MRRETURN(MATCH_NOMATCH);
3675                }
3676            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3677            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = UCD_CATEGORY(c);
3678            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3679            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3680              {              {
3681              int len = 1;              int len = 1;
3682              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3683                {                else { GETCHARLEN(c, eptr, len); }
3684                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);  
3685              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3686              eptr += len;              eptr += len;
3687              }              }
# Line 2480  for (;;) Line 3699  for (;;)
3699          case OP_ANY:          case OP_ANY:
3700          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3701            {            {
3702            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3703               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))              {
3704              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3705                MRRETURN(MATCH_NOMATCH);
3706