/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 510 by ph10, Sat Mar 27 17:45:29 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49    #define NLBLOCK md             /* Block containing newline information */
50    #define PSSTART start_subject  /* Field containing processed string start */
51    #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    /* Undefine some potentially clashing cpp symbols */
56    
57  /* Structure for building a chain of data that actually lives on the  #undef min
58  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   USPTR epb_saved_eptr;  
 } eptrblock;  
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_SKIP_ARG     (-996)
78    #define MATCH_THEN         (-995)
79    
80    /* This is a convenience macro for code that occurs many times. */
81    
82    #define MRRETURN(ra) \
83      { \
84      md->mark = markptr; \
85      RRETURN(ra); \
86      }
87    
88  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
89  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
90  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 81  static const char rep_max[] = { 0, 0, 0, Line 98  static const char rep_max[] = { 0, 0, 0,
98    
99    
100    
101  #ifdef DEBUG  #ifdef PCRE_DEBUG
102  /*************************************************  /*************************************************
103  *        Debugging function to print chars       *  *        Debugging function to print chars       *
104  *************************************************/  *************************************************/
# Line 101  Returns:     nothing Line 118  Returns:     nothing
118  static void  static void
119  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
120  {  {
121  int c;  unsigned int c;
122  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
123  while (length-- > 0)  while (length-- > 0)
124    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 133  match_ref(int offset, register USPTR ept Line 150  match_ref(int offset, register USPTR ept
150  {  {
151  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
152    
153  #ifdef DEBUG  #ifdef PCRE_DEBUG
154  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
155    printf("matching subject <null>");    printf("matching subject <null>");
156  else  else
# Line 150  printf("\n"); Line 167  printf("\n");
167    
168  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
169    
170  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
171    properly if Unicode properties are supported. Otherwise, we can check only
172    ASCII characters. */
173    
174  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
175    {    {
176    #ifdef SUPPORT_UTF8
177    #ifdef SUPPORT_UCP
178      if (md->utf8)
179        {
180        USPTR endptr = eptr + length;
181        while (eptr < endptr)
182          {
183          int c, d;
184          GETCHARINC(c, eptr);
185          GETCHARINC(d, p);
186          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
187          }
188        }
189      else
190    #endif
191    #endif
192    
193      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
194      is no UCP support. */
195    
196    while (length-- > 0)    while (length-- > 0)
197      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
198    }    }
199    
200    /* In the caseful case, we can just compare the bytes, whether or not we
201    are in UTF-8 mode. */
202    
203  else  else
204    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
205    
# Line 186  calls by keeping local variables that ne Line 229  calls by keeping local variables that ne
229  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
230  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
231  always used to.  always used to.
232    
233    The original heap-recursive code used longjmp(). However, it seems that this
234    can be very slow on some operating systems. Following a suggestion from Stan
235    Switzer, the use of longjmp() has been abolished, at the cost of having to
236    provide a unique number for each call to RMATCH. There is no way of generating
237    a sequence of numbers at compile time in C. I have given them names, to make
238    them stand out more clearly.
239    
240    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
241    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
242    tests. Furthermore, not using longjmp() means that local dynamic variables
243    don't have indeterminate values; this has meant that the frame size can be
244    reduced because the result can be "passed back" by straight setting of the
245    variable instead of being passed in the frame.
246  ****************************************************************************  ****************************************************************************
247  ***************************************************************************/  ***************************************************************************/
248    
249    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
250    below must be updated in sync.  */
251    
252    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
253           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
254           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
255           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
256           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
257           RM51,  RM52, RM53, RM54 };
258    
259  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
260  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
261    actually used in this definition. */
262    
263  #ifndef NO_RECURSE  #ifndef NO_RECURSE
264  #define REGISTER register  #define REGISTER register
265  #ifdef DEBUG  
266  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #ifdef PCRE_DEBUG
267    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
268    { \    { \
269    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
270    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
271    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
272    }    }
273  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 208  versions and production versions. */ Line 276  versions and production versions. */
276    return ra; \    return ra; \
277    }    }
278  #else  #else
279  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
280    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
281  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
282  #endif  #endif
283    
284  #else  #else
285    
286    
287  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
288  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
289  match(), which never changes. */  argument of match(), which never changes. */
290    
291  #define REGISTER  #define REGISTER
292    
293  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
294    {\    {\
295    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
296    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
297      {\    newframe->Xeptr = ra;\
298      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
299      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
300      newframe->Xoffset_top = rc;\    newframe->Xmarkptr = markptr;\
301      newframe->Xims = re;\    newframe->Xoffset_top = rc;\
302      newframe->Xeptrb = rf;\    newframe->Xims = re;\
303      newframe->Xflags = rg;\    newframe->Xeptrb = rf;\
304      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xflags = rg;\
305      newframe->Xprevframe = frame;\    newframe->Xrdepth = frame->Xrdepth + 1;\
306      frame = newframe;\    newframe->Xprevframe = frame;\
307      DPRINTF(("restarting from line %d\n", __LINE__));\    frame = newframe;\
308      goto HEAP_RECURSE;\    DPRINTF(("restarting from line %d\n", __LINE__));\
309      }\    goto HEAP_RECURSE;\
310    else\    L_##rw:\
311      {\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
312    }    }
313    
314  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 254  match(), which never changes. */ Line 318  match(), which never changes. */
318    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
319    if (frame != NULL)\    if (frame != NULL)\
320      {\      {\
321      frame->Xresult = ra;\      rrc = ra;\
322      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
323      }\      }\
324    return ra;\    return ra;\
325    }    }
# Line 269  typedef struct heapframe { Line 332  typedef struct heapframe {
332    
333    /* Function arguments that may change */    /* Function arguments that may change */
334    
335    const uschar *Xeptr;    USPTR Xeptr;
336    const uschar *Xecode;    const uschar *Xecode;
337      USPTR Xmstart;
338      USPTR Xmarkptr;
339    int Xoffset_top;    int Xoffset_top;
340    long int Xims;    long int Xims;
341    eptrblock *Xeptrb;    eptrblock *Xeptrb;
342    int Xflags;    int Xflags;
343    int Xrdepth;    unsigned int Xrdepth;
344    
345    /* Function local variables */    /* Function local variables */
346    
347    const uschar *Xcallpat;    USPTR Xcallpat;
348    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
349    const uschar *Xdata;    USPTR Xcharptr;
350    const uschar *Xnext;  #endif
351    const uschar *Xpp;    USPTR Xdata;
352    const uschar *Xprev;    USPTR Xnext;
353    const uschar *Xsaved_eptr;    USPTR Xpp;
354      USPTR Xprev;
355      USPTR Xsaved_eptr;
356    
357    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
358    
359    BOOL Xcur_is_word;    BOOL Xcur_is_word;
360    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
361    BOOL Xprev_is_word;    BOOL Xprev_is_word;
362    
363    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
# Line 303  typedef struct heapframe { Line 369  typedef struct heapframe {
369    int Xprop_category;    int Xprop_category;
370    int Xprop_chartype;    int Xprop_chartype;
371    int Xprop_script;    int Xprop_script;
372    int *Xprop_test_variable;    int Xoclength;
373      uschar Xocchars[8];
374  #endif  #endif
375    
376      int Xcodelink;
377    int Xctype;    int Xctype;
378    int Xfc;    unsigned int Xfc;
379    int Xfi;    int Xfi;
380    int Xlength;    int Xlength;
381    int Xmax;    int Xmax;
# Line 321  typedef struct heapframe { Line 389  typedef struct heapframe {
389    
390    eptrblock Xnewptrb;    eptrblock Xnewptrb;
391    
392    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
393    
394    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
395    
396  } heapframe;  } heapframe;
397    
# Line 340  typedef struct heapframe { Line 407  typedef struct heapframe {
407  *         Match from current position            *  *         Match from current position            *
408  *************************************************/  *************************************************/
409    
410  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
411  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
412  same response.  same response. */
413    
414  Performance note: It might be tempting to extract commonly used fields from the  /* These macros pack up tests that are used for partial matching, and which
415  md structure (e.g. utf8, end_subject) into individual variables to improve  appear several times in the code. We set the "hit end" flag if the pointer is
416    at the end of the subject and also past the start of the subject (i.e.
417    something has been matched). For hard partial matching, we then return
418    immediately. The second one is used when we already know we are past the end of
419    the subject. */
420    
421    #define CHECK_PARTIAL()\
422      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
423        {\
424        md->hitend = TRUE;\
425        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
426        }
427    
428    #define SCHECK_PARTIAL()\
429      if (md->partial != 0 && eptr > mstart)\
430        {\
431        md->hitend = TRUE;\
432        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
433        }
434    
435    
436    /* Performance note: It might be tempting to extract commonly used fields from
437    the md structure (e.g. utf8, end_subject) into individual variables to improve
438  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
439  made performance worse.  made performance worse.
440    
441  Arguments:  Arguments:
442     eptr        pointer in subject     eptr        pointer to current character in subject
443     ecode       position in code     ecode       pointer to current position in compiled code
444       mstart      pointer to the current match start position (can be modified
445                     by encountering \K)
446       markptr     pointer to the most recent MARK name, or NULL
447     offset_top  current top pointer     offset_top  current top pointer
448     md          pointer to "static" info for the match     md          pointer to "static" info for the match
449     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 362  Arguments: Line 451  Arguments:
451                   brackets - for testing for empty matches                   brackets - for testing for empty matches
452     flags       can contain     flags       can contain
453                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
454                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
455                       group that can match an empty string
456     rdepth      the recursion depth     rdepth      the recursion depth
457    
458  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
459                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
460                   a negative MATCH_xxx value for PRUNE, SKIP, etc
461                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
462                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
463  */  */
464    
465  static int  static int
466  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
467    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
468    int flags, int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
469  {  {
470  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
471  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
472  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
473    
474  register int  rrc;    /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
475  register int  i;      /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
476  register int  c;      /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
477  register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
478    
479    BOOL minimize, possessive; /* Quantifier options */
480    int condcode;
481    
482  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
483  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 398  frame->Xprevframe = NULL;            /* Line 492  frame->Xprevframe = NULL;            /*
492    
493  frame->Xeptr = eptr;  frame->Xeptr = eptr;
494  frame->Xecode = ecode;  frame->Xecode = ecode;
495    frame->Xmstart = mstart;
496    frame->Xmarkptr = markptr;
497  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
498  frame->Xims = ims;  frame->Xims = ims;
499  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 412  HEAP_RECURSE: Line 508  HEAP_RECURSE:
508    
509  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
510  #define ecode              frame->Xecode  #define ecode              frame->Xecode
511    #define mstart             frame->Xmstart
512    #define markptr            frame->Xmarkptr
513  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
514  #define ims                frame->Xims  #define ims                frame->Xims
515  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 424  HEAP_RECURSE: Line 522  HEAP_RECURSE:
522  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
523  #endif  #endif
524  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
525    #define codelink           frame->Xcodelink
526  #define data               frame->Xdata  #define data               frame->Xdata
527  #define next               frame->Xnext  #define next               frame->Xnext
528  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 434  HEAP_RECURSE: Line 533  HEAP_RECURSE:
533    
534  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
535  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
536  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
537    
538  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
# Line 446  HEAP_RECURSE: Line 544  HEAP_RECURSE:
544  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
545  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
546  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
547  #define prop_test_variable frame->Xprop_test_variable  #define oclength           frame->Xoclength
548    #define occhars            frame->Xocchars
549  #endif  #endif
550    
551  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 470  HEAP_RECURSE: Line 569  HEAP_RECURSE:
569  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
570  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
571    
572  #else  #else         /* NO_RECURSE not defined */
573  #define fi i  #define fi i
574  #define fc c  #define fc c
575    
# Line 489  recursion_info new_recursive;      /* wi Line 588  recursion_info new_recursive;      /* wi
588                                     /* that do not have to be preserved over  */                                     /* that do not have to be preserved over  */
589  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
590  BOOL condition;  BOOL condition;
 BOOL minimize;  
591  BOOL prev_is_word;  BOOL prev_is_word;
592    
593  unsigned long int original_ims;  unsigned long int original_ims;
# Line 501  int prop_fail_result; Line 599  int prop_fail_result;
599  int prop_category;  int prop_category;
600  int prop_chartype;  int prop_chartype;
601  int prop_script;  int prop_script;
602  int *prop_test_variable;  int oclength;
603    uschar occhars[8];
604  #endif  #endif
605    
606    int codelink;
607  int ctype;  int ctype;
608  int length;  int length;
609  int max;  int max;
# Line 516  int save_offset1, save_offset2, save_off Line 616  int save_offset1, save_offset2, save_off
616  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
617    
618  eptrblock newptrb;  eptrblock newptrb;
619  #endif  #endif     /* NO_RECURSE */
620    
621  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
622  variables. */  variables. */
# Line 524  variables. */ Line 624  variables. */
624  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
625  prop_value = 0;  prop_value = 0;
626  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_variable = NULL;  
627  #endif  #endif
628    
629    
630    /* This label is used for tail recursion, which is used in a few cases even
631    when NO_RECURSE is not defined, in order to reduce the amount of stack that is
632    used. Thanks to Ian Taylor for noticing this possibility and sending the
633    original patch. */
634    
635    TAIL_RECURSE:
636    
637  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
638  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
639  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
640  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
641  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
642  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
643  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
644    
645    #ifdef SUPPORT_UTF8
646    utf8 = md->utf8;       /* Local copy of the flag */
647    #else
648    utf8 = FALSE;
649    #endif
650    
651  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
652  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
653    
# Line 542  if (md->match_call_count++ >= md->match_ Line 655  if (md->match_call_count++ >= md->match_
655  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
656    
657  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
 utf8 = md->utf8;       /* Local copy of the flag */  
658    
659  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
660  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
661  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
662  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
663    When match() is called in other circumstances, don't add to the chain. The
664    match_cbegroup flag must NOT be used with tail recursion, because the memory
665    block that is used is on the stack, so a new one may be required for each
666    match(). */
667    
668  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
669    {    {
   newptrb.epb_prev = eptrb;  
670    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
671      newptrb.epb_prev = eptrb;
672    eptrb = &newptrb;    eptrb = &newptrb;
673    }    }
674    
675  /* Now start processing the operations. */  /* Now start processing the opcodes. */
676    
677  for (;;)  for (;;)
678    {    {
679      minimize = possessive = FALSE;
680    op = *ecode;    op = *ecode;
   minimize = FALSE;  
   
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
   /* Opening capturing bracket. If there is space in the offset vector, save  
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
681    
682    if (op > OP_BRA)    switch(op)
683      {      {
684      number = op - OP_BRA;      case OP_MARK:
685        markptr = ecode + 2;
686      /* For extended extraction brackets (large number), we have to fish out the      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
687      number from a dummy opcode at the start. */        ims, eptrb, flags, RM51);
688    
689      if (number > EXTRACT_BASIC_MAX)      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
690        number = GET2(ecode, 2+LINK_SIZE);      argument, and we must check whether that argument matches this MARK's
691        argument. It is passed back in md->start_match_ptr (an overloading of that
692        variable). If it does match, we reset that variable to the current subject
693        position and return MATCH_SKIP. Otherwise, pass back the return code
694        unaltered. */
695    
696        if (rrc == MATCH_SKIP_ARG &&
697            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
698          {
699          md->start_match_ptr = eptr;
700          RRETURN(MATCH_SKIP);
701          }
702    
703        if (md->mark == NULL) md->mark = markptr;
704        RRETURN(rrc);
705    
706        case OP_FAIL:
707        MRRETURN(MATCH_NOMATCH);
708    
709        case OP_COMMIT:
710        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
711          ims, eptrb, flags, RM52);
712        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
713        MRRETURN(MATCH_COMMIT);
714    
715        case OP_PRUNE:
716        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
717          ims, eptrb, flags, RM51);
718        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
719        MRRETURN(MATCH_PRUNE);
720    
721        case OP_PRUNE_ARG:
722        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
723          ims, eptrb, flags, RM51);
724        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
725        md->mark = ecode + 2;
726        RRETURN(MATCH_PRUNE);
727    
728        case OP_SKIP:
729        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
730          ims, eptrb, flags, RM53);
731        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
732        md->start_match_ptr = eptr;   /* Pass back current position */
733        MRRETURN(MATCH_SKIP);
734    
735        case OP_SKIP_ARG:
736        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
737          ims, eptrb, flags, RM53);
738        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
739    
740        /* Pass back the current skip name by overloading md->start_match_ptr and
741        returning the special MATCH_SKIP_ARG return code. This will either be
742        caught by a matching MARK, or get to the top, where it is treated the same
743        as PRUNE. */
744    
745        md->start_match_ptr = ecode + 2;
746        RRETURN(MATCH_SKIP_ARG);
747    
748        case OP_THEN:
749        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
750          ims, eptrb, flags, RM54);
751        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
752        MRRETURN(MATCH_THEN);
753    
754        case OP_THEN_ARG:
755        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
756          ims, eptrb, flags, RM54);
757        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
758        md->mark = ecode + 2;
759        RRETURN(MATCH_THEN);
760    
761        /* Handle a capturing bracket. If there is space in the offset vector, save
762        the current subject position in the working slot at the top of the vector.
763        We mustn't change the current values of the data slot, because they may be
764        set from a previous iteration of this group, and be referred to by a
765        reference inside the group.
766    
767        If the bracket fails to match, we need to restore this value and also the
768        values of the final offsets, in case they were set by a previous iteration
769        of the same bracket.
770    
771        If there isn't enough space in the offset vector, treat this as if it were
772        a non-capturing bracket. Don't worry about setting the flag for the error
773        case here; that is handled in the code for KET. */
774    
775        case OP_CBRA:
776        case OP_SCBRA:
777        number = GET2(ecode, 1+LINK_SIZE);
778      offset = number << 1;      offset = number << 1;
779    
780  #ifdef DEBUG  #ifdef PCRE_DEBUG
781      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
782        printf("subject=");
783      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
784      printf("\n");      printf("\n");
785  #endif  #endif
# Line 612  for (;;) Line 794  for (;;)
794        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
795        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
796    
797          flags = (op == OP_SCBRA)? match_cbegroup : 0;
798        do        do
799          {          {
800          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
801            match_isgroup);            ims, eptrb, flags, RM1);
802          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
803          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
804          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
805          }          }
# Line 628  for (;;) Line 811  for (;;)
811        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
812        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
813    
814          if (rrc != MATCH_THEN) md->mark = markptr;
815        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
816        }        }
817    
818      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
819        as a non-capturing bracket. */
820    
821      else op = OP_BRA;      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
822      }      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
823    
824    /* Other types of node can be handled by a switch */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
825    
826    switch(op)      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
827      {      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
828      case OP_BRA:     /* Non-capturing bracket: optimized */  
829      DPRINTF(("start bracket 0\n"));      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
830      do      final alternative within the brackets, we would return the result of a
831        recursive call to match() whatever happened. We can reduce stack usage by
832        turning this into a tail recursion, except in the case when match_cbegroup
833      is set. */
834    
835        case OP_BRA:
836        case OP_SBRA:
837        DPRINTF(("start non-capturing bracket\n"));
838        flags = (op >= OP_SBRA)? match_cbegroup : 0;
839        for (;;)
840        {        {
841        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
842          match_isgroup);          {
843        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (flags == 0)    /* Not a possibly empty group */
844              {
845              ecode += _pcre_OP_lengths[*ecode];
846              DPRINTF(("bracket 0 tail recursion\n"));
847              goto TAIL_RECURSE;
848              }
849    
850            /* Possibly empty group; can't use tail recursion. */
851    
852            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
853              eptrb, flags, RM48);
854            if (rrc == MATCH_NOMATCH) md->mark = markptr;
855            RRETURN(rrc);
856            }
857    
858          /* For non-final alternatives, continue the loop for a NOMATCH result;
859          otherwise return. */
860    
861          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
862            eptrb, flags, RM2);
863          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
864        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
865        }        }
866      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
867    
868      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
869      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
870      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
871      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
872        obeyed, we can use tail recursion to avoid using another stack frame. */
873    
874      case OP_COND:      case OP_COND:
875      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
876        codelink= GET(ecode, 1);
877    
878        /* Because of the way auto-callout works during compile, a callout item is
879        inserted between OP_COND and an assertion condition. */
880    
881        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
882          {
883          if (pcre_callout != NULL)
884            {
885            pcre_callout_block cb;
886            cb.version          = 1;   /* Version 1 of the callout block */
887            cb.callout_number   = ecode[LINK_SIZE+2];
888            cb.offset_vector    = md->offset_vector;
889            cb.subject          = (PCRE_SPTR)md->start_subject;
890            cb.subject_length   = md->end_subject - md->start_subject;
891            cb.start_match      = mstart - md->start_subject;
892            cb.current_position = eptr - md->start_subject;
893            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
894            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
895            cb.capture_top      = offset_top/2;
896            cb.capture_last     = md->capture_last;
897            cb.callout_data     = md->callout_data;
898            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
899            if (rrc < 0) RRETURN(rrc);
900            }
901          ecode += _pcre_OP_lengths[OP_CALLOUT];
902          }
903    
904        condcode = ecode[LINK_SIZE+1];
905    
906        /* Now see what the actual condition is */
907    
908        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
909          {
910          if (md->recursive == NULL)                /* Not recursing => FALSE */
911            {
912            condition = FALSE;
913            ecode += GET(ecode, 1);
914            }
915          else
916            {
917            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
918            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
919    
920            /* If the test is for recursion into a specific subpattern, and it is
921            false, but the test was set up by name, scan the table to see if the
922            name refers to any other numbers, and test them. The condition is true
923            if any one is set. */
924    
925            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
926              {
927              uschar *slotA = md->name_table;
928              for (i = 0; i < md->name_count; i++)
929                {
930                if (GET2(slotA, 0) == recno) break;
931                slotA += md->name_entry_size;
932                }
933    
934              /* Found a name for the number - there can be only one; duplicate
935              names for different numbers are allowed, but not vice versa. First
936              scan down for duplicates. */
937    
938              if (i < md->name_count)
939                {
940                uschar *slotB = slotA;
941                while (slotB > md->name_table)
942                  {
943                  slotB -= md->name_entry_size;
944                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
945                    {
946                    condition = GET2(slotB, 0) == md->recursive->group_num;
947                    if (condition) break;
948                    }
949                  else break;
950                  }
951    
952                /* Scan up for duplicates */
953    
954                if (!condition)
955                  {
956                  slotB = slotA;
957                  for (i++; i < md->name_count; i++)
958                    {
959                    slotB += md->name_entry_size;
960                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
961                      {
962                      condition = GET2(slotB, 0) == md->recursive->group_num;
963                      if (condition) break;
964                      }
965                    else break;
966                    }
967                  }
968                }
969              }
970    
971            /* Choose branch according to the condition */
972    
973            ecode += condition? 3 : GET(ecode, 1);
974            }
975          }
976    
977        else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
978        {        {
979        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
980        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
981          (md->recursive != NULL) :  
982          (offset < offset_top && md->offset_vector[offset] >= 0);        /* If the numbered capture is unset, but the reference was by name,
983        RMATCH(rrc, eptr, ecode + (condition?        scan the table to see if the name refers to any other numbers, and test
984          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),        them. The condition is true if any one is set. This is tediously similar
985          offset_top, md, ims, eptrb, match_isgroup);        to the code above, but not close enough to try to amalgamate. */
986        RRETURN(rrc);  
987          if (!condition && condcode == OP_NCREF)
988            {
989            int refno = offset >> 1;
990            uschar *slotA = md->name_table;
991    
992            for (i = 0; i < md->name_count; i++)
993              {
994              if (GET2(slotA, 0) == refno) break;
995              slotA += md->name_entry_size;
996              }
997    
998            /* Found a name for the number - there can be only one; duplicate names
999            for different numbers are allowed, but not vice versa. First scan down
1000            for duplicates. */
1001    
1002            if (i < md->name_count)
1003              {
1004              uschar *slotB = slotA;
1005              while (slotB > md->name_table)
1006                {
1007                slotB -= md->name_entry_size;
1008                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1009                  {
1010                  offset = GET2(slotB, 0) << 1;
1011                  condition = offset < offset_top &&
1012                    md->offset_vector[offset] >= 0;
1013                  if (condition) break;
1014                  }
1015                else break;
1016                }
1017    
1018              /* Scan up for duplicates */
1019    
1020              if (!condition)
1021                {
1022                slotB = slotA;
1023                for (i++; i < md->name_count; i++)
1024                  {
1025                  slotB += md->name_entry_size;
1026                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1027                    {
1028                    offset = GET2(slotB, 0) << 1;
1029                    condition = offset < offset_top &&
1030                      md->offset_vector[offset] >= 0;
1031                    if (condition) break;
1032                    }
1033                  else break;
1034                  }
1035                }
1036              }
1037            }
1038    
1039          /* Choose branch according to the condition */
1040    
1041          ecode += condition? 3 : GET(ecode, 1);
1042          }
1043    
1044        else if (condcode == OP_DEF)     /* DEFINE - always false */
1045          {
1046          condition = FALSE;
1047          ecode += GET(ecode, 1);
1048        }        }
1049    
1050      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
1051      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
1052        assertion. */
1053    
1054      else      else
1055        {        {
1056        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1057            match_condassert | match_isgroup);            match_condassert, RM3);
1058        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1059          {          {
1060          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
1061            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1062          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1063          }          }
1064        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1065          {          {
1066          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1067          }          }
1068        else ecode += GET(ecode, 1);        else
1069        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
1070          match_isgroup);          condition = FALSE;
1071        RRETURN(rrc);          ecode += codelink;
1072            }
1073        }        }
     /* Control never reaches here */  
1074    
1075      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
1076      encountered. */      we can use tail recursion to avoid using another stack frame, except when
1077        match_cbegroup is required for an unlimited repeat of a possibly empty
1078        group. If the second alternative doesn't exist, we can just plough on. */
1079    
1080        if (condition || *ecode == OP_ALT)
1081          {
1082          ecode += 1 + LINK_SIZE;
1083          if (op == OP_SCOND)        /* Possibly empty group */
1084            {
1085            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1086            RRETURN(rrc);
1087            }
1088          else                       /* Group must match something */
1089            {
1090            flags = 0;
1091            goto TAIL_RECURSE;
1092            }
1093          }
1094        else                         /* Condition false & no alternative */
1095          {
1096          ecode += 1 + LINK_SIZE;
1097          }
1098        break;
1099    
1100    
1101        /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1102        to close any currently open capturing brackets. */
1103    
1104        case OP_CLOSE:
1105        number = GET2(ecode, 1);
1106        offset = number << 1;
1107    
1108    #ifdef PCRE_DEBUG
1109          printf("end bracket %d at *ACCEPT", number);
1110          printf("\n");
1111    #endif
1112    
1113      case OP_CREF:      md->capture_last = number;
1114      case OP_BRANUMBER:      if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1115          {
1116          md->offset_vector[offset] =
1117            md->offset_vector[md->offset_end - number];
1118          md->offset_vector[offset+1] = eptr - md->start_subject;
1119          if (offset_top <= offset) offset_top = offset + 2;
1120          }
1121      ecode += 3;      ecode += 3;
1122      break;      break;
1123    
     /* End of the pattern. If we are in a recursion, we should restore the  
     offsets appropriately and continue from after the call. */  
1124    
1125        /* End of the pattern, either real or forced. If we are in a top-level
1126        recursion, we should restore the offsets appropriately and continue from
1127        after the call. */
1128    
1129        case OP_ACCEPT:
1130      case OP_END:      case OP_END:
1131      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1132        {        {
# Line 713  for (;;) Line 1135  for (;;)
1135        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1136        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1137          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1138        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1139        ims = original_ims;        ims = original_ims;
1140        ecode = rec->after_call;        ecode = rec->after_call;
1141        break;        break;
1142        }        }
1143    
1144      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1145      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1146        the subject. In both cases, backtracking will then try other alternatives,
1147      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if any. */
1148      md->end_match_ptr = eptr;          /* Record where we ended */  
1149      md->end_offset_top = offset_top;   /* and how many extracts were taken */      if (eptr == mstart &&
1150      RRETURN(MATCH_MATCH);          (md->notempty ||
1151              (md->notempty_atstart &&
1152                mstart == md->start_subject + md->start_offset)))
1153          MRRETURN(MATCH_NOMATCH);
1154    
1155        /* Otherwise, we have a match. */
1156    
1157        md->end_match_ptr = eptr;           /* Record where we ended */
1158        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1159        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1160        MRRETURN(MATCH_MATCH);
1161    
1162      /* Change option settings */      /* Change option settings */
1163    
# Line 745  for (;;) Line 1177  for (;;)
1177      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1178      do      do
1179        {        {
1180        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1181          match_isgroup);          RM4);
1182        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH)
1183        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          {
1184            mstart = md->start_match_ptr;   /* In case \K reset it */
1185            break;
1186            }
1187          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1188        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1189        }        }
1190      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1191      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1192    
1193      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1194    
# Line 766  for (;;) Line 1202  for (;;)
1202      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1203      continue;      continue;
1204    
1205      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1206        PRUNE, or COMMIT means we must assume failure without checking subsequent
1207        branches. */
1208    
1209      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1210      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1211      do      do
1212        {        {
1213        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1214          match_isgroup);          RM5);
1215        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) MRRETURN(MATCH_NOMATCH);
1216        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1217            {
1218            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1219            break;
1220            }
1221          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1222        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1223        }        }
1224      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 794  for (;;) Line 1237  for (;;)
1237  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1238      if (utf8)      if (utf8)
1239        {        {
1240        c = GET(ecode,1);        i = GET(ecode, 1);
1241        for (i = 0; i < c; i++)        while (i-- > 0)
1242          {          {
1243          eptr--;          eptr--;
1244          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1245          BACKCHAR(eptr)          BACKCHAR(eptr);
1246          }          }
1247        }        }
1248      else      else
# Line 808  for (;;) Line 1251  for (;;)
1251      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1252    
1253        {        {
1254        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
1255        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1256        }        }
1257    
1258      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1259    
1260        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1261      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1262      break;      break;
1263    
# Line 830  for (;;) Line 1274  for (;;)
1274        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1275        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1276        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
1277        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
1278        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
1279        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1280        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1281        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1282        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1283        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1284        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1285        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1286        }        }
1287      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 865  for (;;) Line 1309  for (;;)
1309      case OP_RECURSE:      case OP_RECURSE:
1310        {        {
1311        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1312        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
1313            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
1314    
1315        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1316    
# Line 897  for (;;) Line 1336  for (;;)
1336    
1337        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1338              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1339        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1340    
1341        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1342        restore the offset and recursion data. */        restore the offset and recursion data. */
1343    
1344        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1345          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1346        do        do
1347          {          {
1348          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1349              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
1350          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
1351            {            {
1352            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1353            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1354            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1355              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1356            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1357            }            }
1358          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1359            {            {
1360            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1361              if (new_recursive.offset_save != stacksave)
1362                (pcre_free)(new_recursive.offset_save);
1363            RRETURN(rrc);            RRETURN(rrc);
1364            }            }
1365    
# Line 933  for (;;) Line 1374  for (;;)
1374        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1375        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1376          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1377        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1378        }        }
1379      /* Control never reaches here */      /* Control never reaches here */
1380    
# Line 942  for (;;) Line 1383  for (;;)
1383      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1384      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1385      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1386      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1387        the start-of-match value in case it was changed by \K. */
1388    
1389      case OP_ONCE:      case OP_ONCE:
1390        {      prev = ecode;
1391        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1392    
1393        do      do
1394          {
1395          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1396          if (rrc == MATCH_MATCH)
1397          {          {
1398          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,          mstart = md->start_match_ptr;
1399            eptrb, match_isgroup);          break;
         if (rrc == MATCH_MATCH) break;  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         ecode += GET(ecode,1);  
1400          }          }
1401        while (*ecode == OP_ALT);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1402          ecode += GET(ecode,1);
1403          }
1404        while (*ecode == OP_ALT);
1405    
1406        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1407    
1408        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1409    
1410        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1411        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1412    
1413        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1414    
1415        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1416        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1417    
1418        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1419        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1420        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1421        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1422        course of events. */      course of events. */
1423    
1424        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1425          {        {
1426          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1427          break;        break;
1428          }        }
1429    
1430        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1431        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1432        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1433        opcode. */      any options that changed within the bracket before re-running it, so
1434        check the next opcode. */
1435    
1436        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1437          {        {
1438          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1439          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1440          }        }
1441    
1442        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1443          {        {
1444          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1445          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1446          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1447          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = 0;
1448          }        goto TAIL_RECURSE;
1449        else  /* OP_KETRMAX */        }
1450          {      else  /* OP_KETRMAX */
1451          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        {
1452          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1453          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1454          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += 1 + LINK_SIZE;
1455          }        flags = 0;
1456          goto TAIL_RECURSE;
1457        }        }
1458      RRETURN(MATCH_NOMATCH);      /* Control never gets here */
1459    
1460      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1461      bracketed group and go to there. */      bracketed group and go to there. */
# Line 1018  for (;;) Line 1464  for (;;)
1464      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1465      break;      break;
1466    
1467      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1468      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1469      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1470      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1471      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1472    
1473      case OP_BRAZERO:      case OP_BRAZERO:
1474        {        {
1475        next = ecode+1;        next = ecode+1;
1476        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1477        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1478        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1479        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1480        }        }
1481      break;      break;
1482    
1483      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1484        {        {
1485        next = ecode+1;        next = ecode+1;
1486        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1487        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1488        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1489        ecode++;        ecode++;
1490        }        }
1491      break;      break;
1492    
1493      /* End of a group, repeated or non-repeating. If we are at the end of      case OP_SKIPZERO:
1494      an assertion "group", stop matching and return MATCH_MATCH, but record the        {
1495      current high water mark for use by positive assertions. Do this also        next = ecode+1;
1496      for the "once" (not-backup up) groups. */        do next += GET(next,1); while (*next == OP_ALT);
1497          ecode = next + 1 + LINK_SIZE;
1498          }
1499        break;
1500    
1501        /* End of a group, repeated or non-repeating. */
1502    
1503      case OP_KET:      case OP_KET:
1504      case OP_KETRMIN:      case OP_KETRMIN:
1505      case OP_KETRMAX:      case OP_KETRMAX:
1506        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
   
       /* Back up the stack of bracket start pointers. */  
1507    
1508        eptrb = eptrb->epb_prev;      /* If this was a group that remembered the subject start, in order to break
1509        infinite repeats of empty string matches, retrieve the subject start from
1510        the chain. Otherwise, set it NULL. */
1511    
1512        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev >= OP_SBRA)
1513            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        {
1514            *prev == OP_ONCE)        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1515          {        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1516          md->end_match_ptr = eptr;      /* For ONCE */        }
1517          md->end_offset_top = offset_top;      else saved_eptr = NULL;
         RRETURN(MATCH_MATCH);  
         }  
1518    
1519        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group or an atomic group, stop
1520        group number back at the start and if necessary complete handling an      matching and return MATCH_MATCH, but record the current high water mark for
1521        extraction by setting the offsets and bumping the high water mark. */      use by positive assertions. We also need to record the match start in case
1522        it was changed by \K. */
1523    
1524        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1525          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1526          number = *prev - OP_BRA;          *prev == OP_ONCE)
1527          {
1528          md->end_match_ptr = eptr;      /* For ONCE */
1529          md->end_offset_top = offset_top;
1530          md->start_match_ptr = mstart;
1531          MRRETURN(MATCH_MATCH);
1532          }
1533    
1534          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1535          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1536        bumping the high water mark. Note that whole-pattern recursion is coded as
1537        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1538        when the OP_END is reached. Other recursion is handled here. */
1539    
1540          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1541          offset = number << 1;        {
1542          number = GET2(prev, 1+LINK_SIZE);
1543          offset = number << 1;
1544    
1545  #ifdef DEBUG  #ifdef PCRE_DEBUG
1546          printf("end bracket %d", number);        printf("end bracket %d", number);
1547          printf("\n");        printf("\n");
1548  #endif  #endif
1549    
1550          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1551          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
         into group 0, so it won't be picked up here. Instead, we catch it when  
         the OP_END is reached. */  
   
         if (number > 0)  
           {  
           md->capture_last = number;  
           if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
             {  
             md->offset_vector[offset] =  
               md->offset_vector[md->offset_end - number];  
             md->offset_vector[offset+1] = eptr - md->start_subject;  
             if (offset_top <= offset) offset_top = offset + 2;  
             }  
   
           /* Handle a recursively called group. Restore the offsets  
           appropriately and continue from after the call. */  
   
           if (md->recursive != NULL && md->recursive->group_num == number)  
             {  
             recursion_info *rec = md->recursive;  
             DPRINTF(("Recursion (%d) succeeded - continuing\n", number));  
             md->recursive = rec->prevrec;  
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
         }  
   
       /* Reset the value of the ims flags, in case they got changed during  
       the group. */  
   
       ims = original_ims;  
       DPRINTF(("ims reset to %02lx\n", ims));  
   
       /* For a non-repeating ket, just continue at this level. This also  
       happens for a repeating ket if no characters were matched in the group.  
       This is the forcible breaking of infinite loops as implemented in Perl  
       5.005. If there is an options reset, it will get obeyed in the normal  
       course of events. */  
   
       if (*ecode == OP_KET || eptr == saved_eptr)  
1552          {          {
1553          ecode += 1 + LINK_SIZE;          md->offset_vector[offset] =
1554          break;            md->offset_vector[md->offset_end - number];
1555            md->offset_vector[offset+1] = eptr - md->start_subject;
1556            if (offset_top <= offset) offset_top = offset + 2;
1557          }          }
1558    
1559        /* The repeating kets try the rest of the pattern or restart from the        /* Handle a recursively called group. Restore the offsets
1560        preceding bracket, in the appropriate order. */        appropriately and continue from after the call. */
1561    
1562        if (*ecode == OP_KETRMIN)        if (md->recursive != NULL && md->recursive->group_num == number)
         {  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
       else  /* OP_KETRMAX */  
1563          {          {
1564          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);          recursion_info *rec = md->recursive;
1565          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1566          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);          md->recursive = rec->prevrec;
1567          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          memcpy(md->offset_vector, rec->offset_save,
1568              rec->saved_max * sizeof(int));
1569            offset_top = rec->save_offset_top;
1570            ecode = rec->after_call;
1571            ims = original_ims;
1572            break;
1573          }          }
1574        }        }
1575    
1576      RRETURN(MATCH_NOMATCH);      /* For both capturing and non-capturing groups, reset the value of the ims
1577        flags, in case they got changed during the group. */
1578    
1579      /* Start of subject unless notbol, or after internal newline if multiline */      ims = original_ims;
1580        DPRINTF(("ims reset to %02lx\n", ims));
1581    
1582      case OP_CIRC:      /* For a non-repeating ket, just continue at this level. This also
1583      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      happens for a repeating ket if no characters were matched in the group.
1584      if ((ims & PCRE_MULTILINE) != 0)      This is the forcible breaking of infinite loops as implemented in Perl
1585        5.005. If there is an options reset, it will get obeyed in the normal
1586        course of events. */
1587    
1588        if (*ecode == OP_KET || eptr == saved_eptr)
1589        {        {
1590        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        ecode += 1 + LINK_SIZE;
1591          RRETURN(MATCH_NOMATCH);        break;
1592          }
1593    
1594        /* The repeating kets try the rest of the pattern or restart from the
1595        preceding bracket, in the appropriate order. In the second case, we can use
1596        tail recursion to avoid using another stack frame, unless we have an
1597        unlimited repeat of a group that can match an empty string. */
1598    
1599        flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1600    
1601        if (*ecode == OP_KETRMIN)
1602          {
1603          RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1604          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1605          if (flags != 0)    /* Could match an empty string */
1606            {
1607            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1608            RRETURN(rrc);
1609            }
1610          ecode = prev;
1611          goto TAIL_RECURSE;
1612          }
1613        else  /* OP_KETRMAX */
1614          {
1615          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1616          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1617          ecode += 1 + LINK_SIZE;
1618          flags = 0;
1619          goto TAIL_RECURSE;
1620          }
1621        /* Control never gets here */
1622    
1623        /* Start of subject unless notbol, or after internal newline if multiline */
1624    
1625        case OP_CIRC:
1626        if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1627        if ((ims & PCRE_MULTILINE) != 0)
1628          {
1629          if (eptr != md->start_subject &&
1630              (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1631            MRRETURN(MATCH_NOMATCH);
1632        ecode++;        ecode++;
1633        break;        break;
1634        }        }
# Line 1178  for (;;) Line 1637  for (;;)
1637      /* Start of subject assertion */      /* Start of subject assertion */
1638    
1639      case OP_SOD:      case OP_SOD:
1640      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1641      ecode++;      ecode++;
1642      break;      break;
1643    
1644      /* Start of match assertion */      /* Start of match assertion */
1645    
1646      case OP_SOM:      case OP_SOM:
1647      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1648        ecode++;
1649        break;
1650    
1651        /* Reset the start of match point */
1652    
1653        case OP_SET_SOM:
1654        mstart = eptr;
1655      ecode++;      ecode++;
1656      break;      break;
1657    
# Line 1196  for (;;) Line 1662  for (;;)
1662      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1663        {        {
1664        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1665          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1666        else        else
1667          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1668        ecode++;        ecode++;
1669        break;        break;
1670        }        }
1671      else      else
1672        {        {
1673        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1674        if (!md->endonly)        if (!md->endonly)
1675          {          {
1676          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1677             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1678            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
1679          ecode++;          ecode++;
1680          break;          break;
1681          }          }
1682        }        }
1683      /* ... else fall through */      /* ... else fall through for endonly */
1684    
1685      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1686    
1687      case OP_EOD:      case OP_EOD:
1688      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1689      ecode++;      ecode++;
1690      break;      break;
1691    
1692      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1693    
1694      case OP_EODN:      case OP_EODN:
1695      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1696         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1697          MRRETURN(MATCH_NOMATCH);
1698      ecode++;      ecode++;
1699      break;      break;
1700    
# Line 1239  for (;;) Line 1706  for (;;)
1706    
1707        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1708        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1709        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1710          partial matching. */
1711    
1712  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1713        if (utf8)        if (utf8)
1714          {          {
1715          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1716            {            {
1717            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1718            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1719              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1720            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1721            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1722            }            }
1723          if (eptr >= md->end_subject) cur_is_word = FALSE; else          if (eptr >= md->end_subject)
1724              {
1725              SCHECK_PARTIAL();
1726              cur_is_word = FALSE;
1727              }
1728            else
1729            {            {
1730            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1731            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1260  for (;;) Line 1734  for (;;)
1734        else        else
1735  #endif  #endif
1736    
1737        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode */
1738    
1739          {          {
1740          prev_is_word = (eptr != md->start_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1741            ((md->ctypes[eptr[-1]] & ctype_word) != 0);            {
1742          cur_is_word = (eptr < md->end_subject) &&            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1743            ((md->ctypes[*eptr] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1744              }
1745            if (eptr >= md->end_subject)
1746              {
1747              SCHECK_PARTIAL();
1748              cur_is_word = FALSE;
1749              }
1750            else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1751          }          }
1752    
1753        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
1754    
1755        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
1756             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1757          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1758        }        }
1759      break;      break;
1760    
1761      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1762    
1763      case OP_ANY:      case OP_ANY:
1764      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1765        RRETURN(MATCH_NOMATCH);      /* Fall through */
1766      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);  
1767  #ifdef SUPPORT_UTF8      case OP_ALLANY:
1768      if (utf8)      if (eptr++ >= md->end_subject)
1769        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        {
1770  #endif        SCHECK_PARTIAL();
1771          MRRETURN(MATCH_NOMATCH);
1772          }
1773        if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1774      ecode++;      ecode++;
1775      break;      break;
1776    
# Line 1294  for (;;) Line 1778  for (;;)
1778      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1779    
1780      case OP_ANYBYTE:      case OP_ANYBYTE:
1781      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1782          {
1783          SCHECK_PARTIAL();
1784          MRRETURN(MATCH_NOMATCH);
1785          }
1786      ecode++;      ecode++;
1787      break;      break;
1788    
1789      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1790      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1791          {
1792          SCHECK_PARTIAL();
1793          MRRETURN(MATCH_NOMATCH);
1794          }
1795      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1796      if (      if (
1797  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1307  for (;;) Line 1799  for (;;)
1799  #endif  #endif
1800         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1801         )         )
1802        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1803      ecode++;      ecode++;
1804      break;      break;
1805    
1806      case OP_DIGIT:      case OP_DIGIT:
1807      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1808          {
1809          SCHECK_PARTIAL();
1810          MRRETURN(MATCH_NOMATCH);
1811          }
1812      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1813      if (      if (
1814  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1320  for (;;) Line 1816  for (;;)
1816  #endif  #endif
1817         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1818         )         )
1819        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1820      ecode++;      ecode++;
1821      break;      break;
1822    
1823      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1824      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1825          {
1826          SCHECK_PARTIAL();
1827          MRRETURN(MATCH_NOMATCH);
1828          }
1829      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1830      if (      if (
1831  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1333  for (;;) Line 1833  for (;;)
1833  #endif  #endif
1834         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1835         )         )
1836        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1837      ecode++;      ecode++;
1838      break;      break;
1839    
1840      case OP_WHITESPACE:      case OP_WHITESPACE:
1841      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1842          {
1843          SCHECK_PARTIAL();
1844          MRRETURN(MATCH_NOMATCH);
1845          }
1846      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1847      if (      if (
1848  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1346  for (;;) Line 1850  for (;;)
1850  #endif  #endif
1851         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1852         )         )
1853        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1854      ecode++;      ecode++;
1855      break;      break;
1856    
1857      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1858      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1859          {
1860          SCHECK_PARTIAL();
1861          MRRETURN(MATCH_NOMATCH);
1862          }
1863      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1864      if (      if (
1865  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1359  for (;;) Line 1867  for (;;)
1867  #endif  #endif
1868         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1869         )         )
1870        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1871      ecode++;      ecode++;
1872      break;      break;
1873    
1874      case OP_WORDCHAR:      case OP_WORDCHAR:
1875      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1876          {
1877          SCHECK_PARTIAL();
1878          MRRETURN(MATCH_NOMATCH);
1879          }
1880      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1881      if (      if (
1882  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1372  for (;;) Line 1884  for (;;)
1884  #endif  #endif
1885         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1886         )         )
1887        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1888        ecode++;
1889        break;
1890    
1891        case OP_ANYNL:
1892        if (eptr >= md->end_subject)
1893          {
1894          SCHECK_PARTIAL();
1895          MRRETURN(MATCH_NOMATCH);
1896          }
1897        GETCHARINCTEST(c, eptr);
1898        switch(c)
1899          {
1900          default: MRRETURN(MATCH_NOMATCH);
1901          case 0x000d:
1902          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1903          break;
1904    
1905          case 0x000a:
1906          break;
1907    
1908          case 0x000b:
1909          case 0x000c:
1910          case 0x0085:
1911          case 0x2028:
1912          case 0x2029:
1913          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1914          break;
1915          }
1916        ecode++;
1917        break;
1918    
1919        case OP_NOT_HSPACE:
1920        if (eptr >= md->end_subject)
1921          {
1922          SCHECK_PARTIAL();
1923          MRRETURN(MATCH_NOMATCH);
1924          }
1925        GETCHARINCTEST(c, eptr);
1926        switch(c)
1927          {
1928          default: break;
1929          case 0x09:      /* HT */
1930          case 0x20:      /* SPACE */
1931          case 0xa0:      /* NBSP */
1932          case 0x1680:    /* OGHAM SPACE MARK */
1933          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1934          case 0x2000:    /* EN QUAD */
1935          case 0x2001:    /* EM QUAD */
1936          case 0x2002:    /* EN SPACE */
1937          case 0x2003:    /* EM SPACE */
1938          case 0x2004:    /* THREE-PER-EM SPACE */
1939          case 0x2005:    /* FOUR-PER-EM SPACE */
1940          case 0x2006:    /* SIX-PER-EM SPACE */
1941          case 0x2007:    /* FIGURE SPACE */
1942          case 0x2008:    /* PUNCTUATION SPACE */
1943          case 0x2009:    /* THIN SPACE */
1944          case 0x200A:    /* HAIR SPACE */
1945          case 0x202f:    /* NARROW NO-BREAK SPACE */
1946          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1947          case 0x3000:    /* IDEOGRAPHIC SPACE */
1948          MRRETURN(MATCH_NOMATCH);
1949          }
1950        ecode++;
1951        break;
1952    
1953        case OP_HSPACE:
1954        if (eptr >= md->end_subject)
1955          {
1956          SCHECK_PARTIAL();
1957          MRRETURN(MATCH_NOMATCH);
1958          }
1959        GETCHARINCTEST(c, eptr);
1960        switch(c)
1961          {
1962          default: MRRETURN(MATCH_NOMATCH);
1963          case 0x09:      /* HT */
1964          case 0x20:      /* SPACE */
1965          case 0xa0:      /* NBSP */
1966          case 0x1680:    /* OGHAM SPACE MARK */
1967          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1968          case 0x2000:    /* EN QUAD */
1969          case 0x2001:    /* EM QUAD */
1970          case 0x2002:    /* EN SPACE */
1971          case 0x2003:    /* EM SPACE */
1972          case 0x2004:    /* THREE-PER-EM SPACE */
1973          case 0x2005:    /* FOUR-PER-EM SPACE */
1974          case 0x2006:    /* SIX-PER-EM SPACE */
1975          case 0x2007:    /* FIGURE SPACE */
1976          case 0x2008:    /* PUNCTUATION SPACE */
1977          case 0x2009:    /* THIN SPACE */
1978          case 0x200A:    /* HAIR SPACE */
1979          case 0x202f:    /* NARROW NO-BREAK SPACE */
1980          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1981          case 0x3000:    /* IDEOGRAPHIC SPACE */
1982          break;
1983          }
1984        ecode++;
1985        break;
1986    
1987        case OP_NOT_VSPACE:
1988        if (eptr >= md->end_subject)
1989          {
1990          SCHECK_PARTIAL();
1991          MRRETURN(MATCH_NOMATCH);
1992          }
1993        GETCHARINCTEST(c, eptr);
1994        switch(c)
1995          {
1996          default: break;
1997          case 0x0a:      /* LF */
1998          case 0x0b:      /* VT */
1999          case 0x0c:      /* FF */
2000          case 0x0d:      /* CR */
2001          case 0x85:      /* NEL */
2002          case 0x2028:    /* LINE SEPARATOR */
2003          case 0x2029:    /* PARAGRAPH SEPARATOR */
2004          MRRETURN(MATCH_NOMATCH);
2005          }
2006        ecode++;
2007        break;
2008    
2009        case OP_VSPACE:
2010        if (eptr >= md->end_subject)
2011          {
2012          SCHECK_PARTIAL();
2013          MRRETURN(MATCH_NOMATCH);
2014          }
2015        GETCHARINCTEST(c, eptr);
2016        switch(c)
2017          {
2018          default: MRRETURN(MATCH_NOMATCH);
2019          case 0x0a:      /* LF */
2020          case 0x0b:      /* VT */
2021          case 0x0c:      /* FF */
2022          case 0x0d:      /* CR */
2023          case 0x85:      /* NEL */
2024          case 0x2028:    /* LINE SEPARATOR */
2025          case 0x2029:    /* PARAGRAPH SEPARATOR */
2026          break;
2027          }
2028      ecode++;      ecode++;
2029      break;      break;
2030    
# Line 1382  for (;;) Line 2034  for (;;)
2034    
2035      case OP_PROP:      case OP_PROP:
2036      case OP_NOTPROP:      case OP_NOTPROP:
2037      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2038          {
2039          SCHECK_PARTIAL();
2040          MRRETURN(MATCH_NOMATCH);
2041          }
2042      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2043        {        {
2044        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
2045    
2046        switch(ecode[1])        switch(ecode[1])
2047          {          {
2048          case PT_ANY:          case PT_ANY:
2049          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2050          break;          break;
2051    
2052          case PT_LAMP:          case PT_LAMP:
2053          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2054               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2055               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2056            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2057           break;           break;
2058    
2059          case PT_GC:          case PT_GC:
2060          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2061            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2062          break;          break;
2063    
2064          case PT_PC:          case PT_PC:
2065          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2066            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2067          break;          break;
2068    
2069          case PT_SC:          case PT_SC:
2070          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2071            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2072          break;          break;
2073    
2074          default:          default:
2075          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
         break;  
2076          }          }
2077    
2078        ecode += 3;        ecode += 3;
# Line 1429  for (;;) Line 2083  for (;;)
2083      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2084    
2085      case OP_EXTUNI:      case OP_EXTUNI:
2086      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2087          {
2088          SCHECK_PARTIAL();
2089          MRRETURN(MATCH_NOMATCH);
2090          }
2091      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2092        {        {
2093        int chartype, script;        int category = UCD_CATEGORY(c);
2094        int category = _pcre_ucp_findprop(c, &chartype, &script);        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2095        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2096          {          {
2097          int len = 1;          int len = 1;
# Line 1442  for (;;) Line 2099  for (;;)
2099            {            {
2100            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2101            }            }
2102          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2103          if (category != ucp_M) break;          if (category != ucp_M) break;
2104          eptr += len;          eptr += len;
2105          }          }
# Line 1463  for (;;) Line 2120  for (;;)
2120      case OP_REF:      case OP_REF:
2121        {        {
2122        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2123        ecode += 3;                                 /* Advance past item */        ecode += 3;
2124    
2125          /* If the reference is unset, there are two possibilities:
2126    
2127        /* If the reference is unset, set the length to be longer than the amount        (a) In the default, Perl-compatible state, set the length to be longer
2128        of subject left; this ensures that every attempt at a match fails. We        than the amount of subject left; this ensures that every attempt at a
2129        can't just fail here, because of the possibility of quantifiers with zero        match fails. We can't just fail here, because of the possibility of
2130        minima. */        quantifiers with zero minima.
2131    
2132        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        (b) If the JavaScript compatibility flag is set, set the length to zero
2133          md->end_subject - eptr + 1 :        so that the back reference matches an empty string.
2134          md->offset_vector[offset+1] - md->offset_vector[offset];  
2135          Otherwise, set the length to the length of what was matched by the
2136          referenced subpattern. */
2137    
2138          if (offset >= offset_top || md->offset_vector[offset] < 0)
2139            length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2140          else
2141            length = md->offset_vector[offset+1] - md->offset_vector[offset];
2142    
2143        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2144    
# Line 1501  for (;;) Line 2167  for (;;)
2167          break;          break;
2168    
2169          default:               /* No repeat follows */          default:               /* No repeat follows */
2170          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2171              {
2172              CHECK_PARTIAL();
2173              MRRETURN(MATCH_NOMATCH);
2174              }
2175          eptr += length;          eptr += length;
2176          continue;              /* With the main loop */          continue;              /* With the main loop */
2177          }          }
# Line 1517  for (;;) Line 2187  for (;;)
2187    
2188        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2189          {          {
2190          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2191              {
2192              CHECK_PARTIAL();
2193              MRRETURN(MATCH_NOMATCH);
2194              }
2195          eptr += length;          eptr += length;
2196          }          }
2197    
# Line 1532  for (;;) Line 2206  for (;;)
2206          {          {
2207          for (fi = min;; fi++)          for (fi = min;; fi++)
2208            {            {
2209            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2210            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2211            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2212              RRETURN(MATCH_NOMATCH);            if (!match_ref(offset, eptr, length, md, ims))
2213                {
2214                CHECK_PARTIAL();
2215                MRRETURN(MATCH_NOMATCH);
2216                }
2217            eptr += length;            eptr += length;
2218            }            }
2219          /* Control never gets here */          /* Control never gets here */
# Line 1548  for (;;) Line 2226  for (;;)
2226          pp = eptr;          pp = eptr;
2227          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2228            {            {
2229            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2230                {
2231                CHECK_PARTIAL();
2232                break;
2233                }
2234            eptr += length;            eptr += length;
2235            }            }
2236          while (eptr >= pp)          while (eptr >= pp)
2237            {            {
2238            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2239            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2240            eptr -= length;            eptr -= length;
2241            }            }
2242          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2243          }          }
2244        }        }
2245      /* Control never gets here */      /* Control never gets here */
2246    
   
   
2247      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2248      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2249      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1618  for (;;) Line 2298  for (;;)
2298          {          {
2299          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2300            {            {
2301            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2302                {
2303                SCHECK_PARTIAL();
2304                MRRETURN(MATCH_NOMATCH);
2305                }
2306            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2307            if (c > 255)            if (c > 255)
2308              {              {
2309              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2310              }              }
2311            else            else
2312              {              {
2313              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2314              }              }
2315            }            }
2316          }          }
# Line 1636  for (;;) Line 2320  for (;;)
2320          {          {
2321          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2322            {            {
2323            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2324                {
2325                SCHECK_PARTIAL();
2326                MRRETURN(MATCH_NOMATCH);
2327                }
2328            c = *eptr++;            c = *eptr++;
2329            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2330            }            }
2331          }          }
2332    
# Line 1658  for (;;) Line 2346  for (;;)
2346            {            {
2347            for (fi = min;; fi++)            for (fi = min;; fi++)
2348              {              {
2349              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2350              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2351              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2352                if (eptr >= md->end_subject)
2353                  {
2354                  SCHECK_PARTIAL();
2355                  MRRETURN(MATCH_NOMATCH);
2356                  }
2357              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2358              if (c > 255)              if (c > 255)
2359                {                {
2360                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2361                }                }
2362              else              else
2363                {                {
2364                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2365                }                }
2366              }              }
2367            }            }
# Line 1678  for (;;) Line 2371  for (;;)
2371            {            {
2372            for (fi = min;; fi++)            for (fi = min;; fi++)
2373              {              {
2374              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2375              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2376              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2377                if (eptr >= md->end_subject)
2378                  {
2379                  SCHECK_PARTIAL();
2380                  MRRETURN(MATCH_NOMATCH);
2381                  }
2382              c = *eptr++;              c = *eptr++;
2383              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2384              }              }
2385            }            }
2386          /* Control never gets here */          /* Control never gets here */
# Line 1701  for (;;) Line 2399  for (;;)
2399            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2400              {              {
2401              int len = 1;              int len = 1;
2402              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2403                  {
2404                  SCHECK_PARTIAL();
2405                  break;
2406                  }
2407              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2408              if (c > 255)              if (c > 255)
2409                {                {
# Line 1715  for (;;) Line 2417  for (;;)
2417              }              }
2418            for (;;)            for (;;)
2419              {              {
2420              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2421              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2422              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2423              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1727  for (;;) Line 2429  for (;;)
2429            {            {
2430            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2431              {              {
2432              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2433                  {
2434                  SCHECK_PARTIAL();
2435                  break;
2436                  }
2437              c = *eptr;              c = *eptr;
2438              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2439              eptr++;              eptr++;
2440              }              }
2441            while (eptr >= pp)            while (eptr >= pp)
2442              {              {
2443              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2444              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2445              eptr--;              eptr--;
2446              }              }
2447            }            }
2448    
2449          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2450          }          }
2451        }        }
2452      /* Control never gets here */      /* Control never gets here */
2453    
2454    
2455      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2456      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2457        mode, because Unicode properties are supported in non-UTF-8 mode. */
2458    
2459  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2460      case OP_XCLASS:      case OP_XCLASS:
# Line 1788  for (;;) Line 2495  for (;;)
2495    
2496        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2497          {          {
2498          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2499          GETCHARINC(c, eptr);            {
2500          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2501              MRRETURN(MATCH_NOMATCH);
2502              }
2503            GETCHARINCTEST(c, eptr);
2504            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2505          }          }
2506    
2507        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1805  for (;;) Line 2516  for (;;)
2516          {          {
2517          for (fi = min;; fi++)          for (fi = min;; fi++)
2518            {            {
2519            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2520            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2521            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2522            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2523            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2524                SCHECK_PARTIAL();
2525                MRRETURN(MATCH_NOMATCH);
2526                }
2527              GETCHARINCTEST(c, eptr);
2528              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2529            }            }
2530          /* Control never gets here */          /* Control never gets here */
2531          }          }
# Line 1822  for (;;) Line 2538  for (;;)
2538          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2539            {            {
2540            int len = 1;            int len = 1;
2541            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2542            GETCHARLEN(c, eptr, len);              {
2543                SCHECK_PARTIAL();
2544                break;
2545                }
2546              GETCHARLENTEST(c, eptr, len);
2547            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2548            eptr += len;            eptr += len;
2549            }            }
2550          for(;;)          for(;;)
2551            {            {
2552            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2553            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2554            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2555            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2556            }            }
2557          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2558          }          }
2559    
2560        /* Control never gets here */        /* Control never gets here */
# Line 1850  for (;;) Line 2570  for (;;)
2570        length = 1;        length = 1;
2571        ecode++;        ecode++;
2572        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2573        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2574        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2575            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2576            MRRETURN(MATCH_NOMATCH);
2577            }
2578          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2579        }        }
2580      else      else
2581  #endif  #endif
2582    
2583      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2584        {        {
2585        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2586        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2587            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2588            MRRETURN(MATCH_NOMATCH);
2589            }
2590          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2591        ecode += 2;        ecode += 2;
2592        }        }
2593      break;      break;
# Line 1874  for (;;) Line 2602  for (;;)
2602        ecode++;        ecode++;
2603        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2604    
2605        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2606            {
2607            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2608            MRRETURN(MATCH_NOMATCH);
2609            }
2610    
2611        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2612        can use the fast lookup table. */        can use the fast lookup table. */
2613    
2614        if (fc < 128)        if (fc < 128)
2615          {          {
2616          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2617          }          }
2618    
2619        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
2620    
2621        else        else
2622          {          {
2623          int dc;          unsigned int dc;
2624          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2625          ecode += length;          ecode += length;
2626    
# Line 1898  for (;;) Line 2630  for (;;)
2630          if (fc != dc)          if (fc != dc)
2631            {            {
2632  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2633            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2634  #endif  #endif
2635              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2636            }            }
2637          }          }
2638        }        }
# Line 1909  for (;;) Line 2641  for (;;)
2641    
2642      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2643        {        {
2644        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2645        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2646            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2647            MRRETURN(MATCH_NOMATCH);
2648            }
2649          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2650        ecode += 2;        ecode += 2;
2651        }        }
2652      break;      break;
2653    
2654      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2655    
2656      case OP_EXACT:      case OP_EXACT:
2657      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2658      ecode += 3;      ecode += 3;
2659      goto REPEATCHAR;      goto REPEATCHAR;
2660    
2661        case OP_POSUPTO:
2662        possessive = TRUE;
2663        /* Fall through */
2664    
2665      case OP_UPTO:      case OP_UPTO:
2666      case OP_MINUPTO:      case OP_MINUPTO:
2667      min = 0;      min = 0;
# Line 1930  for (;;) Line 2670  for (;;)
2670      ecode += 3;      ecode += 3;
2671      goto REPEATCHAR;      goto REPEATCHAR;
2672    
2673        case OP_POSSTAR:
2674        possessive = TRUE;
2675        min = 0;
2676        max = INT_MAX;
2677        ecode++;
2678        goto REPEATCHAR;
2679    
2680        case OP_POSPLUS:
2681        possessive = TRUE;
2682        min = 1;
2683        max = INT_MAX;
2684        ecode++;
2685        goto REPEATCHAR;
2686    
2687        case OP_POSQUERY:
2688        possessive = TRUE;
2689        min = 0;
2690        max = 1;
2691        ecode++;
2692        goto REPEATCHAR;
2693    
2694      case OP_STAR:      case OP_STAR:
2695      case OP_MINSTAR:      case OP_MINSTAR:
2696      case OP_PLUS:      case OP_PLUS:
# Line 1938  for (;;) Line 2699  for (;;)
2699      case OP_MINQUERY:      case OP_MINQUERY:
2700      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2701      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2702    
2703      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2704      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2705      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2706    
2707      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2708    
2709      REPEATCHAR:      REPEATCHAR:
2710  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1953  for (;;) Line 2713  for (;;)
2713        length = 1;        length = 1;
2714        charptr = ecode;        charptr = ecode;
2715        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2716        ecode += length;        ecode += length;
2717    
2718        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 1961  for (;;) Line 2720  for (;;)
2720    
2721        if (length > 1)        if (length > 1)
2722          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2723  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2724          int othercase;          unsigned int othercase;
2725          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2726              (othercase = _pcre_ucp_othercase(fc)) >= 0 &&              (othercase = UCD_OTHERCASE(fc)) != fc)
              othercase >= 0)  
2727            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2728            else oclength = 0;
2729  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2730    
2731          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2732            {            {
2733            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2734            /* Need braces because of following else */              memcmp(eptr, charptr, length) == 0) eptr += length;
2735            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2736              else if (oclength > 0 &&
2737                       eptr <= md->end_subject - oclength &&
2738                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2739    #endif  /* SUPPORT_UCP */
2740            else            else
2741              {              {
2742              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2743              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2744              }              }
2745            }            }
2746    
# Line 1990  for (;;) Line 2750  for (;;)
2750            {            {
2751            for (fi = min;; fi++)            for (fi = min;; fi++)
2752              {              {
2753              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2754              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2755              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2756              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2757              /* Need braces because of following else */                memcmp(eptr, charptr, length) == 0) eptr += length;
2758              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2759                else if (oclength > 0 &&
2760                         eptr <= md->end_subject - oclength &&
2761                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2762    #endif  /* SUPPORT_UCP */
2763              else              else
2764                {                {
2765                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2766                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2767                }                }
2768              }              }
2769            /* Control never gets here */            /* Control never gets here */
2770            }            }
2771          else  
2772            else  /* Maximize */
2773            {            {
2774            pp = eptr;            pp = eptr;
2775            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2776              {              {
2777              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2778              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2779              else if (oclength == 0) break;  #ifdef SUPPORT_UCP
2780                else if (oclength > 0 &&
2781                         eptr <= md->end_subject - oclength &&
2782                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2783    #endif  /* SUPPORT_UCP */
2784              else              else
2785                {                {
2786                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2787                eptr += oclength;                break;
2788                }                }
2789              }              }
2790            while (eptr >= pp)  
2791             {            if (possessive) continue;
2792             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);  
2793             if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for(;;)
2794             eptr -= length;              {
2795             }              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2796            RRETURN(MATCH_NOMATCH);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2797                if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2798    #ifdef SUPPORT_UCP
2799                eptr--;
2800                BACKCHAR(eptr);
2801    #else   /* without SUPPORT_UCP */
2802                eptr -= length;
2803    #endif  /* SUPPORT_UCP */
2804                }
2805            }            }
2806          /* Control never gets here */          /* Control never gets here */
2807          }          }
# Line 2037  for (;;) Line 2814  for (;;)
2814  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2815    
2816      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2817        {  
2818        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2819    
2820      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2821      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2058  for (;;) Line 2833  for (;;)
2833        {        {
2834        fc = md->lcc[fc];        fc = md->lcc[fc];
2835        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2836          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2837            if (eptr >= md->end_subject)
2838              {
2839              SCHECK_PARTIAL();
2840              MRRETURN(MATCH_NOMATCH);
2841              }
2842            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2843            }
2844        if (min == max) continue;        if (min == max) continue;
2845        if (minimize)        if (minimize)
2846          {          {
2847          for (fi = min;; fi++)          for (fi = min;; fi++)
2848            {            {
2849            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2850            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2851            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2852                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2853              RRETURN(MATCH_NOMATCH);              {
2854                SCHECK_PARTIAL();
2855                MRRETURN(MATCH_NOMATCH);
2856                }
2857              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2858            }            }
2859          /* Control never gets here */          /* Control never gets here */
2860          }          }
2861        else        else  /* Maximize */
2862          {          {
2863          pp = eptr;          pp = eptr;
2864          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2865            {            {
2866            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2867                {
2868                SCHECK_PARTIAL();
2869                break;
2870                }
2871              if (fc != md->lcc[*eptr]) break;
2872            eptr++;            eptr++;
2873            }            }
2874    
2875            if (possessive) continue;
2876    
2877          while (eptr >= pp)          while (eptr >= pp)
2878            {            {
2879            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2880            eptr--;            eptr--;
2881            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2882            }            }
2883          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2884          }          }
2885        /* Control never gets here */        /* Control never gets here */
2886        }        }
# Line 2095  for (;;) Line 2889  for (;;)
2889    
2890      else      else
2891        {        {
2892        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2893            {
2894            if (eptr >= md->end_subject)
2895              {
2896              SCHECK_PARTIAL();
2897              MRRETURN(MATCH_NOMATCH);
2898              }
2899            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2900            }
2901    
2902        if (min == max) continue;        if (min == max) continue;
2903    
2904        if (minimize)        if (minimize)
2905          {          {
2906          for (fi = min;; fi++)          for (fi = min;; fi++)
2907            {            {
2908            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2909            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2910            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2911              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2912                {
2913                SCHECK_PARTIAL();
2914                MRRETURN(MATCH_NOMATCH);
2915                }
2916              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2917            }            }
2918          /* Control never gets here */          /* Control never gets here */
2919          }          }
2920        else        else  /* Maximize */
2921          {          {
2922          pp = eptr;          pp = eptr;
2923          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2924            {            {
2925            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
2926                {
2927                SCHECK_PARTIAL();
2928                break;
2929                }
2930              if (fc != *eptr) break;
2931            eptr++;            eptr++;
2932            }            }
2933            if (possessive) continue;
2934    
2935          while (eptr >= pp)          while (eptr >= pp)
2936            {            {
2937            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2938            eptr--;            eptr--;
2939            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2940            }            }
2941          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2942          }          }
2943        }        }
2944      /* Control never gets here */      /* Control never gets here */
# Line 2131  for (;;) Line 2947  for (;;)
2947      checking can be multibyte. */      checking can be multibyte. */
2948    
2949      case OP_NOT:      case OP_NOT:
2950      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2951          {
2952          SCHECK_PARTIAL();
2953          MRRETURN(MATCH_NOMATCH);
2954          }
2955      ecode++;      ecode++;
2956      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2957      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2140  for (;;) Line 2960  for (;;)
2960        if (c < 256)        if (c < 256)
2961  #endif  #endif
2962        c = md->lcc[c];        c = md->lcc[c];
2963        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
2964        }        }
2965      else      else
2966        {        {
2967        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
2968        }        }
2969      break;      break;
2970    
# Line 2168  for (;;) Line 2988  for (;;)
2988      ecode += 3;      ecode += 3;
2989      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2990    
2991        case OP_NOTPOSSTAR:
2992        possessive = TRUE;
2993        min = 0;
2994        max = INT_MAX;
2995        ecode++;
2996        goto REPEATNOTCHAR;
2997    
2998        case OP_NOTPOSPLUS:
2999        possessive = TRUE;
3000        min = 1;
3001        max = INT_MAX;
3002        ecode++;
3003        goto REPEATNOTCHAR;
3004    
3005        case OP_NOTPOSQUERY:
3006        possessive = TRUE;
3007        min = 0;
3008        max = 1;
3009        ecode++;
3010        goto REPEATNOTCHAR;
3011    
3012        case OP_NOTPOSUPTO:
3013        possessive = TRUE;
3014        min = 0;
3015        max = GET2(ecode, 1);
3016        ecode += 3;
3017        goto REPEATNOTCHAR;
3018    
3019      case OP_NOTSTAR:      case OP_NOTSTAR:
3020      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
3021      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2180  for (;;) Line 3028  for (;;)
3028      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3029      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3030    
3031      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3032    
3033      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3034      fc = *ecode++;      fc = *ecode++;
3035    
3036      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2207  for (;;) Line 3052  for (;;)
3052        /* UTF-8 mode */        /* UTF-8 mode */
3053        if (utf8)        if (utf8)
3054          {          {
3055          register int d;          register unsigned int d;
3056          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3057            {            {
3058              if (eptr >= md->end_subject)
3059                {
3060                SCHECK_PARTIAL();
3061                MRRETURN(MATCH_NOMATCH);
3062                }
3063            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3064            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3065            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3066            }            }
3067          }          }
3068        else        else
# Line 2221  for (;;) Line 3071  for (;;)
3071        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3072          {          {
3073          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3074            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3075              if (eptr >= md->end_subject)
3076                {
3077                SCHECK_PARTIAL();
3078                MRRETURN(MATCH_NOMATCH);
3079                }
3080              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3081              }
3082          }          }
3083    
3084        if (min == max) continue;        if (min == max) continue;
# Line 2232  for (;;) Line 3089  for (;;)
3089          /* UTF-8 mode */          /* UTF-8 mode */
3090          if (utf8)          if (utf8)
3091            {            {
3092            register int d;            register unsigned int d;
3093            for (fi = min;; fi++)            for (fi = min;; fi++)
3094              {              {
3095              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3096              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3097                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3098                if (eptr >= md->end_subject)
3099                  {
3100                  SCHECK_PARTIAL();
3101                  MRRETURN(MATCH_NOMATCH);
3102                  }
3103              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3104              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3105              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3106              }              }
3107            }            }
3108          else          else
# Line 2249  for (;;) Line 3111  for (;;)
3111            {            {
3112            for (fi = min;; fi++)            for (fi = min;; fi++)
3113              {              {
3114              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3115              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3116              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3117                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3118                  {
3119                  SCHECK_PARTIAL();
3120                  MRRETURN(MATCH_NOMATCH);
3121                  }
3122                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3123              }              }
3124            }            }
3125          /* Control never gets here */          /* Control never gets here */
# Line 2268  for (;;) Line 3135  for (;;)
3135          /* UTF-8 mode */          /* UTF-8 mode */
3136          if (utf8)          if (utf8)
3137            {            {
3138            register int d;            register unsigned int d;
3139            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3140              {              {
3141              int len = 1;              int len = 1;
3142              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3143              GETCHARLEN(d, eptr, len);                {
3144                  SCHECK_PARTIAL();
3145                  break;
3146                  }
3147                GETCHARLEN(d, eptr, len);
3148              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3149              if (fc == d) break;              if (fc == d) break;
3150              eptr += len;              eptr += len;
3151              }              }
3152            for(;;)          if (possessive) continue;
3153            for(;;)
3154              {              {
3155              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3156              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3157              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3158              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2292  for (;;) Line 3164  for (;;)
3164            {            {
3165            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3166              {              {
3167              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3168                  {
3169                  SCHECK_PARTIAL();
3170                  break;
3171                  }
3172                if (fc == md->lcc[*eptr]) break;
3173              eptr++;              eptr++;
3174              }              }
3175              if (possessive) continue;
3176            while (eptr >= pp)            while (eptr >= pp)
3177              {              {
3178              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3179              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3180              eptr--;              eptr--;
3181              }              }
3182            }            }
3183    
3184          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3185          }          }
3186        /* Control never gets here */        /* Control never gets here */
3187        }        }
# Line 2316  for (;;) Line 3194  for (;;)
3194        /* UTF-8 mode */        /* UTF-8 mode */
3195        if (utf8)        if (utf8)
3196          {          {
3197          register int d;          register unsigned int d;
3198          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3199            {            {
3200              if (eptr >= md->end_subject)
3201                {
3202                SCHECK_PARTIAL();
3203                MRRETURN(MATCH_NOMATCH);
3204                }
3205            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3206            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3207            }            }
3208          }          }
3209        else        else
# Line 2328  for (;;) Line 3211  for (;;)
3211        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3212          {          {
3213          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3214            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3215              if (eptr >= md->end_subject)
3216                {
3217                SCHECK_PARTIAL();
3218                MRRETURN(MATCH_NOMATCH);
3219                }
3220              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3221              }
3222          }          }
3223    
3224        if (min == max) continue;        if (min == max) continue;
# Line 2339  for (;;) Line 3229  for (;;)
3229          /* UTF-8 mode */          /* UTF-8 mode */
3230          if (utf8)          if (utf8)
3231            {            {
3232            register int d;            register unsigned int d;
3233            for (fi = min;; fi++)            for (fi = min;; fi++)
3234              {              {
3235              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3236              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3237                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3238                if (eptr >= md->end_subject)
3239                  {
3240                  SCHECK_PARTIAL();
3241                  MRRETURN(MATCH_NOMATCH);
3242                  }
3243              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3244              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3245              }              }
3246            }            }
3247          else          else
# Line 2355  for (;;) Line 3250  for (;;)
3250            {            {
3251            for (fi = min;; fi++)            for (fi = min;; fi++)
3252              {              {
3253              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3254              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3255              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3256                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3257                  {
3258                  SCHECK_PARTIAL();
3259                  MRRETURN(MATCH_NOMATCH);
3260                  }
3261                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3262              }              }
3263            }            }
3264          /* Control never gets here */          /* Control never gets here */
# Line 2374  for (;;) Line 3274  for (;;)
3274          /* UTF-8 mode */          /* UTF-8 mode */
3275          if (utf8)          if (utf8)
3276            {            {
3277            register int d;            register unsigned int d;
3278            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3279              {              {
3280              int len = 1;              int len = 1;
3281              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3282                  {
3283                  SCHECK_PARTIAL();
3284                  break;
3285                  }
3286              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3287              if (fc == d) break;              if (fc == d) break;
3288              eptr += len;              eptr += len;
3289              }              }
3290              if (possessive) continue;
3291            for(;;)            for(;;)
3292              {              {
3293              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3294              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3295              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3296              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2397  for (;;) Line 3302  for (;;)
3302            {            {
3303            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3304              {              {
3305              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3306                  {
3307                  SCHECK_PARTIAL();
3308                  break;
3309                  }
3310                if (fc == *eptr) break;
3311              eptr++;              eptr++;
3312              }              }
3313              if (possessive) continue;
3314            while (eptr >= pp)            while (eptr >= pp)
3315              {              {
3316              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3317              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3318              eptr--;              eptr--;
3319              }              }
3320            }            }
3321    
3322          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3323          }          }
3324        }        }
3325      /* Control never gets here */      /* Control never gets here */
# Line 2431  for (;;) Line 3342  for (;;)
3342      ecode += 3;      ecode += 3;
3343      goto REPEATTYPE;      goto REPEATTYPE;
3344    
3345        case OP_TYPEPOSSTAR:
3346        possessive = TRUE;
3347        min = 0;
3348        max = INT_MAX;
3349        ecode++;
3350        goto REPEATTYPE;
3351    
3352        case OP_TYPEPOSPLUS:
3353        possessive = TRUE;
3354        min = 1;
3355        max = INT_MAX;
3356        ecode++;
3357        goto REPEATTYPE;
3358    
3359        case OP_TYPEPOSQUERY:
3360        possessive = TRUE;
3361        min = 0;
3362        max = 1;
3363        ecode++;
3364        goto REPEATTYPE;
3365    
3366        case OP_TYPEPOSUPTO:
3367        possessive = TRUE;
3368        min = 0;
3369        max = GET2(ecode, 1);
3370        ecode += 3;
3371        goto REPEATTYPE;
3372    
3373      case OP_TYPESTAR:      case OP_TYPESTAR:
3374      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
3375      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2462  for (;;) Line 3401  for (;;)
3401    
3402      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3403      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3404      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3405      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3406      and single-bytes. */      and single-bytes. */
3407    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3408      if (min > 0)      if (min > 0)
3409        {        {
3410  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2477  for (;;) Line 3413  for (;;)
3413          switch(prop_type)          switch(prop_type)
3414            {            {
3415            case PT_ANY:            case PT_ANY:
3416            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3417            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3418              {              {
3419              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3420              GETCHARINC(c, eptr);                {
3421                  SCHECK_PARTIAL();
3422                  MRRETURN(MATCH_NOMATCH);
3423                  }
3424                GETCHARINCTEST(c, eptr);
3425              }              }
3426            break;            break;
3427    
3428            case PT_LAMP:            case PT_LAMP:
3429            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3430              {              {
3431              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3432              GETCHARINC(c, eptr);                {
3433              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3434                  MRRETURN(MATCH_NOMATCH);
3435                  }
3436                GETCHARINCTEST(c, eptr);
3437                prop_chartype = UCD_CHARTYPE(c);
3438              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3439                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3440                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3441                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3442              }              }
3443            break;            break;
3444    
3445            case PT_GC:            case PT_GC:
3446            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3447              {              {
3448              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3449              GETCHARINC(c, eptr);                {
3450              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3451                  MRRETURN(MATCH_NOMATCH);
3452                  }
3453                GETCHARINCTEST(c, eptr);
3454                prop_category = UCD_CATEGORY(c);
3455              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3456                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3457              }              }
3458            break;            break;
3459    
3460            case PT_PC:            case PT_PC:
3461            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3462              {              {
3463              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3464              GETCHARINC(c, eptr);                {
3465              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3466                  MRRETURN(MATCH_NOMATCH);
3467                  }
3468                GETCHARINCTEST(c, eptr);
3469                prop_chartype = UCD_CHARTYPE(c);
3470              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3471                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3472              }              }
3473            break;            break;
3474    
3475            case PT_SC:            case PT_SC:
3476            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3477              {              {
3478              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3479              GETCHARINC(c, eptr);                {
3480              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3481                  MRRETURN(MATCH_NOMATCH);
3482                  }
3483                GETCHARINCTEST(c, eptr);
3484                prop_script = UCD_SCRIPT(c);
3485              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3486                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3487              }              }
3488            break;            break;
3489    
3490            default:            default:
3491            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
3492            }            }
3493          }          }
3494    
# Line 2544  for (;;) Line 3499  for (;;)
3499          {          {
3500          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3501            {            {
3502              if (eptr >= md->end_subject)
3503                {
3504                SCHECK_PARTIAL();
3505                MRRETURN(MATCH_NOMATCH);
3506                }
3507            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3508            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3509            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3510            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3511              {              {
3512              int len = 1;              int len = 1;
3513              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3514                {                else { GETCHARLEN(c, eptr, len); }
3515                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3516              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3517              eptr += len;              eptr += len;
3518              }              }
# Line 2572  for (;;) Line 3530  for (;;)
3530          case OP_ANY:          case OP_ANY:
3531          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3532            {            {
3533            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3534               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))              {
3535              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3536                MRRETURN(MATCH_NOMATCH);
3537                }
3538              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3539              eptr++;
3540              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3541              }
3542            break;
3543    
3544            case OP_ALLANY:
3545            for (i = 1; i <= min; i++)
3546              {
3547              if (eptr >= md->end_subject)
3548                {
3549                SCHECK_PARTIAL();
3550                MRRETURN(MATCH_NOMATCH);
3551                }
3552              eptr++;
3553            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3554            }            }
3555          break;          break;
3556    
3557          case OP_ANYBYTE:          case OP_ANYBYTE:
3558            if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3559          eptr += min;          eptr += min;
3560          break;          break;
3561    
3562            case OP_ANYNL:
3563            for (i = 1; i <= min; i++)
3564              {
3565              if (eptr >= md->end_subject)
3566                {
3567                SCHECK_PARTIAL();
3568                MRRETURN(MATCH_NOMATCH);
3569                }
3570              GETCHARINC(c, eptr);
3571              switch(c)
3572                {
3573                default: MRRETURN(MATCH_NOMATCH);
3574                case 0x000d:
3575                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3576                break;
3577    
3578                case 0x000a:
3579                break;
3580    
3581                case 0x000b:
3582                case 0x000c:
3583                case 0x0085:
3584                case 0x2028:
3585                case 0x2029:
3586                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3587                break;
3588                }
3589              }
3590            break;
3591    
3592            case OP_NOT_HSPACE:
3593            for (i = 1; i <= min; i++)
3594              {
3595              if (eptr >= md->end_subject)
3596                {
3597                SCHECK_PARTIAL();
3598                MRRETURN(MATCH_NOMATCH);
3599                }
3600              GETCHARINC(c, eptr);
3601              switch(c)
3602                {
3603                default: break;
3604                case 0x09:      /* HT */
3605                case 0x20:      /* SPACE */
3606                case 0xa0:      /* NBSP */
3607                case 0x1680:    /* OGHAM SPACE MARK */
3608                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3609                case 0x2000:    /* EN QUAD */
3610                case 0x2001:    /* EM QUAD */
3611                case 0x2002:    /* EN SPACE */
3612                case 0x2003:    /* EM SPACE */
3613                case 0x2004:    /* THREE-PER-EM SPACE */
3614                case 0x2005:    /* FOUR-PER-EM SPACE */
3615                case 0x2006:    /* SIX-PER-EM SPACE */
3616                case 0x2007:    /* FIGURE SPACE */
3617                case 0x2008:    /* PUNCTUATION SPACE */
3618                case 0x2009:    /* THIN SPACE */
3619                case 0x200A:    /* HAIR SPACE */
3620                case 0x202f:    /* NARROW NO-BREAK SPACE */
3621                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3622                case 0x3000:    /* IDEOGRAPHIC SPACE */
3623                MRRETURN(MATCH_NOMATCH);
3624                }
3625              }
3626            break;
3627    
3628            case OP_HSPACE:
3629            for (i = 1; i <= min; i++)
3630              {
3631              if (eptr >= md->end_subject)
3632                {
3633                SCHECK_PARTIAL();
3634                MRRETURN(MATCH_NOMATCH);
3635                }
3636              GETCHARINC(c, eptr);
3637              switch(c)
3638                {
3639                default: MRRETURN(MATCH_NOMATCH);
3640                case 0x09:      /* HT */
3641                case 0x20:      /* SPACE */
3642                case 0xa0:      /* NBSP */
3643                case 0x1680:    /* OGHAM SPACE MARK */
3644                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3645                case 0x2000:    /* EN QUAD */
3646                case 0x2001:    /* EM QUAD */
3647                case 0x2002:    /* EN SPACE */
3648                case 0x2003:    /* EM SPACE */
3649                case 0x2004:    /* THREE-PER-EM SPACE */
3650                case 0x2005:    /* FOUR-PER-EM SPACE */
3651                case 0x2006:    /* SIX-PER-EM SPACE */
3652                case 0x2007:    /* FIGURE SPACE */
3653                case 0x2008:    /* PUNCTUATION SPACE */
3654                case 0x2009:    /* THIN SPACE */
3655                case 0x200A:    /* HAIR SPACE */
3656                case 0x202f:    /* NARROW NO-BREAK SPACE */
3657                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3658                case 0x3000:    /* IDEOGRAPHIC SPACE */
3659                break;
3660                }
3661              }
3662            break;
3663    
3664            case OP_NOT_VSPACE:
3665            for (i = 1; i <= min; i++)
3666              {
3667              if (eptr >= md->end_subject)
3668                {
3669                SCHECK_PARTIAL();
3670                MRRETURN(MATCH_NOMATCH);
3671                }
3672              GETCHARINC(c, eptr);
3673              switch(c)
3674                {
3675                default: break;
3676                case 0x0a:      /* LF */
3677                case 0x0b:      /* VT */
3678                case 0x0c:      /* FF */
3679                case 0x0d:      /* CR */
3680                case 0x85:      /* NEL */
3681                case 0x2028:    /* LINE SEPARATOR */
3682                case 0x2029:    /* PARAGRAPH SEPARATOR */
3683                MRRETURN(MATCH_NOMATCH);
3684                }
3685              }
3686            break;
3687    
3688            case OP_VSPACE:
3689            for (i = 1; i <= min; i++)
3690              {
3691              if (eptr >= md->end_subject)
3692                {
3693                SCHECK_PARTIAL();
3694                MRRETURN(MATCH_NOMATCH);
3695                }
3696              GETCHARINC(c, eptr);
3697              switch(c)
3698                {
3699                default: MRRETURN(MATCH_NOMATCH);
3700                case 0x0a:      /* LF */
3701                case 0x0b:      /* VT */
3702                case 0x0c:      /* FF */
3703                case 0x0d:      /* CR */
3704                case 0x85:      /* NEL */
3705                case 0x2028:    /* LINE SEPARATOR */
3706                case 0x2029:    /* PARAGRAPH SEPARATOR */
3707                break;
3708                }
3709              }
3710            break;
3711    
3712          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3713          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3714            {            {
3715            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3716                {
3717                SCHECK_PARTIAL();
3718                MRRETURN(MATCH_NOMATCH);
3719                }
3720            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3721            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3722              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
3723            }            }
3724          break;          break;
3725    
3726          case OP_DIGIT:          case OP_DIGIT:
3727          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3728            {            {
3729            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3730               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3731              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3732                MRRETURN(MATCH_NOMATCH);
3733                }
3734              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3735                MRRETURN(MATCH_NOMATCH);
3736            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3737            }            }
3738          break;          break;
# Line 2606  for (;;) Line 3740  for (;;)
3740          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3741          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3742            {            {
3743            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3744               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3745              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3746            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3747                }
3748              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3749                MRRETURN(MATCH_NOMATCH);
3750              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3751            }            }
3752          break;          break;
3753    
3754          case OP_WHITESPACE:          case OP_WHITESPACE:
3755          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3756            {            {
3757            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3758               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3759              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3760                MRRETURN(MATCH_NOMATCH);
3761                }
3762              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3763                MRRETURN(MATCH_NOMATCH);
3764            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3765            }            }
3766          break;          break;
# Line 2626  for (;;) Line 3768  for (;;)
3768          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3769          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3770            {            {
3771            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3772               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))              {
3773              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3774            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3775                }
3776              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3777                MRRETURN(MATCH_NOMATCH);
3778              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3779            }            }
3780          break;          break;
3781    
3782          case OP_WORDCHAR:          case OP_WORDCHAR:
3783          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3784            {            {
3785            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3786               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3787              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3788                MRRETURN(MATCH_NOMATCH);
3789                }
3790              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3791                MRRETURN(MATCH_NOMATCH);
3792            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3793            }            }
3794          break;          break;
# Line 2656  for (;;) Line 3806  for (;;)
3806        switch(ctype)        switch(ctype)
3807          {          {
3808          case OP_ANY:          case OP_ANY:
3809          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3810            {            {
3811            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
3812              if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);              {
3813                SCHECK_PARTIAL();
3814                MRRETURN(MATCH_NOMATCH);
3815                }
3816