/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 123 by ph10, Mon Mar 12 15:19:06 2007 UTC revision 384 by ph10, Sun Mar 8 16:27:43 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55  /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  /* Undefine some potentially clashing cpp symbols */
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
56    
57  #define EPTR_WORK_SIZE (1000)  #undef min
58    #undef max
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 65  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_THEN         (-996)
78    
79  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
80  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 147  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 183  calls by keeping local variables that ne Line 220  calls by keeping local variables that ne
220  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
221  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
222  always used to.  always used to.
223    
224    The original heap-recursive code used longjmp(). However, it seems that this
225    can be very slow on some operating systems. Following a suggestion from Stan
226    Switzer, the use of longjmp() has been abolished, at the cost of having to
227    provide a unique number for each call to RMATCH. There is no way of generating
228    a sequence of numbers at compile time in C. I have given them names, to make
229    them stand out more clearly.
230    
231    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
232    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
233    tests. Furthermore, not using longjmp() means that local dynamic variables
234    don't have indeterminate values; this has meant that the frame size can be
235    reduced because the result can be "passed back" by straight setting of the
236    variable instead of being passed in the frame.
237  ****************************************************************************  ****************************************************************************
238  ***************************************************************************/  ***************************************************************************/
239    
240    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
241    below must be updated in sync.  */
242    
243    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
244           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
245           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
246           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
247           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
248           RM51,  RM52, RM53, RM54 };
249    
250  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
251  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
252    actually used in this definition. */
253    
254  #ifndef NO_RECURSE  #ifndef NO_RECURSE
255  #define REGISTER register  #define REGISTER register
256    
257  #ifdef DEBUG  #ifdef DEBUG
258  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
259    { \    { \
260    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
261    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
262    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
263    }    }
264  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 205  versions and production versions. */ Line 267  versions and production versions. */
267    return ra; \    return ra; \
268    }    }
269  #else  #else
270  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
271    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
272  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
273  #endif  #endif
274    
275  #else  #else
276    
277    
278  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
279  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
280  match(), which never changes. */  argument of match(), which never changes. */
281    
282  #define REGISTER  #define REGISTER
283    
284  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
285    {\    {\
286    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
287    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
288      {\    newframe->Xeptr = ra;\
289      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
290      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
291      newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
292      newframe->Xims = re;\    newframe->Xims = re;\
293      newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
294      newframe->Xflags = rg;\    newframe->Xflags = rg;\
295      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
296      newframe->Xprevframe = frame;\    newframe->Xprevframe = frame;\
297      frame = newframe;\    frame = newframe;\
298      DPRINTF(("restarting from line %d\n", __LINE__));\    DPRINTF(("restarting from line %d\n", __LINE__));\
299      goto HEAP_RECURSE;\    goto HEAP_RECURSE;\
300      }\    L_##rw:\
301    else\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     {\  
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
302    }    }
303    
304  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 251  match(), which never changes. */ Line 308  match(), which never changes. */
308    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
309    if (frame != NULL)\    if (frame != NULL)\
310      {\      {\
311      frame->Xresult = ra;\      rrc = ra;\
312      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
313      }\      }\
314    return ra;\    return ra;\
315    }    }
# Line 268  typedef struct heapframe { Line 324  typedef struct heapframe {
324    
325    const uschar *Xeptr;    const uschar *Xeptr;
326    const uschar *Xecode;    const uschar *Xecode;
327      const uschar *Xmstart;
328    int Xoffset_top;    int Xoffset_top;
329    long int Xims;    long int Xims;
330    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 318  typedef struct heapframe { Line 375  typedef struct heapframe {
375    
376    eptrblock Xnewptrb;    eptrblock Xnewptrb;
377    
378    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
379    
380    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
381    
382  } heapframe;  } heapframe;
383    
# Line 349  made performance worse. Line 405  made performance worse.
405  Arguments:  Arguments:
406     eptr        pointer to current character in subject     eptr        pointer to current character in subject
407     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
408       mstart      pointer to the current match start position (can be modified
409                     by encountering \K)
410     offset_top  current top pointer     offset_top  current top pointer
411     md          pointer to "static" info for the match     md          pointer to "static" info for the match
412     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 358  Arguments: Line 416  Arguments:
416                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
417                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
418                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
419     rdepth      the recursion depth     rdepth      the recursion depth
420    
421  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 368  Returns:       MATCH_MATCH if matched Line 425  Returns:       MATCH_MATCH if matched
425  */  */
426    
427  static int  static int
428  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
429    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
430    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
431  {  {
# Line 396  frame->Xprevframe = NULL;            /* Line 453  frame->Xprevframe = NULL;            /*
453    
454  frame->Xeptr = eptr;  frame->Xeptr = eptr;
455  frame->Xecode = ecode;  frame->Xecode = ecode;
456    frame->Xmstart = mstart;
457  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
458  frame->Xims = ims;  frame->Xims = ims;
459  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 410  HEAP_RECURSE: Line 468  HEAP_RECURSE:
468    
469  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
470  #define ecode              frame->Xecode  #define ecode              frame->Xecode
471    #define mstart             frame->Xmstart
472  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
473  #define ims                frame->Xims  #define ims                frame->Xims
474  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 540  defined). However, RMATCH isn't like a f Line 599  defined). However, RMATCH isn't like a f
599  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
600  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
601    
602    #ifdef SUPPORT_UTF8
603    utf8 = md->utf8;       /* Local copy of the flag */
604    #else
605    utf8 = FALSE;
606    #endif
607    
608  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
609  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
610    
# Line 548  if (rdepth >= md->match_limit_recursion) Line 613  if (rdepth >= md->match_limit_recursion)
613    
614  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
615    
 #ifdef SUPPORT_UTF8  
 utf8 = md->utf8;       /* Local copy of the flag */  
 #else  
 utf8 = FALSE;  
 #endif  
   
616  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
617  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
618  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
619  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
620  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
621  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
622  already used. */  block that is used is on the stack, so a new one may be required for each
623    match(). */
624    
625  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
626    {    {
627    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
628    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
629      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
630    }    }
631    
632  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 588  for (;;) Line 641  for (;;)
641    
642    if (md->partial &&    if (md->partial &&
643        eptr >= md->end_subject &&        eptr >= md->end_subject &&
644        eptr > md->start_match)        eptr > mstart)
645      md->hitend = TRUE;      md->hitend = TRUE;
646    
647    switch(op)    switch(op)
648      {      {
649        case OP_FAIL:
650        RRETURN(MATCH_NOMATCH);
651    
652        case OP_PRUNE:
653        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
654          ims, eptrb, flags, RM51);
655        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
656        RRETURN(MATCH_PRUNE);
657    
658        case OP_COMMIT:
659        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
660          ims, eptrb, flags, RM52);
661        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
662        RRETURN(MATCH_COMMIT);
663    
664        case OP_SKIP:
665        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
666          ims, eptrb, flags, RM53);
667        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
668        md->start_match_ptr = eptr;   /* Pass back current position */
669        RRETURN(MATCH_SKIP);
670    
671        case OP_THEN:
672        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
673          ims, eptrb, flags, RM54);
674        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
675        RRETURN(MATCH_THEN);
676    
677      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
678      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
679      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 632  for (;;) Line 713  for (;;)
713        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
714        do        do
715          {          {
716          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
717            ims, eptrb, flags);            ims, eptrb, flags, RM1);
718          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
719          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
720          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
721          }          }
# Line 649  for (;;) Line 730  for (;;)
730        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
731        }        }
732    
733      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
734      bracket. */      as a non-capturing bracket. */
735    
736        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
737        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
738    
739      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
740    
741        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
742        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
743    
744      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
745      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
746      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
747      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
748        is set. */
749    
750      case OP_BRA:      case OP_BRA:
751      case OP_SBRA:      case OP_SBRA:
# Line 665  for (;;) Line 753  for (;;)
753      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
754      for (;;)      for (;;)
755        {        {
756        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
757          {          {
758          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
759          flags |= match_tail_recursed;            {
760          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
761          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
762              goto TAIL_RECURSE;
763              }
764    
765            /* Possibly empty group; can't use tail recursion. */
766    
767            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
768              eptrb, flags, RM48);
769            RRETURN(rrc);
770          }          }
771    
772        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
773        otherwise return. */        otherwise return. */
774    
775        RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
776          eptrb, flags);          eptrb, flags, RM2);
777        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
778        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
779        }        }
780      /* Control never reaches here. */      /* Control never reaches here. */
# Line 691  for (;;) Line 787  for (;;)
787    
788      case OP_COND:      case OP_COND:
789      case OP_SCOND:      case OP_SCOND:
790        /* Because of the way auto-callout works during compile, a callout item is
791        inserted between OP_COND and an assertion condition. */
792    
793        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
794          {
795          if (pcre_callout != NULL)
796            {
797            pcre_callout_block cb;
798            cb.version          = 1;   /* Version 1 of the callout block */
799            cb.callout_number   = ecode[LINK_SIZE+2];
800            cb.offset_vector    = md->offset_vector;
801            cb.subject          = (PCRE_SPTR)md->start_subject;
802            cb.subject_length   = md->end_subject - md->start_subject;
803            cb.start_match      = mstart - md->start_subject;
804            cb.current_position = eptr - md->start_subject;
805            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
806            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
807            cb.capture_top      = offset_top/2;
808            cb.capture_last     = md->capture_last;
809            cb.callout_data     = md->callout_data;
810            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
811            if (rrc < 0) RRETURN(rrc);
812            }
813          ecode += _pcre_OP_lengths[OP_CALLOUT];
814          }
815    
816        /* Now see what the actual condition is */
817    
818      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
819        {        {
820        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
# Line 718  for (;;) Line 842  for (;;)
842    
843      else      else
844        {        {
845        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
846            match_condassert);            match_condassert, RM3);
847        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
848          {          {
849          condition = TRUE;          condition = TRUE;
850          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
851          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
852          }          }
853        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
854          {          {
855          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
856          }          }
# Line 738  for (;;) Line 862  for (;;)
862        }        }
863    
864      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
865      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
866      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
867        group. If the second alternative doesn't exist, we can just plough on. */
868    
869      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
870        {        {
871        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
872        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
873        goto TAIL_RECURSE;          {
874            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
875            RRETURN(rrc);
876            }
877          else                       /* Group must match something */
878            {
879            flags = 0;
880            goto TAIL_RECURSE;
881            }
882        }        }
883      else      else                         /* Condition false & no 2nd alternative */
884        {        {
885        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
886        }        }
887      break;      break;
888    
889    
890      /* End of the pattern. If we are in a top-level recursion, we should      /* End of the pattern, either real or forced. If we are in a top-level
891      restore the offsets appropriately and continue from after the call. */      recursion, we should restore the offsets appropriately and continue from
892        after the call. */
893    
894        case OP_ACCEPT:
895      case OP_END:      case OP_END:
896      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
897        {        {
# Line 765  for (;;) Line 900  for (;;)
900        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
901        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
902          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
903        md->start_match = rec->save_start;        mstart = rec->save_start;
904        ims = original_ims;        ims = original_ims;
905        ecode = rec->after_call;        ecode = rec->after_call;
906        break;        break;
# Line 774  for (;;) Line 909  for (;;)
909      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
910      string - backtracking will then try other alternatives, if any. */      string - backtracking will then try other alternatives, if any. */
911    
912      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
913      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
914      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
915        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
916      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
917    
918      /* Change option settings */      /* Change option settings */
# Line 797  for (;;) Line 933  for (;;)
933      case OP_ASSERTBACK:      case OP_ASSERTBACK:
934      do      do
935        {        {
936        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
937            RM4);
938        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
939        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
940        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
941        }        }
942      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 823  for (;;) Line 960  for (;;)
960      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
961      do      do
962        {        {
963        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
964            RM5);
965        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
966        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
967        ecode += GET(ecode,1);        ecode += GET(ecode,1);
968        }        }
969      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 849  for (;;) Line 987  for (;;)
987          {          {
988          eptr--;          eptr--;
989          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
990          BACKCHAR(eptr)          BACKCHAR(eptr);
991          }          }
992        }        }
993      else      else
# Line 880  for (;;) Line 1018  for (;;)
1018        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1019        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1020        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
1021        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
1022        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
1023        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1024        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
# Line 942  for (;;) Line 1080  for (;;)
1080    
1081        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1082              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1083        new_recursive.save_start = md->start_match;        new_recursive.save_start = mstart;
1084        md->start_match = eptr;        mstart = eptr;
1085    
1086        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1087        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 952  for (;;) Line 1090  for (;;)
1090        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1091        do        do
1092          {          {
1093          RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1094            md, ims, eptrb, flags);            md, ims, eptrb, flags, RM6);
1095          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
1096            {            {
1097            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
# Line 962  for (;;) Line 1100  for (;;)
1100              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1101            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
1102            }            }
1103          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1104            {            {
1105            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1106            RRETURN(rrc);            RRETURN(rrc);
# Line 996  for (;;) Line 1134  for (;;)
1134    
1135      do      do
1136        {        {
1137        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
         eptrb, 0);  
1138        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1139        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1140        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1141        }        }
1142      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1042  for (;;) Line 1179  for (;;)
1179    
1180      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1181        {        {
1182        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1183        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1184        ecode = prev;        ecode = prev;
1185        flags = match_tail_recursed;        flags = 0;
1186        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1187        }        }
1188      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1189        {        {
1190        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1191        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1192        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1193        flags = match_tail_recursed;        flags = 0;
1194        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1195        }        }
1196      /* Control never gets here */      /* Control never gets here */
# Line 1065  for (;;) Line 1202  for (;;)
1202      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1203      break;      break;
1204    
1205      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1206      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1207      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1208      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1209      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1210    
1211      case OP_BRAZERO:      case OP_BRAZERO:
1212        {        {
1213        next = ecode+1;        next = ecode+1;
1214        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1215        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1216        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1217        ecode = next + 1 + LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
# Line 1085  for (;;) Line 1222  for (;;)
1222        {        {
1223        next = ecode+1;        next = ecode+1;
1224        do next += GET(next, 1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1225        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1226        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1227        ecode++;        ecode++;
1228        }        }
1229      break;      break;
1230    
1231        case OP_SKIPZERO:
1232          {
1233          next = ecode+1;
1234          do next += GET(next,1); while (*next == OP_ALT);
1235          ecode = next + 1 + LINK_SIZE;
1236          }
1237        break;
1238    
1239      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1240    
1241      case OP_KET:      case OP_KET:
# Line 1155  for (;;) Line 1300  for (;;)
1300          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1301          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1302          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
1303          md->start_match = rec->save_start;          mstart = rec->save_start;
1304          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1305            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1306          ecode = rec->after_call;          ecode = rec->after_call;
# Line 1184  for (;;) Line 1329  for (;;)
1329    
1330      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1331      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1332      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1333        unlimited repeat of a group that can match an empty string. */
1334    
1335      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1336    
1337      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1338        {        {
1339        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1340        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1341          if (flags != 0)    /* Could match an empty string */
1342            {
1343            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1344            RRETURN(rrc);
1345            }
1346        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1347        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1348        }        }
1349      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1350        {        {
1351        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1352        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1353        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1354        flags = match_tail_recursed;        flags = 0;
1355        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1356        }        }
1357      /* Control never gets here */      /* Control never gets here */
# Line 1234  for (;;) Line 1384  for (;;)
1384      ecode++;      ecode++;
1385      break;      break;
1386    
1387        /* Reset the start of match point */
1388    
1389        case OP_SET_SOM:
1390        mstart = eptr;
1391        ecode++;
1392        break;
1393    
1394      /* Assert before internal newline if multiline, or before a terminating      /* Assert before internal newline if multiline, or before a terminating
1395      newline unless endonly is set, else end of subject unless noteol is set. */      newline unless endonly is set, else end of subject unless noteol is set. */
1396    
# Line 1326  for (;;) Line 1483  for (;;)
1483      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1484    
1485      case OP_ANY:      case OP_ANY:
1486      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1487        {      /* Fall through */
1488        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
1489        }      case OP_ALLANY:
1490      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1491      if (utf8)      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1492      ecode++;      ecode++;
1493      break;      break;
1494    
# Line 1431  for (;;) Line 1587  for (;;)
1587        case 0x000d:        case 0x000d:
1588        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1589        break;        break;
1590    
1591        case 0x000a:        case 0x000a:
1592          break;
1593    
1594        case 0x000b:        case 0x000b:
1595        case 0x000c:        case 0x000c:
1596        case 0x0085:        case 0x0085:
1597        case 0x2028:        case 0x2028:
1598        case 0x2029:        case 0x2029:
1599          if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1600          break;
1601          }
1602        ecode++;
1603        break;
1604    
1605        case OP_NOT_HSPACE:
1606        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1607        GETCHARINCTEST(c, eptr);
1608        switch(c)
1609          {
1610          default: break;
1611          case 0x09:      /* HT */
1612          case 0x20:      /* SPACE */
1613          case 0xa0:      /* NBSP */
1614          case 0x1680:    /* OGHAM SPACE MARK */
1615          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1616          case 0x2000:    /* EN QUAD */
1617          case 0x2001:    /* EM QUAD */
1618          case 0x2002:    /* EN SPACE */
1619          case 0x2003:    /* EM SPACE */
1620          case 0x2004:    /* THREE-PER-EM SPACE */
1621          case 0x2005:    /* FOUR-PER-EM SPACE */
1622          case 0x2006:    /* SIX-PER-EM SPACE */
1623          case 0x2007:    /* FIGURE SPACE */
1624          case 0x2008:    /* PUNCTUATION SPACE */
1625          case 0x2009:    /* THIN SPACE */
1626          case 0x200A:    /* HAIR SPACE */
1627          case 0x202f:    /* NARROW NO-BREAK SPACE */
1628          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1629          case 0x3000:    /* IDEOGRAPHIC SPACE */
1630          RRETURN(MATCH_NOMATCH);
1631          }
1632        ecode++;
1633        break;
1634    
1635        case OP_HSPACE:
1636        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1637        GETCHARINCTEST(c, eptr);
1638        switch(c)
1639          {
1640          default: RRETURN(MATCH_NOMATCH);
1641          case 0x09:      /* HT */
1642          case 0x20:      /* SPACE */
1643          case 0xa0:      /* NBSP */
1644          case 0x1680:    /* OGHAM SPACE MARK */
1645          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1646          case 0x2000:    /* EN QUAD */
1647          case 0x2001:    /* EM QUAD */
1648          case 0x2002:    /* EN SPACE */
1649          case 0x2003:    /* EM SPACE */
1650          case 0x2004:    /* THREE-PER-EM SPACE */
1651          case 0x2005:    /* FOUR-PER-EM SPACE */
1652          case 0x2006:    /* SIX-PER-EM SPACE */
1653          case 0x2007:    /* FIGURE SPACE */
1654          case 0x2008:    /* PUNCTUATION SPACE */
1655          case 0x2009:    /* THIN SPACE */
1656          case 0x200A:    /* HAIR SPACE */
1657          case 0x202f:    /* NARROW NO-BREAK SPACE */
1658          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1659          case 0x3000:    /* IDEOGRAPHIC SPACE */
1660          break;
1661          }
1662        ecode++;
1663        break;
1664    
1665        case OP_NOT_VSPACE:
1666        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1667        GETCHARINCTEST(c, eptr);
1668        switch(c)
1669          {
1670          default: break;
1671          case 0x0a:      /* LF */
1672          case 0x0b:      /* VT */
1673          case 0x0c:      /* FF */
1674          case 0x0d:      /* CR */
1675          case 0x85:      /* NEL */
1676          case 0x2028:    /* LINE SEPARATOR */
1677          case 0x2029:    /* PARAGRAPH SEPARATOR */
1678          RRETURN(MATCH_NOMATCH);
1679          }
1680        ecode++;
1681        break;
1682    
1683        case OP_VSPACE:
1684        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1685        GETCHARINCTEST(c, eptr);
1686        switch(c)
1687          {
1688          default: RRETURN(MATCH_NOMATCH);
1689          case 0x0a:      /* LF */
1690          case 0x0b:      /* VT */
1691          case 0x0c:      /* FF */
1692          case 0x0d:      /* CR */
1693          case 0x85:      /* NEL */
1694          case 0x2028:    /* LINE SEPARATOR */
1695          case 0x2029:    /* PARAGRAPH SEPARATOR */
1696        break;        break;
1697        }        }
1698      ecode++;      ecode++;
# Line 1451  for (;;) Line 1707  for (;;)
1707      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1708      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1709        {        {
1710        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1711    
1712        switch(ecode[1])        switch(ecode[1])
1713          {          {
# Line 1461  for (;;) Line 1716  for (;;)
1716          break;          break;
1717    
1718          case PT_LAMP:          case PT_LAMP:
1719          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
1720               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
1721               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1722            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1723           break;           break;
1724    
1725          case PT_GC:          case PT_GC:
1726          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1727            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1728          break;          break;
1729    
1730          case PT_PC:          case PT_PC:
1731          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1732            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1733          break;          break;
1734    
1735          case PT_SC:          case PT_SC:
1736          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
1737            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1738          break;          break;
1739    
# Line 1497  for (;;) Line 1752  for (;;)
1752      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1753      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1754        {        {
1755        int chartype, script;        int category = UCD_CATEGORY(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1756        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1757        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1758          {          {
# Line 1507  for (;;) Line 1761  for (;;)
1761            {            {
1762            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1763            }            }
1764          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
1765          if (category != ucp_M) break;          if (category != ucp_M) break;
1766          eptr += len;          eptr += len;
1767          }          }
# Line 1528  for (;;) Line 1782  for (;;)
1782      case OP_REF:      case OP_REF:
1783        {        {
1784        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1785        ecode += 3;                                 /* Advance past item */        ecode += 3;
1786    
1787        /* If the reference is unset, set the length to be longer than the amount        /* If the reference is unset, there are two possibilities:
1788        of subject left; this ensures that every attempt at a match fails. We  
1789        can't just fail here, because of the possibility of quantifiers with zero        (a) In the default, Perl-compatible state, set the length to be longer
1790        minima. */        than the amount of subject left; this ensures that every attempt at a
1791          match fails. We can't just fail here, because of the possibility of
1792        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        quantifiers with zero minima.
1793          md->end_subject - eptr + 1 :  
1794          md->offset_vector[offset+1] - md->offset_vector[offset];        (b) If the JavaScript compatibility flag is set, set the length to zero
1795          so that the back reference matches an empty string.
1796    
1797          Otherwise, set the length to the length of what was matched by the
1798          referenced subpattern. */
1799    
1800          if (offset >= offset_top || md->offset_vector[offset] < 0)
1801            length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1802          else
1803            length = md->offset_vector[offset+1] - md->offset_vector[offset];
1804    
1805        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1806    
# Line 1597  for (;;) Line 1860  for (;;)
1860          {          {
1861          for (fi = min;; fi++)          for (fi = min;; fi++)
1862            {            {
1863            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1864            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1865            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1866              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 1618  for (;;) Line 1881  for (;;)
1881            }            }
1882          while (eptr >= pp)          while (eptr >= pp)
1883            {            {
1884            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1885            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1886            eptr -= length;            eptr -= length;
1887            }            }
# Line 1723  for (;;) Line 1986  for (;;)
1986            {            {
1987            for (fi = min;; fi++)            for (fi = min;; fi++)
1988              {              {
1989              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1990              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1991              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1992              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 1743  for (;;) Line 2006  for (;;)
2006            {            {
2007            for (fi = min;; fi++)            for (fi = min;; fi++)
2008              {              {
2009              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2010              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2011              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2012              c = *eptr++;              c = *eptr++;
# Line 1780  for (;;) Line 2043  for (;;)
2043              }              }
2044            for (;;)            for (;;)
2045              {              {
2046              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2047              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2048              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2049              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1799  for (;;) Line 2062  for (;;)
2062              }              }
2063            while (eptr >= pp)            while (eptr >= pp)
2064              {              {
2065              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2066              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2067              eptr--;              eptr--;
2068              }              }
# Line 1812  for (;;) Line 2075  for (;;)
2075    
2076    
2077      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2078      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2079        mode, because Unicode properties are supported in non-UTF-8 mode. */
2080    
2081  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2082      case OP_XCLASS:      case OP_XCLASS:
# Line 1854  for (;;) Line 2118  for (;;)
2118        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2119          {          {
2120          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2121          GETCHARINC(c, eptr);          GETCHARINCTEST(c, eptr);
2122          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2123          }          }
2124    
# Line 1870  for (;;) Line 2134  for (;;)
2134          {          {
2135          for (fi = min;; fi++)          for (fi = min;; fi++)
2136            {            {
2137            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2138            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2139            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2140            GETCHARINC(c, eptr);            GETCHARINCTEST(c, eptr);
2141            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2142            }            }
2143          /* Control never gets here */          /* Control never gets here */
# Line 1888  for (;;) Line 2152  for (;;)
2152            {            {
2153            int len = 1;            int len = 1;
2154            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2155            GETCHARLEN(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
2156            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2157            eptr += len;            eptr += len;
2158            }            }
2159          for(;;)          for(;;)
2160            {            {
2161            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2162            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2163            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2164            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2165            }            }
2166          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2167          }          }
# Line 1963  for (;;) Line 2227  for (;;)
2227          if (fc != dc)          if (fc != dc)
2228            {            {
2229  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2230            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2231  #endif  #endif
2232              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2233            }            }
# Line 2054  for (;;) Line 2318  for (;;)
2318  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2319          unsigned int othercase;          unsigned int othercase;
2320          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2321              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2322            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2323          else oclength = 0;          else oclength = 0;
2324  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 2081  for (;;) Line 2345  for (;;)
2345            {            {
2346            for (fi = min;; fi++)            for (fi = min;; fi++)
2347              {              {
2348              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2349              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2350              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2351              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
# Line 2122  for (;;) Line 2386  for (;;)
2386            if (possessive) continue;            if (possessive) continue;
2387            for(;;)            for(;;)
2388             {             {
2389             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2390             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2391             if (eptr == pp) RRETURN(MATCH_NOMATCH);             if (eptr == pp) RRETURN(MATCH_NOMATCH);
2392  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2171  for (;;) Line 2435  for (;;)
2435          {          {
2436          for (fi = min;; fi++)          for (fi = min;; fi++)
2437            {            {
2438            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2439            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2440            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
2441                fc != md->lcc[*eptr++])                fc != md->lcc[*eptr++])
# Line 2190  for (;;) Line 2454  for (;;)
2454          if (possessive) continue;          if (possessive) continue;
2455          while (eptr >= pp)          while (eptr >= pp)
2456            {            {
2457            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2458            eptr--;            eptr--;
2459            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2460            }            }
# Line 2209  for (;;) Line 2473  for (;;)
2473          {          {
2474          for (fi = min;; fi++)          for (fi = min;; fi++)
2475            {            {
2476            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2477            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2478            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2479              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 2227  for (;;) Line 2491  for (;;)
2491          if (possessive) continue;          if (possessive) continue;
2492          while (eptr >= pp)          while (eptr >= pp)
2493            {            {
2494            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2495            eptr--;            eptr--;
2496            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2497            }            }
# Line 2372  for (;;) Line 2636  for (;;)
2636            register unsigned int d;            register unsigned int d;
2637            for (fi = min;; fi++)            for (fi = min;; fi++)
2638              {              {
2639              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2640              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2641                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2642              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2643              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2644              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
2645                RRETURN(MATCH_NOMATCH);  
2646              }              }
2647            }            }
2648          else          else
# Line 2386  for (;;) Line 2651  for (;;)
2651            {            {
2652            for (fi = min;; fi++)            for (fi = min;; fi++)
2653              {              {
2654              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2655              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2656              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2657                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2418  for (;;) Line 2683  for (;;)
2683          if (possessive) continue;          if (possessive) continue;
2684          for(;;)          for(;;)
2685              {              {
2686              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2687              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2688              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2689              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2436  for (;;) Line 2701  for (;;)
2701            if (possessive) continue;            if (possessive) continue;
2702            while (eptr >= pp)            while (eptr >= pp)
2703              {              {
2704              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2705              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2706              eptr--;              eptr--;
2707              }              }
# Line 2481  for (;;) Line 2746  for (;;)
2746            register unsigned int d;            register unsigned int d;
2747            for (fi = min;; fi++)            for (fi = min;; fi++)
2748              {              {
2749              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2750              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2751                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2752              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2753              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2754              }              }
2755            }            }
2756          else          else
# Line 2494  for (;;) Line 2759  for (;;)
2759            {            {
2760            for (fi = min;; fi++)            for (fi = min;; fi++)
2761              {              {
2762              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2763              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2764              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2765                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2525  for (;;) Line 2790  for (;;)
2790            if (possessive) continue;            if (possessive) continue;
2791            for(;;)            for(;;)
2792              {              {
2793              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2794              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2795              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2796              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2543  for (;;) Line 2808  for (;;)
2808            if (possessive) continue;            if (possessive) continue;
2809            while (eptr >= pp)            while (eptr >= pp)
2810              {              {
2811              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2812              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2813              eptr--;              eptr--;
2814              }              }
# Line 2650  for (;;) Line 2915  for (;;)
2915            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2916              {              {
2917              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2918              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2919              }              }
2920            break;            break;
2921    
# Line 2658  for (;;) Line 2923  for (;;)
2923            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2924              {              {
2925              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2926              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2927              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2928              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2929                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
2930                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2671  for (;;) Line 2936  for (;;)
2936            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2937              {              {
2938              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2939              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2940              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2941              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2942                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2943              }              }
# Line 2682  for (;;) Line 2947  for (;;)
2947            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2948              {              {
2949              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2950              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2951              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2952              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2953                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2954              }              }
# Line 2693  for (;;) Line 2958  for (;;)
2958            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2959              {              {
2960              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2961              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2962              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
2963              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2964                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2965              }              }
# Line 2713  for (;;) Line 2978  for (;;)
2978          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2979            {            {
2980            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2981            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
2982            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2983            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2984              {              {
# Line 2722  for (;;) Line 2987  for (;;)
2987                {                {
2988                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2989                }                }
2990              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2991              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2992              eptr += len;              eptr += len;
2993              }              }
# Line 2740  for (;;) Line 3005  for (;;)
3005          case OP_ANY:          case OP_ANY:
3006          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3007            {            {
3008            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject || IS_NEWLINE(eptr))
                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))  
3009              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3010            eptr++;            eptr++;
3011            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3012            }            }
3013          break;          break;
3014    
3015            case OP_ALLANY:
3016            for (i = 1; i <= min; i++)
3017              {
3018              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3019              eptr++;
3020              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3021              }
3022            break;
3023    
3024          case OP_ANYBYTE:          case OP_ANYBYTE:
3025          eptr += min;          eptr += min;
3026          break;          break;
# Line 2763  for (;;) Line 3036  for (;;)
3036              case 0x000d:              case 0x000d:
3037              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3038              break;              break;
3039    
3040              case 0x000a:              case 0x000a:
3041                break;
3042    
3043              case 0x000b:              case 0x000b:
3044              case 0x000c:              case 0x000c:
3045              case 0x0085:              case 0x0085:
3046              case 0x2028:              case 0x2028:
3047              case 0x2029:              case 0x2029:
3048                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3049                break;
3050                }
3051              }
3052            break;
3053    
3054            case OP_NOT_HSPACE:
3055            for (i = 1; i <= min; i++)
3056              {
3057              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3058              GETCHARINC(c, eptr);
3059              switch(c)
3060                {
3061                default: break;
3062                case 0x09:      /* HT */
3063                case 0x20:      /* SPACE */
3064                case 0xa0:      /* NBSP */
3065                case 0x1680:    /* OGHAM SPACE MARK */
3066                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3067                case 0x2000:    /* EN QUAD */
3068                case 0x2001:    /* EM QUAD */
3069                case 0x2002:    /* EN SPACE */
3070                case 0x2003:    /* EM SPACE */
3071                case 0x2004:    /* THREE-PER-EM SPACE */
3072                case 0x2005:    /* FOUR-PER-EM SPACE */
3073                case 0x2006:    /* SIX-PER-EM SPACE */
3074                case 0x2007:    /* FIGURE SPACE */
3075                case 0x2008:    /* PUNCTUATION SPACE */
3076                case 0x2009:    /* THIN SPACE */
3077                case 0x200A:    /* HAIR SPACE */
3078                case 0x202f:    /* NARROW NO-BREAK SPACE */
3079                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3080                case 0x3000:    /* IDEOGRAPHIC SPACE */
3081                RRETURN(MATCH_NOMATCH);
3082                }
3083              }
3084            break;
3085    
3086            case OP_HSPACE:
3087            for (i = 1; i <= min; i++)
3088              {
3089              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3090              GETCHARINC(c, eptr);
3091              switch(c)
3092                {
3093                default: RRETURN(MATCH_NOMATCH);
3094                case 0x09:      /* HT */
3095                case 0x20:      /* SPACE */
3096                case 0xa0:      /* NBSP */
3097                case 0x1680:    /* OGHAM SPACE MARK */
3098                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3099                case 0x2000:    /* EN QUAD */
3100                case 0x2001:    /* EM QUAD */
3101                case 0x2002:    /* EN SPACE */
3102                case 0x2003:    /* EM SPACE */
3103                case 0x2004:    /* THREE-PER-EM SPACE */
3104                case 0x2005:    /* FOUR-PER-EM SPACE */
3105                case 0x2006:    /* SIX-PER-EM SPACE */
3106                case 0x2007:    /* FIGURE SPACE */
3107                case 0x2008:    /* PUNCTUATION SPACE */
3108                case 0x2009:    /* THIN SPACE */
3109                case 0x200A:    /* HAIR SPACE */
3110                case 0x202f:    /* NARROW NO-BREAK SPACE */
3111                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3112                case 0x3000:    /* IDEOGRAPHIC SPACE */
3113                break;
3114                }
3115              }
3116            break;
3117    
3118            case OP_NOT_VSPACE:
3119            for (i = 1; i <= min; i++)
3120              {
3121              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3122              GETCHARINC(c, eptr);
3123              switch(c)
3124                {
3125                default: break;
3126                case 0x0a:      /* LF */
3127                case 0x0b:      /* VT */
3128                case 0x0c:      /* FF */
3129                case 0x0d:      /* CR */
3130                case 0x85:      /* NEL */
3131                case 0x2028:    /* LINE SEPARATOR */
3132                case 0x2029:    /* PARAGRAPH SEPARATOR */
3133                RRETURN(MATCH_NOMATCH);
3134                }
3135              }
3136            break;
3137    
3138            case OP_VSPACE:
3139            for (i = 1; i <= min; i++)
3140              {
3141              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3142              GETCHARINC(c, eptr);
3143              switch(c)
3144                {
3145                default: RRETURN(MATCH_NOMATCH);
3146                case 0x0a:      /* LF */
3147                case 0x0b:      /* VT */
3148                case 0x0c:      /* FF */
3149                case 0x0d:      /* CR */
3150                case 0x85:      /* NEL */
3151                case 0x2028:    /* LINE SEPARATOR */
3152                case 0x2029:    /* PARAGRAPH SEPARATOR */
3153              break;              break;
3154              }              }
3155            }            }
# Line 2798  for (;;) Line 3179  for (;;)
3179          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3180            {            {
3181            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
3182               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))               (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
3183              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3184            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3185            }            }
3186          break;          break;
3187    
# Line 2818  for (;;) Line 3199  for (;;)
3199          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3200            {            {
3201            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
3202               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))               (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3203              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3204            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3205            }            }
3206          break;          break;
3207    
# Line 2848  for (;;) Line 3229  for (;;)
3229        switch(ctype)        switch(ctype)
3230          {          {
3231          case OP_ANY:          case OP_ANY:
3232          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3233            {            {
3234            for (i = 1; i <= min; i++)            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3235              {            eptr++;
             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
             eptr++;  
             }  
3236            }            }
3237          else eptr += min;          break;
3238    
3239            case OP_ALLANY:
3240            eptr += min;
3241          break;          break;
3242    
3243          case OP_ANYBYTE:          case OP_ANYBYTE:
# Line 2877  for (;;) Line 3258  for (;;)
3258              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3259              break;              break;
3260              case 0x000a:              case 0x000a:
3261                break;
3262    
3263              case 0x000b:              case 0x000b:
3264              case 0x000c:              case 0x000c:
3265              case 0x0085:              case 0x0085:
3266                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3267                break;
3268                }
3269              }
3270            break;
3271    
3272            case OP_NOT_HSPACE:
3273            for (i = 1; i <= min; i++)
3274              {
3275              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3276              switch(*eptr++)
3277                {
3278                default: break;
3279                case 0x09:      /* HT */
3280                case 0x20:      /* SPACE */
3281                case 0xa0:      /* NBSP */
3282                RRETURN(MATCH_NOMATCH);
3283                }
3284              }
3285            break;
3286    
3287            case OP_HSPACE:
3288            for (i = 1; i <= min; i++)
3289              {
3290              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3291              switch(*eptr++)
3292                {
3293                default: RRETURN(MATCH_NOMATCH);
3294                case 0x09:      /* HT */
3295                case 0x20:      /* SPACE */
3296                case 0xa0:      /* NBSP */
3297                break;
3298                }
3299              }
3300            break;
3301    
3302            case OP_NOT_VSPACE:
3303            for (i = 1; i <= min; i++)
3304              {
3305              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3306              switch(*eptr++)
3307                {
3308                default: break;
3309                case 0x0a:      /* LF */
3310                case 0x0b:      /* VT */
3311                case 0x0c:      /* FF */
3312                case 0x0d:      /* CR */
3313                case 0x85:      /* NEL */
3314                RRETURN(MATCH_NOMATCH);
3315                }
3316              }
3317            break;
3318    
3319            case OP_VSPACE:
3320            for (i = 1; i <= min; i++)
3321              {
3322              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3323              switch(*eptr++)
3324                {
3325                default: RRETURN(MATCH_NOMATCH);
3326                case 0x0a:      /* LF */
3327                case 0x0b:      /* VT */
3328                case 0x0c:      /* FF */
3329                case 0x0d:      /* CR */
3330                case 0x85:      /* NEL */
3331              break;              break;
3332              }              }
3333            }            }
# Line 2940  for (;;) Line 3388  for (;;)
3388            case PT_ANY:            case PT_ANY:
3389            for (fi = min;; fi++)            for (fi = min;; fi++)
3390              {              {
3391              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3392              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3393              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3394              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2951  for (;;) Line 3399  for (;;)
3399            case PT_LAMP:            case PT_LAMP:
3400            for (fi = min;; fi++)            for (fi = min;; fi++)
3401              {              {
3402              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3403              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3404              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3405              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3406              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3407              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3408                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3409                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2966  for (;;) Line 3414  for (;;)
3414            case PT_GC:            case PT_GC:
3415            for (fi = min;; fi++)            for (fi = min;; fi++)
3416              {              {
3417              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3418              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3419              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3420              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3421              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3422              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3423                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3424              }              }
# Line 2979  for (;;) Line 3427  for (;;)
3427            case PT_PC:            case PT_PC:
3428            for (fi = min;; fi++)            for (fi = min;; fi++)
3429              {              {
3430              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3431              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3432              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3433              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3434              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3435              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3436                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3437              }              }
# Line 2992  for (;;) Line 3440  for (;;)
3440            case PT_SC:            case PT_SC:
3441            for (fi = min;; fi++)            for (fi = min;; fi++)
3442              {              {
3443              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3444              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3445              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3446              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3447              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3448              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3449                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3450              }              }
# Line 3014  for (;;) Line 3462  for (;;)
3462          {          {
3463          for (fi = min;; fi++)          for (fi = min;; fi++)
3464            {            {
3465            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3466            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3467            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3468            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3469            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3470            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3471            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3472              {              {
# Line 3027  for (;;) Line 3475  for (;;)
3475                {                {
3476                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3477                }                }
3478              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3479              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3480              eptr += len;              eptr += len;
3481              }              }
# Line 3043  for (;;) Line 3491  for (;;)
3491          {          {
3492          for (fi = min;; fi++)          for (fi = min;; fi++)
3493            {            {
3494            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3495            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3496            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3497                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
                 IS_NEWLINE(eptr)))  
3498              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3499    
3500            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3501            switch(ctype)            switch(ctype)
3502              {              {
3503              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
3504              break;              case OP_ALLANY:
   
3505              case OP_ANYBYTE:              case OP_ANYBYTE:
3506              break;              break;
3507    
# Line 3067  for (;;) Line 3513  for (;;)
3513                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3514                break;                break;
3515                case 0x000a:                case 0x000a:
3516                  break;
3517    
3518                case 0x000b:                case 0x000b:
3519                case 0x000c:                case 0x000c:
3520                case 0x0085:                case 0x0085:
3521                case 0x2028:                case 0x2028:
3522                case 0x2029:                case 0x2029:
3523                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3524                  break;
3525                  }
3526                break;
3527    
3528                case OP_NOT_HSPACE:
3529                switch(c)
3530                  {
3531                  default: break;
3532                  case 0x09:      /* HT */
3533                  case 0x20:      /* SPACE */
3534                  case 0xa0:      /* NBSP */
3535                  case 0x1680:    /* OGHAM SPACE MARK */
3536                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3537                  case 0x2000:    /* EN QUAD */
3538                  case 0x2001:    /* EM QUAD */
3539                  case 0x2002:    /* EN SPACE */
3540                  case 0x2003:    /* EM SPACE */
3541                  case 0x2004:    /* THREE-PER-EM SPACE */
3542                  case 0x2005:    /* FOUR-PER-EM SPACE */
3543                  case 0x2006:    /* SIX-PER-EM SPACE */
3544                  case 0x2007:    /* FIGURE SPACE */
3545                  case 0x2008:    /* PUNCTUATION SPACE */
3546                  case 0x2009:    /* THIN SPACE */
3547                  case 0x200A:    /* HAIR SPACE */
3548                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3549                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3550                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3551                  RRETURN(MATCH_NOMATCH);
3552                  }
3553                break;
3554    
3555                case OP_HSPACE:
3556                switch(c)
3557                  {
3558                  default: RRETURN(MATCH_NOMATCH);
3559                  case 0x09:      /* HT */
3560                  case 0x20:      /* SPACE */
3561                  case 0xa0:      /* NBSP */
3562                  case 0x1680:    /* OGHAM SPACE MARK */
3563                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3564                  case 0x2000:    /* EN QUAD */
3565                  case 0x2001:    /* EM QUAD */
3566                  case 0x2002:    /* EN SPACE */
3567                  case 0x2003:    /* EM SPACE */
3568                  case 0x2004:    /* THREE-PER-EM SPACE */
3569                  case 0x2005:    /* FOUR-PER-EM SPACE */
3570                  case 0x2006:    /* SIX-PER-EM SPACE */
3571                  case 0x2007:    /* FIGURE SPACE */
3572                  case 0x2008:    /* PUNCTUATION SPACE */
3573                  case 0x2009:    /* THIN SPACE */
3574                  case 0x200A:    /* HAIR SPACE */
3575                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3576                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3577                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3578                  break;
3579                  }
3580                break;
3581    
3582                case OP_NOT_VSPACE:
3583                switch(c)
3584                  {
3585                  default: break;
3586                  case 0x0a:      /* LF */
3587                  case 0x0b:      /* VT */
3588                  case 0x0c:      /* FF */
3589                  case 0x0d:      /* CR */
3590                  case 0x85:      /* NEL */
3591                  case 0x2028:    /* LINE SEPARATOR */
3592                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3593                  RRETURN(MATCH_NOMATCH);
3594                  }
3595                break;
3596    
3597                case OP_VSPACE:
3598                switch(c)
3599                  {
3600                  default: RRETURN(MATCH_NOMATCH);
3601                  case 0x0a:      /* LF */
3602                  case 0x0b:      /* VT */
3603                  case 0x0c:      /* FF */
3604                  case 0x0d:      /* CR */
3605                  case 0x85:      /* NEL */
3606                  case 0x2028:    /* LINE SEPARATOR */
3607                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3608                break;                break;
3609                }                }
3610              break;              break;
# Line 3117  for (;;) Line 3650  for (;;)
3650          {          {
3651          for (fi = min;; fi++)          for (fi = min;; fi++)
3652            {            {
3653            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3654            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3655            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3656                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
3657              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3658    
3659            c = *eptr++;            c = *eptr++;
3660            switch(ctype)            switch(ctype)
3661              {              {
3662              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
3663              break;              case OP_ALLANY:
   
3664              case OP_ANYBYTE:              case OP_ANYBYTE:
3665              break;              break;
3666    
# Line 3139  for (;;) Line 3671  for (;;)
3671                case 0x000d:                case 0x000d:
3672                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3673                break;                break;
3674    
3675                case 0x000a:                case 0x000a:
3676                  break;
3677    
3678                case 0x000b:                case 0x000b:
3679                case 0x000c:                case 0x000c:
3680                case 0x0085:                case 0x0085:
3681                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3682                  break;
3683                  }
3684                break;
3685    
3686                case OP_NOT_HSPACE:
3687                switch(c)
3688                  {
3689                  default: break;
3690                  case 0x09:      /* HT */
3691                  case 0x20:      /* SPACE */
3692                  case 0xa0:      /* NBSP */
3693                  RRETURN(MATCH_NOMATCH);
3694                  }
3695                break;
3696    
3697                case OP_HSPACE:
3698                switch(c)
3699                  {
3700                  default: RRETURN(MATCH_NOMATCH);
3701                  case 0x09:      /* HT */
3702                  case 0x20:      /* SPACE */
3703                  case 0xa0:      /* NBSP */
3704                  break;
3705                  }
3706                break;
3707    
3708                case OP_NOT_VSPACE:
3709                switch(c)
3710                  {
3711                  default: break;
3712                  case 0x0a:      /* LF */
3713                  case 0x0b:      /* VT */
3714                  case 0x0c:      /* FF */
3715                  case 0x0d:      /* CR */
3716                  case 0x85:      /* NEL */
3717                  RRETURN(MATCH_NOMATCH);
3718                  }
3719                break;
3720    
3721                case OP_VSPACE:
3722                switch(c)
3723                  {
3724                  default: RRETURN(MATCH_NOMATCH);
3725                  case 0x0a:      /* LF */
3726                  case 0x0b:      /* VT */
3727                  case 0x0c:      /* FF */
3728                  case 0x0d:      /* CR */
3729                  case 0x85:      /* NEL */
3730                break;                break;
3731                }                }
3732              break;              break;
# Line 3209  for (;;) Line 3793  for (;;)
3793              int len = 1;              int len = 1;
3794              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3795              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3796              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3797              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3798                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3799                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3224  for (;;) Line 3808  for (;;)
3808              int len = 1;              int len = 1;
3809              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3810              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3811              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3812              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3813                break;                break;
3814              eptr+= len;              eptr+= len;
# Line 3237  for (;;) Line 3821  for (;;)
3821              int len = 1;              int len = 1;
3822              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3823              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3824              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3825              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3826                break;                break;
3827              eptr+= len;              eptr+= len;
# Line 3250  for (;;) Line 3834  for (;;)
3834              int len = 1;              int len = 1;
3835              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3836              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3837              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3838              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3839                break;                break;
3840              eptr+= len;              eptr+= len;
# Line 3263  for (;;) Line 3847  for (;;)
3847          if (possessive) continue;          if (possessive) continue;
3848          for(;;)          for(;;)
3849            {            {
3850            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3851            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3852            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3853            BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
3854            }            }
3855          }          }
3856    
# Line 3279  for (;;) Line 3863  for (;;)
3863            {            {
3864            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3865            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3866            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3867            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3868            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3869              {              {
# Line 3288  for (;;) Line 3872  for (;;)
3872                {                {
3873                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3874                }                }
3875              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3876              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3877              eptr += len;              eptr += len;
3878              }              }
# Line 3299  for (;;) Line 3883  for (;;)
3883          if (possessive) continue;          if (possessive) continue;
3884          for(;;)          for(;;)
3885            {            {
3886            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3887            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3888            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3889            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
3890              {              {
3891              int len = 1;              int len = 1;
             BACKCHAR(eptr);  
3892              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr; else
3893                {                {
3894                  BACKCHAR(eptr);
3895                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3896                }                }
3897              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3898              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3899              eptr--;              eptr--;
3900              }              }
# Line 3328  for (;;) Line 3912  for (;;)
3912          switch(ctype)          switch(ctype)
3913            {            {
3914            case OP_ANY:            case OP_ANY:
   
           /* Special code is required for UTF8, but when the maximum is  
           unlimited we don't need it, so we repeat the non-UTF8 code. This is  
           probably worth it, because .* is quite a common idiom. */  
   
3915            if (max < INT_MAX)            if (max < INT_MAX)
3916              {              {
3917              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
3918                {                {
3919                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3920                  {                eptr++;
3921                  if (eptr >= md->end_subject) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3922                }                }
3923              }              }
3924    
# Line 3359  for (;;) Line 3926  for (;;)
3926    
3927            else            else
3928              {              {
3929              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
3930                {                {
3931                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3932                  {                eptr++;
3933                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 }  
               break;  
3934                }                }
3935              else              }
3936              break;
3937    
3938              case OP_ALLANY:
3939              if (max < INT_MAX)
3940                {
3941                for (i = min; i < max; i++)
3942                {                {
3943                c = max - min;                if (eptr >= md->end_subject) break;
3944                if (c > (unsigned int)(md->end_subject - eptr))                eptr++;
3945                  c = md->end_subject - eptr;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
               eptr += c;  
3946                }                }
3947              }              }
3948              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
3949            break;            break;
3950    
3951            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 3400  for (;;) Line 3970  for (;;)
3970                }                }
3971              else              else
3972                {                {
3973                if (c != 0x000a && c != 0x000b && c != 0x000c &&                if (c != 0x000a &&
3974                    c != 0x0085 && c != 0x2028 && c != 0x2029)                    (md->bsr_anycrlf ||
3975                       (c != 0x000b && c != 0x000c &&
3976                        c != 0x0085 && c != 0x2028 && c != 0x2029)))
3977                  break;                  break;
3978                eptr += len;                eptr += len;
3979                }                }
3980              }              }
3981            break;            break;
3982    
3983              case OP_NOT_HSPACE:
3984              case OP_HSPACE:
3985              for (i = min; i < max; i++)
3986                {
3987                BOOL gotspace;
3988                int len = 1;
3989                if (eptr >= md->end_subject) break;
3990                GETCHARLEN(c, eptr, len);
3991                switch(c)
3992                  {
3993                  default: gotspace = FALSE; break;
3994                  case 0x09:      /* HT */
3995                  case 0x20:      /* SPACE */
3996                  case 0xa0:      /* NBSP */
3997                  case 0x1680:    /* OGHAM SPACE MARK */
3998                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3999                  case 0x2000:    /* EN QUAD */
4000                  case 0x2001:    /* EM QUAD */
4001                  case 0x2002:    /* EN SPACE */
4002                  case 0x2003:    /* EM SPACE */
4003                  case 0x2004:    /* THREE-PER-EM SPACE */
4004                  case 0x2005:    /* FOUR-PER-EM SPACE */
4005                  case 0x2006:    /* SIX-PER-EM SPACE */
4006                  case 0x2007:    /* FIGURE SPACE */
4007                  case 0x2008:    /* PUNCTUATION SPACE */
4008                  case 0x2009:    /* THIN SPACE */
4009                  case 0x200A:    /* HAIR SPACE */
4010                  case 0x202f:    /* NARROW NO-BREAK SPACE */
4011                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4012                  case 0x3000:    /* IDEOGRAPHIC SPACE */
4013                  gotspace = TRUE;
4014                  break;
4015                  }
4016                if (gotspace == (ctype == OP_NOT_HSPACE)) break;
4017                eptr += len;
4018                }
4019              break;
4020    
4021              case OP_NOT_VSPACE:
4022              case OP_VSPACE:
4023              for (i = min; i < max; i++)
4024                {
4025                BOOL gotspace;
4026                int len = 1;
4027                if (eptr >= md->end_subject) break;
4028                GETCHARLEN(c, eptr, len);
4029                switch(c)
4030                  {
4031                  default: gotspace = FALSE; break;
4032                  case 0x0a:      /* LF */
4033                  case 0x0b:      /* VT */
4034                  case 0x0c:      /* FF */
4035                  case 0x0d:      /* CR */
4036                  case 0x85:      /* NEL */
4037                  case 0x2028:    /* LINE SEPARATOR */
4038                  case 0x2029:    /* PARAGRAPH SEPARATOR */
4039                  gotspace = TRUE;
4040                  break;
4041                  }
4042                if (gotspace == (ctype == OP_NOT_VSPACE)) break;
4043                eptr += len;
4044                }
4045              break;
4046    
4047            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
4048            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4049              {              {
# Line 3483  for (;;) Line 4119  for (;;)
4119          if (possessive) continue;          if (possessive) continue;
4120          for(;;)          for(;;)
4121            {            {
4122            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
4123            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4124            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
4125            BACKCHAR(eptr);            BACKCHAR(eptr);
4126            }            }
4127          }          }
4128        else        else
4129  #endif  #endif  /* SUPPORT_UTF8 */
4130    
4131        /* Not UTF-8 mode */        /* Not UTF-8 mode */
4132          {          {
4133          switch(ctype)          switch(ctype)
4134            {            {
4135            case OP_ANY:            case OP_ANY:
4136            if ((ims & PCRE_DOTALL) == 0)            for (i = min; i < max; i++)
4137              {              {
4138              for (i = min; i < max; i++)              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4139                {              eptr++;
               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
               eptr++;  
               }  
             break;  
4140              }              }
4141            /* For DOTALL case, fall through and treat as \C */            break;
4142    
4143              case OP_ALLANY:
4144            case OP_ANYBYTE:            case OP_ANYBYTE:
4145            c = max - min;            c = max - min;
4146            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
# Line 3527  for (;;) Line 4160  for (;;)
4160                }                }
4161              else              else
4162                {                {
4163                if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)                if (c != 0x000a &&
4164                      (md->bsr_anycrlf ||
4165                        (c != 0x000b && c != 0x000c && c != 0x0085)))
4166                  break;                  break;
4167                eptr++;                eptr++;
4168                }                }
4169              }              }
4170            break;            break;
4171    
4172              case OP_NOT_HSPACE:
4173              for (i = min; i < max; i++)
4174                {
4175                if (eptr >= md->end_subject) break;
4176                c = *eptr;
4177                if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4178                eptr++;
4179                }
4180              break;
4181    
4182              case OP_HSPACE:
4183              for (i = min; i < max; i++)
4184                {
4185                if (eptr >= md->end_subject) break;
4186                c = *eptr;
4187                if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4188                eptr++;
4189                }
4190              break;
4191    
4192              case OP_NOT_VSPACE:
4193              for (i = min; i < max; i++)
4194                {
4195                if (eptr >= md->end_subject) break;
4196                c = *eptr;
4197                if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4198                  break;
4199                eptr++;
4200                }
4201              break;
4202    
4203              case OP_VSPACE:
4204              for (i = min; i < max; i++)
4205                {
4206                if (eptr >= md->end_subject) break;
4207                c = *eptr;
4208                if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4209                  break;
4210                eptr++;
4211                }
4212              break;
4213    
4214            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
4215            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4216              {              {
# Line 3597  for (;;) Line 4274  for (;;)
4274          if (possessive) continue;          if (possessive) continue;
4275          while (eptr >= pp)          while (eptr >= pp)
4276            {            {
4277            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4278            eptr--;            eptr--;
4279            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4280            }            }
# Line 3623  for (;;) Line 4300  for (;;)
4300    
4301    }             /* End of main loop */    }             /* End of main loop */
4302  /* Control never reaches here */  /* Control never reaches here */
4303    
4304    
4305    /* When compiling to use the heap rather than the stack for recursive calls to
4306    match(), the RRETURN() macro jumps here. The number that is saved in
4307    frame->Xwhere indicates which label we actually want to return to. */
4308    
4309    #ifdef NO_RECURSE
4310    #define LBL(val) case val: goto L_RM##val;
4311    HEAP_RETURN:
4312    switch (frame->Xwhere)
4313      {
4314      LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4315      LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
4316      LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
4317      LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
4318      LBL(53) LBL(54)
4319    #ifdef SUPPORT_UTF8
4320      LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
4321      LBL(32) LBL(34) LBL(42) LBL(46)
4322    #ifdef SUPPORT_UCP
4323      LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
4324    #endif  /* SUPPORT_UCP */
4325    #endif  /* SUPPORT_UTF8 */
4326      default:
4327      DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4328      return PCRE_ERROR_INTERNAL;
4329      }
4330    #undef LBL
4331    #endif  /* NO_RECURSE */
4332  }  }
4333    
4334    
# Line 3635  Undefine all the macros that were define Line 4341  Undefine all the macros that were define
4341  #ifdef NO_RECURSE  #ifdef NO_RECURSE
4342  #undef eptr  #undef eptr
4343  #undef ecode  #undef ecode
4344    #undef mstart
4345  #undef offset_top  #undef offset_top
4346  #undef ims  #undef ims
4347  #undef eptrb  #undef eptrb
# Line 3707  Returns:          > 0 => success; value Line 4414  Returns:          > 0 => success; value
4414                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4415  */  */
4416    
4417  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
4418  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4419    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4420    int offsetcount)    int offsetcount)
# Line 3732  const uschar *start_bits = NULL; Line 4439  const uschar *start_bits = NULL;
4439  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
4440  USPTR end_subject;  USPTR end_subject;
4441  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
 eptrblock eptrchain[EPTR_WORK_SIZE];  
4442    
4443  pcre_study_data internal_study;  pcre_study_data internal_study;
4444  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3795  if (re->magic_number != MAGIC_NUMBER) Line 4501  if (re->magic_number != MAGIC_NUMBER)
4501  /* Set up other data */  /* Set up other data */
4502    
4503  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4504  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
4505  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
4506    
4507  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
# Line 3810  end_subject = md->end_subject; Line 4516  end_subject = md->end_subject;
4516    
4517  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4518  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4519    md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4520    
4521  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4522  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
# Line 3818  md->partial = (options & PCRE_PARTIAL) ! Line 4525  md->partial = (options & PCRE_PARTIAL) !
4525  md->hitend = FALSE;  md->hitend = FALSE;
4526    
4527  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
 md->eptrchain = eptrchain;              /* Make workspace generally available */  
4528    
4529  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
4530  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
4531    
4532    /* Handle different \R options. */
4533    
4534    switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
4535      {
4536      case 0:
4537      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
4538        md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
4539      else
4540    #ifdef BSR_ANYCRLF
4541      md->bsr_anycrlf = TRUE;
4542    #else
4543      md->bsr_anycrlf = FALSE;
4544    #endif
4545      break;
4546    
4547      case PCRE_BSR_ANYCRLF:
4548      md->bsr_anycrlf = TRUE;
4549      break;
4550    
4551      case PCRE_BSR_UNICODE:
4552      md->bsr_anycrlf = FALSE;
4553      break;
4554    
4555      default: return PCRE_ERROR_BADNEWLINE;
4556      }
4557    
4558  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
4559  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
4560    
4561  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
4562         PCRE_NEWLINE_BITS)          (pcre_uint32)options) & PCRE_NEWLINE_BITS)
4563    {    {
4564    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
4565    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = '\r'; break;
# Line 3835  switch ((((options & PCRE_NEWLINE_BITS) Line 4567  switch ((((options & PCRE_NEWLINE_BITS)
4567    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
4568         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4569    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
4570      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4571    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
4572    }    }
4573    
4574  if (newline < 0)  if (newline == -2)
4575      {
4576      md->nltype = NLTYPE_ANYCRLF;
4577      }
4578    else if (newline < 0)
4579    {    {
4580    md->nltype = NLTYPE_ANY;    md->nltype = NLTYPE_ANY;
4581    }    }
# Line 3861  else Line 4598  else
4598  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
4599  moment. */  moment. */
4600    
4601  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
4602    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
4603    
4604  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
# Line 3938  studied, there may be a bitmap of possib Line 4675  studied, there may be a bitmap of possib
4675    
4676  if (!anchored)  if (!anchored)
4677    {    {
4678    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
4679      {      {
4680      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
4681      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 3953  if (!anchored) Line 4690  if (!anchored)
4690  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
4691  character" set. */  character" set. */
4692    
4693  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
4694    {    {
4695    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
4696    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 3969  the loop runs just once. */ Line 4706  the loop runs just once. */
4706  for(;;)  for(;;)
4707    {    {
4708    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
4709      USPTR new_start_match;
4710    
4711    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
4712    
# Line 3988  for(;;) Line 4726  for(;;)
4726    if (firstline)    if (firstline)
4727      {      {
4728      USPTR t = start_match;      USPTR t = start_match;
4729    #ifdef SUPPORT_UTF8
4730        if (utf8)
4731          {
4732          while (t < md->end_subject && !IS_NEWLINE(t))
4733            {
4734            t++;
4735            while (t < end_subject && (*t & 0xc0) == 0x80) t++;
4736            }
4737          }
4738        else
4739    #endif
4740      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4741      end_subject = t;      end_subject = t;
4742      }      }
4743    
4744    /* Now test for a unique first byte */    /* Now advance to a unique first byte if there is one. */
4745    
4746    if (first_byte >= 0)    if (first_byte >= 0)
4747      {      {
4748      if (first_byte_caseless)      if (first_byte_caseless)
4749        while (start_match < end_subject &&        while (start_match < end_subject && md->lcc[*start_match] != first_byte)
              md->lcc[*start_match] != first_byte)  
4750          start_match++;          start_match++;
4751      else      else
4752        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4753          start_match++;          start_match++;
4754      }      }
4755    
4756    /* Or to just after a linebreak for a multiline match if possible */    /* Or to just after a linebreak for a multiline match */
4757    
4758    else if (startline)    else if (startline)
4759      {      {
4760      if (start_match > md->start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4761        {        {
4762        while (start_match <= end_subject && !WAS_NEWLINE(start_match))  #ifdef SUPPORT_UTF8
4763          if (utf8)
4764            {
4765            while (start_match < end_subject && !WAS_NEWLINE(start_match))
4766              {
4767              start_match++;
4768              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4769                start_match++;
4770              }
4771            }
4772          else
4773    #endif
4774          while (start_match < end_subject && !WAS_NEWLINE(start_match))
4775            start_match++;
4776    
4777          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4778          and we are now at a LF, advance the match position by one more character.
4779          */
4780    
4781          if (start_match[-1] == '\r' &&
4782               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4783               start_match < end_subject &&
4784               *start_match == '\n')
4785          start_match++;          start_match++;
4786        }        }
4787      }      }
4788    
4789    /* Or to a non-unique first char after study */    /* Or to a non-unique first byte after study */
4790    
4791    else if (start_bits != NULL)    else if (start_bits != NULL)
4792      {      {
4793      while (start_match < end_subject)      while (start_match < end_subject)
4794        {        {
4795        register unsigned int c = *start_match;        register unsigned int c = *start_match;
4796        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
4797            else break;
4798        }        }
4799      }      }
4800    
# Line 4099  for(;;) Line 4870  for(;;)
4870    
4871    /* OK, we can now run the match. */    /* OK, we can now run the match. */
4872    
4873    md->start_match = start_match;    md->start_match_ptr = start_match;
4874    md->match_call_count = 0;    md->match_call_count = 0;
4875    md->eptrn = 0;                          /* Next free eptrchain slot */    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
   rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);  
4876    
4877    /* Any return other than MATCH_NOMATCH breaks the loop. */    switch(rc)
4878        {
4879        /* NOMATCH and PRUNE advance by one character. THEN at this level acts
4880        exactly like PRUNE. */
4881    
4882    if (rc != MATCH_NOMATCH) break;      case MATCH_NOMATCH:
4883        case MATCH_PRUNE:
4884        case MATCH_THEN:
4885        new_start_match = start_match + 1;
4886    #ifdef SUPPORT_UTF8
4887        if (utf8)
4888          while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
4889            new_start_match++;
4890    #endif
4891        break;
4892    
4893        /* SKIP passes back the next starting point explicitly. */
4894    
4895        case MATCH_SKIP:
4896        new_start_match = md->start_match_ptr;
4897        break;
4898    
4899        /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
4900    
4901        case MATCH_COMMIT:
4902        rc = MATCH_NOMATCH;
4903        goto ENDLOOP;
4904    
4905        /* Any other return is some kind of error. */
4906    
4907        default:
4908        goto ENDLOOP;
4909        }
4910    
4911      /* Control reaches here for the various types of "no match at this point"
4912      result. Reset the code to MATCH_NOMATCH for subsequent checking. */
4913    
4914      rc = MATCH_NOMATCH;
4915    
4916    /* If PCRE_FIRSTLINE is set, the match must happen before or at the first    /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4917    newline in the subject (though it may continue over the newline). Therefore,    newline in the subject (though it may continue over the newline). Therefore,
# Line 4114  for(;;) Line 4919  for(;;)
4919    
4920    if (firstline && IS_NEWLINE(start_match)) break;    if (firstline && IS_NEWLINE(start_match)) break;
4921    
4922    /* Advance the match position by one character. */    /* Advance to new matching position */
4923    
4924    start_match++;    start_match = new_start_match;
 #ifdef SUPPORT_UTF8  
   if (utf8)  
     while(start_match < end_subject && (*start_match & 0xc0) == 0x80)  
       start_match++;  
 #endif  
4925    
4926    /* Break the loop if the pattern is anchored or if we have passed the end of    /* Break the loop if the pattern is anchored or if we have passed the end of
4927    the subject. */    the subject. */
4928    
4929    if (anchored || start_match > end_subject) break;    if (anchored || start_match > end_subject) break;
4930    
4931    /* If we have just passed a CR and the newline option is CRLF or ANY, and we    /* If we have just passed a CR and we are now at a LF, and the pattern does
4932    are now at a LF, advance the match position by one more character. */    not contain any explicit matches for \r or \n, and the newline option is CRLF
4933      or ANY or ANYCRLF, advance the match position by one more character. */
4934    
4935    if (start_match[-1] == '\r' &&    if (start_match[-1] == '\r' &&
4936         (md->nltype == NLTYPE_ANY || md->nllen == 2) &&        start_match < end_subject &&
4937         start_match < end_subject &&        *start_match == '\n' &&
4938         *start_match == '\n')        (re->flags & PCRE_HASCRORLF) == 0 &&
4939            (md->nltype == NLTYPE_ANY ||
4940             md->nltype == NLTYPE_ANYCRLF ||
4941             md->nllen == 2))
4942      start_match++;      start_match++;
4943    
4944    }   /* End of for(;;) "bumpalong" loop */    }   /* End of for(;;) "bumpalong" loop */
# Line 4144  for(;;) Line 4948  for(;;)
4948  /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping  /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4949  conditions is true:  conditions is true:
4950    
4951  (1) The pattern is anchored;  (1) The pattern is anchored or the match was forced to fail by (*COMMIT);
4952    
4953  (2) We are past the end of the subject;  (2) We are past the end of the subject;
4954    
# Line 4159  processing, copy those that we can. In t Line 4963  processing, copy those that we can. In t
4963  certain parts of the pattern were not used, even though there are more  certain parts of the pattern were not used, even though there are more
4964  capturing parentheses than vector slots. */  capturing parentheses than vector slots. */
4965    
4966    ENDLOOP:
4967    
4968  if (rc == MATCH_MATCH)  if (rc == MATCH_MATCH)
4969    {    {
4970    if (using_temporary_offsets)    if (using_temporary_offsets)
# Line 4179  if (rc == MATCH_MATCH) Line 4985  if (rc == MATCH_MATCH)
4985    
4986    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = md->offset_overflow? 0 : md->end_offset_top/2;
4987    
4988    /* If there is space, set up the whole thing as substring 0. */    /* If there is space, set up the whole thing as substring 0. The value of
4989      md->start_match_ptr might be modified if \K was encountered on the successful
4990      matching path. */
4991    
4992    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4993      {      {
4994      offsets[0] = start_match - md->start_subject;      offsets[0] = md->start_match_ptr - md->start_subject;
4995      offsets[1] = md->end_match_ptr - md->start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
4996      }      }
4997    

Legend:
Removed from v.123  
changed lines
  Added in v.384

  ViewVC Help
Powered by ViewVC 1.1.5