/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 144 by ph10, Mon Apr 2 13:32:07 2007 UTC revision 197 by ph10, Tue Jul 31 10:50:18 2007 UTC
# Line 53  possible. There are also some static sup Line 53  possible. There are also some static sup
53  #undef min  #undef min
54  #undef max  #undef max
55    
 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
   
 #define EPTR_WORK_SIZE (1000)  
   
56  /* Flag bits for the match() function */  /* Flag bits for the match() function */
57    
58  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
59  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
60    
61  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
62  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 188  calls by keeping local variables that ne Line 182  calls by keeping local variables that ne
182  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
183  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
184  always used to.  always used to.
185    
186    The original heap-recursive code used longjmp(). However, it seems that this
187    can be very slow on some operating systems. Following a suggestion from Stan
188    Switzer, the use of longjmp() has been abolished, at the cost of having to
189    provide a unique number for each call to RMATCH. There is no way of generating
190    a sequence of numbers at compile time in C. I have given them names, to make
191    them stand out more clearly.
192    
193    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
194    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
195    tests. Furthermore, not using longjmp() means that local dynamic variables
196    don't have indeterminate values; this has meant that the frame size can be
197    reduced because the result can be "passed back" by straight setting of the
198    variable instead of being passed in the frame.
199  ****************************************************************************  ****************************************************************************
200  ***************************************************************************/  ***************************************************************************/
201    
202    
203    /* Numbers for RMATCH calls */
204    
205    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
206           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
207           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
208           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
209           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50 };
210    
211    
212  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
213  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
214    actually used in this definition. */
215    
216  #ifndef NO_RECURSE  #ifndef NO_RECURSE
217  #define REGISTER register  #define REGISTER register
218    
219  #ifdef DEBUG  #ifdef DEBUG
220  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
221    { \    { \
222    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
223    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
224    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
225    }    }
226  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 210  versions and production versions. */ Line 229  versions and production versions. */
229    return ra; \    return ra; \
230    }    }
231  #else  #else
232  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
233    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
234  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
235  #endif  #endif
236    
237  #else  #else
238    
239    
240  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
241  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
242  match(), which never changes. */  argument of match(), which never changes. */
243    
244  #define REGISTER  #define REGISTER
245    
246  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
247    {\    {\
248    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
249    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
250      {\    newframe->Xeptr = ra;\
251      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
252      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
253      newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
254      newframe->Xims = re;\    newframe->Xims = re;\
255      newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
256      newframe->Xflags = rg;\    newframe->Xflags = rg;\
257      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
258      newframe->Xprevframe = frame;\    newframe->Xprevframe = frame;\
259      frame = newframe;\    frame = newframe;\
260      DPRINTF(("restarting from line %d\n", __LINE__));\    DPRINTF(("restarting from line %d\n", __LINE__));\
261      goto HEAP_RECURSE;\    goto HEAP_RECURSE;\
262      }\    L_##rw:\
263    else\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     {\  
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
264    }    }
265    
266  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 256  match(), which never changes. */ Line 270  match(), which never changes. */
270    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
271    if (frame != NULL)\    if (frame != NULL)\
272      {\      {\
273      frame->Xresult = ra;\      rrc = ra;\
274      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
275      }\      }\
276    return ra;\    return ra;\
277    }    }
# Line 273  typedef struct heapframe { Line 286  typedef struct heapframe {
286    
287    const uschar *Xeptr;    const uschar *Xeptr;
288    const uschar *Xecode;    const uschar *Xecode;
289      const uschar *Xmstart;
290    int Xoffset_top;    int Xoffset_top;
291    long int Xims;    long int Xims;
292    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 323  typedef struct heapframe { Line 337  typedef struct heapframe {
337    
338    eptrblock Xnewptrb;    eptrblock Xnewptrb;
339    
340    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
341    
342    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
343    
344  } heapframe;  } heapframe;
345    
# Line 354  made performance worse. Line 367  made performance worse.
367  Arguments:  Arguments:
368     eptr        pointer to current character in subject     eptr        pointer to current character in subject
369     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
370       mstart      pointer to the current match start position (can be modified
371                     by encountering \K)
372     offset_top  current top pointer     offset_top  current top pointer
373     md          pointer to "static" info for the match     md          pointer to "static" info for the match
374     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 363  Arguments: Line 378  Arguments:
378                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
379                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
380                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
381     rdepth      the recursion depth     rdepth      the recursion depth
382    
383  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 373  Returns:       MATCH_MATCH if matched Line 387  Returns:       MATCH_MATCH if matched
387  */  */
388    
389  static int  static int
390  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
391    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
392    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
393  {  {
# Line 401  frame->Xprevframe = NULL;            /* Line 415  frame->Xprevframe = NULL;            /*
415    
416  frame->Xeptr = eptr;  frame->Xeptr = eptr;
417  frame->Xecode = ecode;  frame->Xecode = ecode;
418    frame->Xmstart = mstart;
419  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
420  frame->Xims = ims;  frame->Xims = ims;
421  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 415  HEAP_RECURSE: Line 430  HEAP_RECURSE:
430    
431  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
432  #define ecode              frame->Xecode  #define ecode              frame->Xecode
433    #define mstart             frame->Xmstart
434  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
435  #define ims                frame->Xims  #define ims                frame->Xims
436  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 545  defined). However, RMATCH isn't like a f Line 561  defined). However, RMATCH isn't like a f
561  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
562  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
563    
564    #ifdef SUPPORT_UTF8
565    utf8 = md->utf8;       /* Local copy of the flag */
566    #else
567    utf8 = FALSE;
568    #endif
569    
570  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
571  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
572    
# Line 553  if (rdepth >= md->match_limit_recursion) Line 575  if (rdepth >= md->match_limit_recursion)
575    
576  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
577    
 #ifdef SUPPORT_UTF8  
 utf8 = md->utf8;       /* Local copy of the flag */  
 #else  
 utf8 = FALSE;  
 #endif  
   
578  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
579  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
580  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
581  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
582  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
583  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
584  already used. */  block that is used is on the stack, so a new one may be required for each
585    match(). */
586    
587  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
588    {    {
589    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
590    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
591      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
592    }    }
593    
594  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 593  for (;;) Line 603  for (;;)
603    
604    if (md->partial &&    if (md->partial &&
605        eptr >= md->end_subject &&        eptr >= md->end_subject &&
606        eptr > md->start_match)        eptr > mstart)
607      md->hitend = TRUE;      md->hitend = TRUE;
608    
609    switch(op)    switch(op)
# Line 637  for (;;) Line 647  for (;;)
647        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
648        do        do
649          {          {
650          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
651            ims, eptrb, flags);            ims, eptrb, flags, RM1);
652          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
653          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
654          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
# Line 654  for (;;) Line 664  for (;;)
664        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
665        }        }
666    
667      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
668      bracket. */      as a non-capturing bracket. */
669    
670        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
671        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
672    
673      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
674    
675        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
676        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
677    
678      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
679      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
680      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
681      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
682        is set. */
683    
684      case OP_BRA:      case OP_BRA:
685      case OP_SBRA:      case OP_SBRA:
# Line 670  for (;;) Line 687  for (;;)
687      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
688      for (;;)      for (;;)
689        {        {
690        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
691          {          {
692          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
693          flags |= match_tail_recursed;            {
694          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
695          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
696              goto TAIL_RECURSE;
697              }
698    
699            /* Possibly empty group; can't use tail recursion. */
700    
701            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
702              eptrb, flags, RM48);
703            RRETURN(rrc);
704          }          }
705    
706        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
707        otherwise return. */        otherwise return. */
708    
709        RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
710          eptrb, flags);          eptrb, flags, RM2);
711        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
712        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
713        }        }
# Line 723  for (;;) Line 748  for (;;)
748    
749      else      else
750        {        {
751        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
752            match_condassert);            match_condassert, RM3);
753        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
754          {          {
755          condition = TRUE;          condition = TRUE;
# Line 743  for (;;) Line 768  for (;;)
768        }        }
769    
770      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
771      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
772      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
773        group. If the second alternative doesn't exist, we can just plough on. */
774    
775      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
776        {        {
777        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
778        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
779        goto TAIL_RECURSE;          {
780            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
781            RRETURN(rrc);
782            }
783          else                       /* Group must match something */
784            {
785            flags = 0;
786            goto TAIL_RECURSE;
787            }
788        }        }
789      else      else                         /* Condition false & no 2nd alternative */
790        {        {
791        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
792        }        }
# Line 770  for (;;) Line 804  for (;;)
804        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
805        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
806          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
807        md->start_match = rec->save_start;        mstart = rec->save_start;
808        ims = original_ims;        ims = original_ims;
809        ecode = rec->after_call;        ecode = rec->after_call;
810        break;        break;
# Line 779  for (;;) Line 813  for (;;)
813      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
814      string - backtracking will then try other alternatives, if any. */      string - backtracking will then try other alternatives, if any. */
815    
816      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
817      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
818      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
819        md->start_match_ptr = mstart;  /* and the start (\K can modify) */
820      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
821    
822      /* Change option settings */      /* Change option settings */
# Line 802  for (;;) Line 837  for (;;)
837      case OP_ASSERTBACK:      case OP_ASSERTBACK:
838      do      do
839        {        {
840        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
841            RM4);
842        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
843        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
844        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 828  for (;;) Line 864  for (;;)
864      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
865      do      do
866        {        {
867        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
868            RM5);
869        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
870        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
871        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 885  for (;;) Line 922  for (;;)
922        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
923        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
924        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
925        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
926        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
927        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
928        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
# Line 947  for (;;) Line 984  for (;;)
984    
985        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
986              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
987        new_recursive.save_start = md->start_match;        new_recursive.save_start = mstart;
988        md->start_match = eptr;        mstart = eptr;
989    
990        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
991        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 957  for (;;) Line 994  for (;;)
994        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
995        do        do
996          {          {
997          RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
998            md, ims, eptrb, flags);            md, ims, eptrb, flags, RM6);
999          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
1000            {            {
1001            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
# Line 1001  for (;;) Line 1038  for (;;)
1038    
1039      do      do
1040        {        {
1041        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
         eptrb, 0);  
1042        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1043        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1044        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 1047  for (;;) Line 1083  for (;;)
1083    
1084      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1085        {        {
1086        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1087        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1088        ecode = prev;        ecode = prev;
1089        flags = match_tail_recursed;        flags = 0;
1090        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1091        }        }
1092      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1093        {        {
1094        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1095        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1096        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1097        flags = match_tail_recursed;        flags = 0;
1098        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1099        }        }
1100      /* Control never gets here */      /* Control never gets here */
# Line 1079  for (;;) Line 1115  for (;;)
1115      case OP_BRAZERO:      case OP_BRAZERO:
1116        {        {
1117        next = ecode+1;        next = ecode+1;
1118        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1119        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1120        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1121        ecode = next + 1 + LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
# Line 1090  for (;;) Line 1126  for (;;)
1126        {        {
1127        next = ecode+1;        next = ecode+1;
1128        do next += GET(next, 1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1129        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1130        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1131        ecode++;        ecode++;
1132        }        }
# Line 1160  for (;;) Line 1196  for (;;)
1196          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1197          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1198          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
1199          md->start_match = rec->save_start;          mstart = rec->save_start;
1200          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1201            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1202          ecode = rec->after_call;          ecode = rec->after_call;
# Line 1189  for (;;) Line 1225  for (;;)
1225    
1226      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1227      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1228      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1229        unlimited repeat of a group that can match an empty string. */
1230    
1231      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1232    
1233      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1234        {        {
1235        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1236        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1237          if (flags != 0)    /* Could match an empty string */
1238            {
1239            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1240            RRETURN(rrc);
1241            }
1242        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1243        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1244        }        }
1245      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1246        {        {
1247        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1248        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1249        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1250        flags = match_tail_recursed;        flags = 0;
1251        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1252        }        }
1253      /* Control never gets here */      /* Control never gets here */
# Line 1239  for (;;) Line 1280  for (;;)
1280      ecode++;      ecode++;
1281      break;      break;
1282    
1283        /* Reset the start of match point */
1284    
1285        case OP_SET_SOM:
1286        mstart = eptr;
1287        ecode++;
1288        break;
1289    
1290      /* Assert before internal newline if multiline, or before a terminating      /* Assert before internal newline if multiline, or before a terminating
1291      newline unless endonly is set, else end of subject unless noteol is set. */      newline unless endonly is set, else end of subject unless noteol is set. */
1292    
# Line 1447  for (;;) Line 1495  for (;;)
1495      ecode++;      ecode++;
1496      break;      break;
1497    
1498        case OP_NOT_HSPACE:
1499        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1500        GETCHARINCTEST(c, eptr);
1501        switch(c)
1502          {
1503          default: break;
1504          case 0x09:      /* HT */
1505          case 0x20:      /* SPACE */
1506          case 0xa0:      /* NBSP */
1507          case 0x1680:    /* OGHAM SPACE MARK */
1508          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1509          case 0x2000:    /* EN QUAD */
1510          case 0x2001:    /* EM QUAD */
1511          case 0x2002:    /* EN SPACE */
1512          case 0x2003:    /* EM SPACE */
1513          case 0x2004:    /* THREE-PER-EM SPACE */
1514          case 0x2005:    /* FOUR-PER-EM SPACE */
1515          case 0x2006:    /* SIX-PER-EM SPACE */
1516          case 0x2007:    /* FIGURE SPACE */
1517          case 0x2008:    /* PUNCTUATION SPACE */
1518          case 0x2009:    /* THIN SPACE */
1519          case 0x200A:    /* HAIR SPACE */
1520          case 0x202f:    /* NARROW NO-BREAK SPACE */
1521          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1522          case 0x3000:    /* IDEOGRAPHIC SPACE */
1523          RRETURN(MATCH_NOMATCH);
1524          }
1525        ecode++;
1526        break;
1527    
1528        case OP_HSPACE:
1529        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1530        GETCHARINCTEST(c, eptr);
1531        switch(c)
1532          {
1533          default: RRETURN(MATCH_NOMATCH);
1534          case 0x09:      /* HT */
1535          case 0x20:      /* SPACE */
1536          case 0xa0:      /* NBSP */
1537          case 0x1680:    /* OGHAM SPACE MARK */
1538          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1539          case 0x2000:    /* EN QUAD */
1540          case 0x2001:    /* EM QUAD */
1541          case 0x2002:    /* EN SPACE */
1542          case 0x2003:    /* EM SPACE */
1543          case 0x2004:    /* THREE-PER-EM SPACE */
1544          case 0x2005:    /* FOUR-PER-EM SPACE */
1545          case 0x2006:    /* SIX-PER-EM SPACE */
1546          case 0x2007:    /* FIGURE SPACE */
1547          case 0x2008:    /* PUNCTUATION SPACE */
1548          case 0x2009:    /* THIN SPACE */
1549          case 0x200A:    /* HAIR SPACE */
1550          case 0x202f:    /* NARROW NO-BREAK SPACE */
1551          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1552          case 0x3000:    /* IDEOGRAPHIC SPACE */
1553          break;
1554          }
1555        ecode++;
1556        break;
1557    
1558        case OP_NOT_VSPACE:
1559        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1560        GETCHARINCTEST(c, eptr);
1561        switch(c)
1562          {
1563          default: break;
1564          case 0x0a:      /* LF */
1565          case 0x0b:      /* VT */
1566          case 0x0c:      /* FF */
1567          case 0x0d:      /* CR */
1568          case 0x85:      /* NEL */
1569          case 0x2028:    /* LINE SEPARATOR */
1570          case 0x2029:    /* PARAGRAPH SEPARATOR */
1571          RRETURN(MATCH_NOMATCH);
1572          }
1573        ecode++;
1574        break;
1575    
1576        case OP_VSPACE:
1577        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1578        GETCHARINCTEST(c, eptr);
1579        switch(c)
1580          {
1581          default: RRETURN(MATCH_NOMATCH);
1582          case 0x0a:      /* LF */
1583          case 0x0b:      /* VT */
1584          case 0x0c:      /* FF */
1585          case 0x0d:      /* CR */
1586          case 0x85:      /* NEL */
1587          case 0x2028:    /* LINE SEPARATOR */
1588          case 0x2029:    /* PARAGRAPH SEPARATOR */
1589          break;
1590          }
1591        ecode++;
1592        break;
1593    
1594  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1595      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1596      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1602  for (;;) Line 1746  for (;;)
1746          {          {
1747          for (fi = min;; fi++)          for (fi = min;; fi++)
1748            {            {
1749            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1750            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1751            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1752              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 1623  for (;;) Line 1767  for (;;)
1767            }            }
1768          while (eptr >= pp)          while (eptr >= pp)
1769            {            {
1770            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1771            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1772            eptr -= length;            eptr -= length;
1773            }            }
# Line 1728  for (;;) Line 1872  for (;;)
1872            {            {
1873            for (fi = min;; fi++)            for (fi = min;; fi++)
1874              {              {
1875              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1876              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1877              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1878              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 1748  for (;;) Line 1892  for (;;)
1892            {            {
1893            for (fi = min;; fi++)            for (fi = min;; fi++)
1894              {              {
1895              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1896              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1897              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1898              c = *eptr++;              c = *eptr++;
# Line 1785  for (;;) Line 1929  for (;;)
1929              }              }
1930            for (;;)            for (;;)
1931              {              {
1932              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1933              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1934              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
1935              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1804  for (;;) Line 1948  for (;;)
1948              }              }
1949            while (eptr >= pp)            while (eptr >= pp)
1950              {              {
1951              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
1952              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1953              eptr--;              eptr--;
1954              }              }
# Line 1875  for (;;) Line 2019  for (;;)
2019          {          {
2020          for (fi = min;; fi++)          for (fi = min;; fi++)
2021            {            {
2022            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2023            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2024            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2025            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
# Line 1899  for (;;) Line 2043  for (;;)
2043            }            }
2044          for(;;)          for(;;)
2045            {            {
2046            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2047            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2048            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2049            BACKCHAR(eptr)            BACKCHAR(eptr)
# Line 2086  for (;;) Line 2230  for (;;)
2230            {            {
2231            for (fi = min;; fi++)            for (fi = min;; fi++)
2232              {              {
2233              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2234              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2235              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2236              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
# Line 2127  for (;;) Line 2271  for (;;)
2271            if (possessive) continue;            if (possessive) continue;
2272            for(;;)            for(;;)
2273             {             {
2274             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2275             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2276             if (eptr == pp) RRETURN(MATCH_NOMATCH);             if (eptr == pp) RRETURN(MATCH_NOMATCH);
2277  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2176  for (;;) Line 2320  for (;;)
2320          {          {
2321          for (fi = min;; fi++)          for (fi = min;; fi++)
2322            {            {
2323            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2324            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2325            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
2326                fc != md->lcc[*eptr++])                fc != md->lcc[*eptr++])
# Line 2195  for (;;) Line 2339  for (;;)
2339          if (possessive) continue;          if (possessive) continue;
2340          while (eptr >= pp)          while (eptr >= pp)
2341            {            {
2342            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2343            eptr--;            eptr--;
2344            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2345            }            }
# Line 2214  for (;;) Line 2358  for (;;)
2358          {          {
2359          for (fi = min;; fi++)          for (fi = min;; fi++)
2360            {            {
2361            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2362            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2363            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2364              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 2232  for (;;) Line 2376  for (;;)
2376          if (possessive) continue;          if (possessive) continue;
2377          while (eptr >= pp)          while (eptr >= pp)
2378            {            {
2379            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2380            eptr--;            eptr--;
2381            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2382            }            }
# Line 2377  for (;;) Line 2521  for (;;)
2521            register unsigned int d;            register unsigned int d;
2522            for (fi = min;; fi++)            for (fi = min;; fi++)
2523              {              {
2524              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2525              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2526              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2527              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
# Line 2391  for (;;) Line 2535  for (;;)
2535            {            {
2536            for (fi = min;; fi++)            for (fi = min;; fi++)
2537              {              {
2538              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2539              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2540              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2541                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2423  for (;;) Line 2567  for (;;)
2567          if (possessive) continue;          if (possessive) continue;
2568          for(;;)          for(;;)
2569              {              {
2570              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2571              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2572              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2573              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2441  for (;;) Line 2585  for (;;)
2585            if (possessive) continue;            if (possessive) continue;
2586            while (eptr >= pp)            while (eptr >= pp)
2587              {              {
2588              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2589              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2590              eptr--;              eptr--;
2591              }              }
# Line 2486  for (;;) Line 2630  for (;;)
2630            register unsigned int d;            register unsigned int d;
2631            for (fi = min;; fi++)            for (fi = min;; fi++)
2632              {              {
2633              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2634              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2635              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2636              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fi >= max || eptr >= md->end_subject || fc == d)
# Line 2499  for (;;) Line 2643  for (;;)
2643            {            {
2644            for (fi = min;; fi++)            for (fi = min;; fi++)
2645              {              {
2646              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2647              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2648              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2649                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2530  for (;;) Line 2674  for (;;)
2674            if (possessive) continue;            if (possessive) continue;
2675            for(;;)            for(;;)
2676              {              {
2677              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2678              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2679              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2680              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2548  for (;;) Line 2692  for (;;)
2692            if (possessive) continue;            if (possessive) continue;
2693            while (eptr >= pp)            while (eptr >= pp)
2694              {              {
2695              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2696              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2697              eptr--;              eptr--;
2698              }              }
# Line 2655  for (;;) Line 2799  for (;;)
2799            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2800              {              {
2801              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2802              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2803              }              }
2804            break;            break;
2805    
# Line 2663  for (;;) Line 2807  for (;;)
2807            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2808              {              {
2809              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2810              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2811              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2812              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2813                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 2676  for (;;) Line 2820  for (;;)
2820            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2821              {              {
2822              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2823              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2824              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2825              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2826                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2687  for (;;) Line 2831  for (;;)
2831            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2832              {              {
2833              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2834              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2835              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2836              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2837                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2698  for (;;) Line 2842  for (;;)
2842            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2843              {              {
2844              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2845              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2846              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2847              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2848                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2779  for (;;) Line 2923  for (;;)
2923            }            }
2924          break;          break;
2925    
2926            case OP_NOT_HSPACE:
2927            for (i = 1; i <= min; i++)
2928              {
2929              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2930              GETCHARINC(c, eptr);
2931              switch(c)
2932                {
2933                default: break;
2934                case 0x09:      /* HT */
2935                case 0x20:      /* SPACE */
2936                case 0xa0:      /* NBSP */
2937                case 0x1680:    /* OGHAM SPACE MARK */
2938                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2939                case 0x2000:    /* EN QUAD */
2940                case 0x2001:    /* EM QUAD */
2941                case 0x2002:    /* EN SPACE */
2942                case 0x2003:    /* EM SPACE */
2943                case 0x2004:    /* THREE-PER-EM SPACE */
2944                case 0x2005:    /* FOUR-PER-EM SPACE */
2945                case 0x2006:    /* SIX-PER-EM SPACE */
2946                case 0x2007:    /* FIGURE SPACE */
2947                case 0x2008:    /* PUNCTUATION SPACE */
2948                case 0x2009:    /* THIN SPACE */
2949                case 0x200A:    /* HAIR SPACE */
2950                case 0x202f:    /* NARROW NO-BREAK SPACE */
2951                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2952                case 0x3000:    /* IDEOGRAPHIC SPACE */
2953                RRETURN(MATCH_NOMATCH);
2954                }
2955              }
2956            break;
2957    
2958            case OP_HSPACE:
2959            for (i = 1; i <= min; i++)
2960              {
2961              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2962              GETCHARINC(c, eptr);
2963              switch(c)
2964                {
2965                default: RRETURN(MATCH_NOMATCH);
2966                case 0x09:      /* HT */
2967                case 0x20:      /* SPACE */
2968                case 0xa0:      /* NBSP */
2969                case 0x1680:    /* OGHAM SPACE MARK */
2970                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2971                case 0x2000:    /* EN QUAD */
2972                case 0x2001:    /* EM QUAD */
2973                case 0x2002:    /* EN SPACE */
2974                case 0x2003:    /* EM SPACE */
2975                case 0x2004:    /* THREE-PER-EM SPACE */
2976                case 0x2005:    /* FOUR-PER-EM SPACE */
2977                case 0x2006:    /* SIX-PER-EM SPACE */
2978                case 0x2007:    /* FIGURE SPACE */
2979                case 0x2008:    /* PUNCTUATION SPACE */
2980                case 0x2009:    /* THIN SPACE */
2981                case 0x200A:    /* HAIR SPACE */
2982                case 0x202f:    /* NARROW NO-BREAK SPACE */
2983                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2984                case 0x3000:    /* IDEOGRAPHIC SPACE */
2985                break;
2986                }
2987              }
2988            break;
2989    
2990            case OP_NOT_VSPACE:
2991            for (i = 1; i <= min; i++)
2992              {
2993              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2994              GETCHARINC(c, eptr);
2995              switch(c)
2996                {
2997                default: break;
2998                case 0x0a:      /* LF */
2999                case 0x0b:      /* VT */
3000                case 0x0c:      /* FF */
3001                case 0x0d:      /* CR */
3002                case 0x85:      /* NEL */
3003                case 0x2028:    /* LINE SEPARATOR */
3004                case 0x2029:    /* PARAGRAPH SEPARATOR */
3005                RRETURN(MATCH_NOMATCH);
3006                }
3007              }
3008            break;
3009    
3010            case OP_VSPACE:
3011            for (i = 1; i <= min; i++)
3012              {
3013              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3014              GETCHARINC(c, eptr);
3015              switch(c)
3016                {
3017                default: RRETURN(MATCH_NOMATCH);
3018                case 0x0a:      /* LF */
3019                case 0x0b:      /* VT */
3020                case 0x0c:      /* FF */
3021                case 0x0d:      /* CR */
3022                case 0x85:      /* NEL */
3023                case 0x2028:    /* LINE SEPARATOR */
3024                case 0x2029:    /* PARAGRAPH SEPARATOR */
3025                break;
3026                }
3027              }
3028            break;
3029    
3030          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3031          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3032            {            {
# Line 2890  for (;;) Line 3138  for (;;)
3138            }            }
3139          break;          break;
3140    
3141            case OP_NOT_HSPACE:
3142            for (i = 1; i <= min; i++)
3143              {
3144              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3145              switch(*eptr++)
3146                {
3147                default: break;
3148                case 0x09:      /* HT */
3149                case 0x20:      /* SPACE */
3150                case 0xa0:      /* NBSP */
3151                RRETURN(MATCH_NOMATCH);
3152                }
3153              }
3154            break;
3155    
3156            case OP_HSPACE:
3157            for (i = 1; i <= min; i++)
3158              {
3159              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3160              switch(*eptr++)
3161                {
3162                default: RRETURN(MATCH_NOMATCH);
3163                case 0x09:      /* HT */
3164                case 0x20:      /* SPACE */
3165                case 0xa0:      /* NBSP */
3166                break;
3167                }
3168              }
3169            break;
3170    
3171            case OP_NOT_VSPACE:
3172            for (i = 1; i <= min; i++)
3173              {
3174              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3175              switch(*eptr++)
3176                {
3177                default: break;
3178                case 0x0a:      /* LF */
3179                case 0x0b:      /* VT */
3180                case 0x0c:      /* FF */
3181                case 0x0d:      /* CR */
3182                case 0x85:      /* NEL */
3183                RRETURN(MATCH_NOMATCH);
3184                }
3185              }
3186            break;
3187    
3188            case OP_VSPACE:
3189            for (i = 1; i <= min; i++)
3190              {
3191              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3192              switch(*eptr++)
3193                {
3194                default: RRETURN(MATCH_NOMATCH);
3195                case 0x0a:      /* LF */
3196                case 0x0b:      /* VT */
3197                case 0x0c:      /* FF */
3198                case 0x0d:      /* CR */
3199                case 0x85:      /* NEL */
3200                break;
3201                }
3202              }
3203            break;
3204    
3205          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3206          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3207            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2945  for (;;) Line 3257  for (;;)
3257            case PT_ANY:            case PT_ANY:
3258            for (fi = min;; fi++)            for (fi = min;; fi++)
3259              {              {
3260              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3261              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3262              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3263              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2956  for (;;) Line 3268  for (;;)
3268            case PT_LAMP:            case PT_LAMP:
3269            for (fi = min;; fi++)            for (fi = min;; fi++)
3270              {              {
3271              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3272              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3273              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3274              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2971  for (;;) Line 3283  for (;;)
3283            case PT_GC:            case PT_GC:
3284            for (fi = min;; fi++)            for (fi = min;; fi++)
3285              {              {
3286              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3287              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3288              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3289              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2984  for (;;) Line 3296  for (;;)
3296            case PT_PC:            case PT_PC:
3297            for (fi = min;; fi++)            for (fi = min;; fi++)
3298              {              {
3299              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3300              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3301              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3302              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2997  for (;;) Line 3309  for (;;)
3309            case PT_SC:            case PT_SC:
3310            for (fi = min;; fi++)            for (fi = min;; fi++)
3311              {              {
3312              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3313              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3314              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3315              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 3019  for (;;) Line 3331  for (;;)
3331          {          {
3332          for (fi = min;; fi++)          for (fi = min;; fi++)
3333            {            {
3334            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3335            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3336            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3337            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
# Line 3048  for (;;) Line 3360  for (;;)
3360          {          {
3361          for (fi = min;; fi++)          for (fi = min;; fi++)
3362            {            {
3363            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3364            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3365            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3366                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
# Line 3081  for (;;) Line 3393  for (;;)
3393                }                }
3394              break;              break;
3395    
3396                case OP_NOT_HSPACE:
3397                switch(c)
3398                  {
3399                  default: break;
3400                  case 0x09:      /* HT */
3401                  case 0x20:      /* SPACE */
3402                  case 0xa0:      /* NBSP */
3403                  case 0x1680:    /* OGHAM SPACE MARK */
3404                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3405                  case 0x2000:    /* EN QUAD */
3406                  case 0x2001:    /* EM QUAD */
3407                  case 0x2002:    /* EN SPACE */
3408                  case 0x2003:    /* EM SPACE */
3409                  case 0x2004:    /* THREE-PER-EM SPACE */
3410                  case 0x2005:    /* FOUR-PER-EM SPACE */
3411                  case 0x2006:    /* SIX-PER-EM SPACE */
3412                  case 0x2007:    /* FIGURE SPACE */
3413                  case 0x2008:    /* PUNCTUATION SPACE */
3414                  case 0x2009:    /* THIN SPACE */
3415                  case 0x200A:    /* HAIR SPACE */
3416                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3417                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3418                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3419                  RRETURN(MATCH_NOMATCH);
3420                  }
3421                break;
3422    
3423                case OP_HSPACE:
3424                switch(c)
3425                  {
3426                  default: RRETURN(MATCH_NOMATCH);
3427                  case 0x09:      /* HT */
3428                  case 0x20:      /* SPACE */
3429                  case 0xa0:      /* NBSP */
3430                  case 0x1680:    /* OGHAM SPACE MARK */
3431                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3432                  case 0x2000:    /* EN QUAD */
3433                  case 0x2001:    /* EM QUAD */
3434                  case 0x2002:    /* EN SPACE */
3435                  case 0x2003:    /* EM SPACE */
3436                  case 0x2004:    /* THREE-PER-EM SPACE */
3437                  case 0x2005:    /* FOUR-PER-EM SPACE */
3438                  case 0x2006:    /* SIX-PER-EM SPACE */
3439                  case 0x2007:    /* FIGURE SPACE */
3440                  case 0x2008:    /* PUNCTUATION SPACE */
3441                  case 0x2009:    /* THIN SPACE */
3442                  case 0x200A:    /* HAIR SPACE */
3443                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3444                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3445                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3446                  break;
3447                  }
3448                break;
3449    
3450                case OP_NOT_VSPACE:
3451                switch(c)
3452                  {
3453                  default: break;
3454                  case 0x0a:      /* LF */
3455                  case 0x0b:      /* VT */
3456                  case 0x0c:      /* FF */
3457                  case 0x0d:      /* CR */
3458                  case 0x85:      /* NEL */
3459                  case 0x2028:    /* LINE SEPARATOR */
3460                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3461                  RRETURN(MATCH_NOMATCH);
3462                  }
3463                break;
3464    
3465                case OP_VSPACE:
3466                switch(c)
3467                  {
3468                  default: RRETURN(MATCH_NOMATCH);
3469                  case 0x0a:      /* LF */
3470                  case 0x0b:      /* VT */
3471                  case 0x0c:      /* FF */
3472                  case 0x0d:      /* CR */
3473                  case 0x85:      /* NEL */
3474                  case 0x2028:    /* LINE SEPARATOR */
3475                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3476                  break;
3477                  }
3478                break;
3479    
3480              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3481              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3482                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 3122  for (;;) Line 3518  for (;;)
3518          {          {
3519          for (fi = min;; fi++)          for (fi = min;; fi++)
3520            {            {
3521            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3522            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3523            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3524                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
# Line 3152  for (;;) Line 3548  for (;;)
3548                }                }
3549              break;              break;
3550    
3551                case OP_NOT_HSPACE:
3552                switch(c)
3553                  {
3554                  default: break;
3555                  case 0x09:      /* HT */
3556                  case 0x20:      /* SPACE */
3557                  case 0xa0:      /* NBSP */
3558                  RRETURN(MATCH_NOMATCH);
3559                  }
3560                break;
3561    
3562                case OP_HSPACE:
3563                switch(c)
3564                  {
3565                  default: RRETURN(MATCH_NOMATCH);
3566                  case 0x09:      /* HT */
3567                  case 0x20:      /* SPACE */
3568                  case 0xa0:      /* NBSP */
3569                  break;
3570                  }
3571                break;
3572    
3573                case OP_NOT_VSPACE:
3574                switch(c)
3575                  {
3576                  default: break;
3577                  case 0x0a:      /* LF */
3578                  case 0x0b:      /* VT */
3579                  case 0x0c:      /* FF */
3580                  case 0x0d:      /* CR */
3581                  case 0x85:      /* NEL */
3582                  RRETURN(MATCH_NOMATCH);
3583                  }
3584                break;
3585    
3586                case OP_VSPACE:
3587                switch(c)
3588                  {
3589                  default: RRETURN(MATCH_NOMATCH);
3590                  case 0x0a:      /* LF */
3591                  case 0x0b:      /* VT */
3592                  case 0x0c:      /* FF */
3593                  case 0x0d:      /* CR */
3594                  case 0x85:      /* NEL */
3595                  break;
3596                  }
3597                break;
3598    
3599              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3600              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3601              break;              break;
# Line 3268  for (;;) Line 3712  for (;;)
3712          if (possessive) continue;          if (possessive) continue;
3713          for(;;)          for(;;)
3714            {            {
3715            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3716            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3717            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3718            BACKCHAR(eptr);            BACKCHAR(eptr);
# Line 3304  for (;;) Line 3748  for (;;)
3748          if (possessive) continue;          if (possessive) continue;
3749          for(;;)          for(;;)
3750            {            {
3751            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3752            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3753            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3754            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
# Line 3333  for (;;) Line 3777  for (;;)
3777          switch(ctype)          switch(ctype)
3778            {            {
3779            case OP_ANY:            case OP_ANY:
   
           /* Special code is required for UTF8, but when the maximum is  
           unlimited we don't need it, so we repeat the non-UTF8 code. This is  
           probably worth it, because .* is quite a common idiom. */  
   
3780            if (max < INT_MAX)            if (max < INT_MAX)
3781              {              {
3782              if ((ims & PCRE_DOTALL) == 0)              if ((ims & PCRE_DOTALL) == 0)
# Line 3370  for (;;) Line 3809  for (;;)
3809                  {                  {
3810                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3811                  eptr++;                  eptr++;
3812                    while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3813                  }                  }
               break;  
3814                }                }
3815              else              else
3816                {                {
3817                c = max - min;                eptr = md->end_subject;
               if (c > (unsigned int)(md->end_subject - eptr))  
                 c = md->end_subject - eptr;  
               eptr += c;  
3818                }                }
3819              }              }
3820            break;            break;
# Line 3413  for (;;) Line 3849  for (;;)
3849              }              }
3850            break;            break;
3851    
3852              case OP_NOT_HSPACE:
3853              case OP_HSPACE:
3854              for (i = min; i < max; i++)
3855                {
3856                BOOL gotspace;
3857                int len = 1;
3858                if (eptr >= md->end_subject) break;
3859                GETCHARLEN(c, eptr, len);
3860                switch(c)
3861                  {
3862                  default: gotspace = FALSE; break;
3863                  case 0x09:      /* HT */
3864                  case 0x20:      /* SPACE */
3865                  case 0xa0:      /* NBSP */
3866                  case 0x1680:    /* OGHAM SPACE MARK */
3867                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3868                  case 0x2000:    /* EN QUAD */
3869                  case 0x2001:    /* EM QUAD */
3870                  case 0x2002:    /* EN SPACE */
3871                  case 0x2003:    /* EM SPACE */
3872                  case 0x2004:    /* THREE-PER-EM SPACE */
3873                  case 0x2005:    /* FOUR-PER-EM SPACE */
3874                  case 0x2006:    /* SIX-PER-EM SPACE */
3875                  case 0x2007:    /* FIGURE SPACE */
3876                  case 0x2008:    /* PUNCTUATION SPACE */
3877                  case 0x2009:    /* THIN SPACE */
3878                  case 0x200A:    /* HAIR SPACE */
3879                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3880                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3881                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3882                  gotspace = TRUE;
3883                  break;
3884                  }
3885                if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3886                eptr += len;
3887                }
3888              break;
3889    
3890              case OP_NOT_VSPACE:
3891              case OP_VSPACE:
3892              for (i = min; i < max; i++)
3893                {
3894                BOOL gotspace;
3895                int len = 1;
3896                if (eptr >= md->end_subject) break;
3897                GETCHARLEN(c, eptr, len);
3898                switch(c)
3899                  {
3900                  default: gotspace = FALSE; break;
3901                  case 0x0a:      /* LF */
3902                  case 0x0b:      /* VT */
3903                  case 0x0c:      /* FF */
3904                  case 0x0d:      /* CR */
3905                  case 0x85:      /* NEL */
3906                  case 0x2028:    /* LINE SEPARATOR */
3907                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3908                  gotspace = TRUE;
3909                  break;
3910                  }
3911                if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3912                eptr += len;
3913                }
3914              break;
3915    
3916            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3917            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3918              {              {
# Line 3488  for (;;) Line 3988  for (;;)
3988          if (possessive) continue;          if (possessive) continue;
3989          for(;;)          for(;;)
3990            {            {
3991            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
3992            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3993            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3994            BACKCHAR(eptr);            BACKCHAR(eptr);
# Line 3539  for (;;) Line 4039  for (;;)
4039              }              }
4040            break;            break;
4041    
4042              case OP_NOT_HSPACE:
4043              for (i = min; i < max; i++)
4044                {
4045                if (eptr >= md->end_subject) break;
4046                c = *eptr;
4047                if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4048                eptr++;
4049                }
4050              break;
4051    
4052              case OP_HSPACE:
4053              for (i = min; i < max; i++)
4054                {
4055                if (eptr >= md->end_subject) break;
4056                c = *eptr;
4057                if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4058                eptr++;
4059                }
4060              break;
4061    
4062              case OP_NOT_VSPACE:
4063              for (i = min; i < max; i++)
4064                {
4065                if (eptr >= md->end_subject) break;
4066                c = *eptr;
4067                if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4068                  break;
4069                eptr++;
4070                }
4071              break;
4072    
4073              case OP_VSPACE:
4074              for (i = min; i < max; i++)
4075                {
4076                if (eptr >= md->end_subject) break;
4077                c = *eptr;
4078                if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4079                  break;
4080                eptr++;
4081                }
4082              break;
4083    
4084            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
4085            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4086              {              {
# Line 3602  for (;;) Line 4144  for (;;)
4144          if (possessive) continue;          if (possessive) continue;
4145          while (eptr >= pp)          while (eptr >= pp)
4146            {            {
4147            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4148            eptr--;            eptr--;
4149            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4150            }            }
# Line 3628  for (;;) Line 4170  for (;;)
4170    
4171    }             /* End of main loop */    }             /* End of main loop */
4172  /* Control never reaches here */  /* Control never reaches here */
4173    
4174    
4175    /* When compiling to use the heap rather than the stack for recursive calls to
4176    match(), the RRETURN() macro jumps here. The number that is saved in
4177    frame->Xwhere indicates which label we actually want to return to. */
4178    
4179    #ifdef NO_RECURSE
4180    #define LBL(val) case val: goto L_RM##val;
4181    HEAP_RETURN:
4182    switch (frame->Xwhere)
4183      {
4184      LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4185      LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
4186      LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
4187      LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
4188      LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
4189      LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
4190      default:
4191      DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4192      return PCRE_ERROR_INTERNAL;
4193      }
4194    #undef LBL
4195    #endif  /* NO_RECURSE */
4196  }  }
4197    
4198    
# Line 3640  Undefine all the macros that were define Line 4205  Undefine all the macros that were define
4205  #ifdef NO_RECURSE  #ifdef NO_RECURSE
4206  #undef eptr  #undef eptr
4207  #undef ecode  #undef ecode
4208    #undef mstart
4209  #undef offset_top  #undef offset_top
4210  #undef ims  #undef ims
4211  #undef eptrb  #undef eptrb
# Line 3712  Returns:          > 0 => success; value Line 4278  Returns:          > 0 => success; value
4278                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4279  */  */
4280    
4281  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
4282  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4283    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4284    int offsetcount)    int offsetcount)
# Line 3737  const uschar *start_bits = NULL; Line 4303  const uschar *start_bits = NULL;
4303  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
4304  USPTR end_subject;  USPTR end_subject;
4305  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
 eptrblock eptrchain[EPTR_WORK_SIZE];  
4306    
4307  pcre_study_data internal_study;  pcre_study_data internal_study;
4308  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3823  md->partial = (options & PCRE_PARTIAL) ! Line 4388  md->partial = (options & PCRE_PARTIAL) !
4388  md->hitend = FALSE;  md->hitend = FALSE;
4389    
4390  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
 md->eptrchain = eptrchain;              /* Make workspace generally available */  
4391    
4392  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
4393  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
# Line 3840  switch ((((options & PCRE_NEWLINE_BITS) Line 4404  switch ((((options & PCRE_NEWLINE_BITS)
4404    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
4405         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4406    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
4407      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4408    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
4409    }    }
4410    
4411  if (newline < 0)  if (newline == -2)
4412      {
4413      md->nltype = NLTYPE_ANYCRLF;
4414      }
4415    else if (newline < 0)
4416    {    {
4417    md->nltype = NLTYPE_ANY;    md->nltype = NLTYPE_ANY;
4418    }    }
# Line 4019  for(;;) Line 4588  for(;;)
4588        while (start_match <= end_subject && !WAS_NEWLINE(start_match))        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4589          start_match++;          start_match++;
4590    
4591        /* If we have just passed a CR and the newline option is ANY, and we are        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4592        now at a LF, advance the match position by one more character. */        and we are now at a LF, advance the match position by one more character.
4593          */
4594    
4595        if (start_match[-1] == '\r' &&        if (start_match[-1] == '\r' &&
4596             md->nltype == NLTYPE_ANY &&             (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4597             start_match < end_subject &&             start_match < end_subject &&
4598             *start_match == '\n')             *start_match == '\n')
4599          start_match++;          start_match++;
# Line 4113  for(;;) Line 4683  for(;;)
4683    
4684    /* OK, we can now run the match. */    /* OK, we can now run the match. */
4685    
4686    md->start_match = start_match;    md->start_match_ptr = start_match;      /* Insurance */
4687    md->match_call_count = 0;    md->match_call_count = 0;
4688    md->eptrn = 0;                          /* Next free eptrchain slot */    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
   rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);  
4689    
4690    /* Any return other than MATCH_NOMATCH breaks the loop. */    /* Any return other than MATCH_NOMATCH breaks the loop. */
4691    
# Line 4142  for(;;) Line 4711  for(;;)
4711    
4712    if (anchored || start_match > end_subject) break;    if (anchored || start_match > end_subject) break;
4713    
4714    /* If we have just passed a CR and the newline option is CRLF or ANY, and we    /* If we have just passed a CR and the newline option is CRLF or ANY or
4715    are now at a LF, advance the match position by one more character. */    ANYCRLF, and we are now at a LF, advance the match position by one more
4716      character. */
4717    
4718    if (start_match[-1] == '\r' &&    if (start_match[-1] == '\r' &&
4719         (md->nltype == NLTYPE_ANY || md->nllen == 2) &&         (md->nltype == NLTYPE_ANY ||
4720            md->nltype == NLTYPE_ANYCRLF ||
4721            md->nllen == 2) &&
4722         start_match < end_subject &&         start_match < end_subject &&
4723         *start_match == '\n')         *start_match == '\n')
4724      start_match++;      start_match++;
# Line 4193  if (rc == MATCH_MATCH) Line 4765  if (rc == MATCH_MATCH)
4765    
4766    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = md->offset_overflow? 0 : md->end_offset_top/2;
4767    
4768    /* If there is space, set up the whole thing as substring 0. */    /* If there is space, set up the whole thing as substring 0. The value of
4769      md->start_match_ptr might be modified if \K was encountered on the successful
4770      matching path. */
4771    
4772    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4773      {      {
4774      offsets[0] = start_match - md->start_subject;      offsets[0] = md->start_match_ptr - md->start_subject;
4775      offsets[1] = md->end_match_ptr - md->start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
4776      }      }
4777    

Legend:
Removed from v.144  
changed lines
  Added in v.197

  ViewVC Help
Powered by ViewVC 1.1.5