/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 115 by ph10, Fri Mar 9 12:23:37 2007 UTC
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45  #define NLBLOCK md           /* The block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
46    #define PSSTART start_subject  /* Field containing processed string start */
47    #define PSEND   end_subject    /* Field containing processed string end */
48    
49  #include "pcre_internal.h"  #include "pcre_internal.h"
50    
51    /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
52    obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
53    
54  /* Structure for building a chain of data that actually lives on the  #define EPTR_WORK_SIZE (1000)
 stack, for holding the values of the subject pointer at the start of each  
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   USPTR epb_saved_eptr;  
 } eptrblock;  
55    
56  /* Flag bits for the match() function */  /* Flag bits for the match() function */
57    
58  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
59  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
60    #define match_tail_recursed  0x04  /* Tail recursive call */
61    
62  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
63  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 101  Returns:     nothing Line 98  Returns:     nothing
98  static void  static void
99  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
100  {  {
101  int c;  unsigned int c;
102  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
103  while (length-- > 0)  while (length-- > 0)
104    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 291  typedef struct heapframe { Line 288  typedef struct heapframe {
288    
289    BOOL Xcur_is_word;    BOOL Xcur_is_word;
290    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
291    BOOL Xprev_is_word;    BOOL Xprev_is_word;
292    
293    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
# Line 303  typedef struct heapframe { Line 299  typedef struct heapframe {
299    int Xprop_category;    int Xprop_category;
300    int Xprop_chartype;    int Xprop_chartype;
301    int Xprop_script;    int Xprop_script;
302    int *Xprop_test_variable;    int Xoclength;
303      uschar Xocchars[8];
304  #endif  #endif
305    
306    int Xctype;    int Xctype;
307    int Xfc;    unsigned int Xfc;
308    int Xfi;    int Xfi;
309    int Xlength;    int Xlength;
310    int Xmax;    int Xmax;
# Line 340  typedef struct heapframe { Line 337  typedef struct heapframe {
337  *         Match from current position            *  *         Match from current position            *
338  *************************************************/  *************************************************/
339    
340  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
341  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
342  same response.  same response.
343    
# Line 353  performance. Tests using gcc on a SPARC Line 347  performance. Tests using gcc on a SPARC
347  made performance worse.  made performance worse.
348    
349  Arguments:  Arguments:
350     eptr        pointer in subject     eptr        pointer to current character in subject
351     ecode       position in code     ecode       pointer to current position in compiled code
352     offset_top  current top pointer     offset_top  current top pointer
353     md          pointer to "static" info for the match     md          pointer to "static" info for the match
354     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 362  Arguments: Line 356  Arguments:
356                   brackets - for testing for empty matches                   brackets - for testing for empty matches
357     flags       can contain     flags       can contain
358                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
359                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
360                       group that can match an empty string
361                     match_tail_recursed - this is a tail_recursed group
362     rdepth      the recursion depth     rdepth      the recursion depth
363    
364  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 377  match(REGISTER USPTR eptr, REGISTER cons Line 373  match(REGISTER USPTR eptr, REGISTER cons
373    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
374  {  {
375  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
376  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
377  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
378    
379  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
380  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
381  register unsigned int  c;  /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
382  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
383    
384    BOOL minimize, possessive; /* Quantifier options */
385    
386  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
387  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
388  heap storage. Set up the top-level frame here; others are obtained from the  heap storage. Set up the top-level frame here; others are obtained from the
# Line 434  HEAP_RECURSE: Line 432  HEAP_RECURSE:
432    
433  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
434  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
435  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
436    
437  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
# Line 446  HEAP_RECURSE: Line 443  HEAP_RECURSE:
443  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
444  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
445  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
446  #define prop_test_variable frame->Xprop_test_variable  #define oclength           frame->Xoclength
447    #define occhars            frame->Xocchars
448  #endif  #endif
449    
450  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 470  HEAP_RECURSE: Line 468  HEAP_RECURSE:
468  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
469  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
470    
471  #else  #else         /* NO_RECURSE not defined */
472  #define fi i  #define fi i
473  #define fc c  #define fc c
474    
# Line 489  recursion_info new_recursive;      /* wi Line 487  recursion_info new_recursive;      /* wi
487                                     /* that do not have to be preserved over  */                                     /* that do not have to be preserved over  */
488  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
489  BOOL condition;  BOOL condition;
 BOOL minimize;  
490  BOOL prev_is_word;  BOOL prev_is_word;
491    
492  unsigned long int original_ims;  unsigned long int original_ims;
# Line 501  int prop_fail_result; Line 498  int prop_fail_result;
498  int prop_category;  int prop_category;
499  int prop_chartype;  int prop_chartype;
500  int prop_script;  int prop_script;
501  int *prop_test_variable;  int oclength;
502    uschar occhars[8];
503  #endif  #endif
504    
505  int ctype;  int ctype;
# Line 516  int save_offset1, save_offset2, save_off Line 514  int save_offset1, save_offset2, save_off
514  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
515    
516  eptrblock newptrb;  eptrblock newptrb;
517  #endif  #endif     /* NO_RECURSE */
518    
519  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
520  variables. */  variables. */
# Line 524  variables. */ Line 522  variables. */
522  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
523  prop_value = 0;  prop_value = 0;
524  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_variable = NULL;  
525  #endif  #endif
526    
527    
528  /* This label is used for tail recursion, which is used in a few cases even  /* This label is used for tail recursion, which is used in a few cases even
529  when NO_RECURSE is not defined, in order to reduce the amount of stack that is  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
530  used. Thanks to Ian Taylor for noticing this possibility and sending the  used. Thanks to Ian Taylor for noticing this possibility and sending the
# Line 556  utf8 = md->utf8;       /* Local copy of Line 554  utf8 = md->utf8;       /* Local copy of
554  utf8 = FALSE;  utf8 = FALSE;
555  #endif  #endif
556    
557  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
558  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
559  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
560  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
561    When match() is called in other circumstances, don't add to the chain. If this
562    is a tail recursion, use a block from the workspace, as the one on the stack is
563    already used. */
564    
565  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
566    {    {
567    newptrb.epb_prev = eptrb;    eptrblock *p;
568    newptrb.epb_saved_eptr = eptr;    if ((flags & match_tail_recursed) != 0)
569    eptrb = &newptrb;      {
570        if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
571        p = md->eptrchain + md->eptrn++;
572        }
573      else p = &newptrb;
574      p->epb_saved_eptr = eptr;
575      p->epb_prev = eptrb;
576      eptrb = p;
577    }    }
578    
579  /* Now start processing the operations. */  /* Now start processing the opcodes. */
580    
581  for (;;)  for (;;)
582    {    {
583      minimize = possessive = FALSE;
584    op = *ecode;    op = *ecode;
   minimize = FALSE;  
585    
586    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
587    matching at least one subject character. */    matching at least one subject character. */
# Line 583  for (;;) Line 591  for (;;)
591        eptr > md->start_match)        eptr > md->start_match)
592      md->hitend = TRUE;      md->hitend = TRUE;
593    
594    /* Opening capturing bracket. If there is space in the offset vector, save    switch(op)
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
595      {      {
596      number = op - OP_BRA;      /* Handle a capturing bracket. If there is space in the offset vector, save
597        the current subject position in the working slot at the top of the vector.
598      /* For extended extraction brackets (large number), we have to fish out the      We mustn't change the current values of the data slot, because they may be
599      number from a dummy opcode at the start. */      set from a previous iteration of this group, and be referred to by a
600        reference inside the group.
601      if (number > EXTRACT_BASIC_MAX)  
602        number = GET2(ecode, 2+LINK_SIZE);      If the bracket fails to match, we need to restore this value and also the
603        values of the final offsets, in case they were set by a previous iteration
604        of the same bracket.
605    
606        If there isn't enough space in the offset vector, treat this as if it were
607        a non-capturing bracket. Don't worry about setting the flag for the error
608        case here; that is handled in the code for KET. */
609    
610        case OP_CBRA:
611        case OP_SCBRA:
612        number = GET2(ecode, 1+LINK_SIZE);
613      offset = number << 1;      offset = number << 1;
614    
615  #ifdef DEBUG  #ifdef DEBUG
616      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
617        printf("subject=");
618      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
619      printf("\n");      printf("\n");
620  #endif  #endif
# Line 624  for (;;) Line 629  for (;;)
629        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
630        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
631    
632          flags = (op == OP_SCBRA)? match_cbegroup : 0;
633        do        do
634          {          {
635          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
636            match_isgroup);            ims, eptrb, flags);
637          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
638          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
639          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
# Line 643  for (;;) Line 649  for (;;)
649        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
650        }        }
651    
652      /* Insufficient room for saving captured contents */      /* Insufficient room for saving captured contents. Treat as a non-capturing
653        bracket. */
     else op = OP_BRA;  
     }  
   
   /* Other types of node can be handled by a switch */  
654    
655    switch(op)      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
     {  
     case OP_BRA:     /* Non-capturing bracket: optimized */  
     DPRINTF(("start bracket 0\n"));  
   
     /* Loop for all the alternatives */  
656    
657        /* Non-capturing bracket. Loop for all the alternatives. When we get to the
658        final alternative within the brackets, we would return the result of a
659        recursive call to match() whatever happened. We can reduce stack usage by
660        turning this into a tail recursion. */
661    
662        case OP_BRA:
663        case OP_SBRA:
664        DPRINTF(("start non-capturing bracket\n"));
665        flags = (op >= OP_SBRA)? match_cbegroup : 0;
666      for (;;)      for (;;)
667        {        {
       /* When we get to the final alternative within the brackets, we would  
       return the result of a recursive call to match() whatever happened. We  
       can reduce stack usage by turning this into a tail recursion. */  
   
668        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)
669         {          {
670         ecode += 1 + LINK_SIZE;          ecode += _pcre_OP_lengths[*ecode];
671         flags = match_isgroup;          flags |= match_tail_recursed;
672         DPRINTF(("bracket 0 tail recursion\n"));          DPRINTF(("bracket 0 tail recursion\n"));
673         goto TAIL_RECURSE;          goto TAIL_RECURSE;
674         }          }
675    
676        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
677        otherwise return. */        otherwise return. */
678    
679        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
680          match_isgroup);          eptrb, flags);
681        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
682        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
683        }        }
# Line 688  for (;;) Line 690  for (;;)
690      obeyed, we can use tail recursion to avoid using another stack frame. */      obeyed, we can use tail recursion to avoid using another stack frame. */
691    
692      case OP_COND:      case OP_COND:
693      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
694        if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
695          {
696          offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
697          condition = md->recursive != NULL &&
698            (offset == RREF_ANY || offset == md->recursive->group_num);
699          ecode += condition? 3 : GET(ecode, 1);
700          }
701    
702        else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
703        {        {
704        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
705        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
706          (md->recursive != NULL) :        ecode += condition? 3 : GET(ecode, 1);
707          (offset < offset_top && md->offset_vector[offset] >= 0);        }
708        ecode += condition? (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1));  
709        flags = match_isgroup;      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
710        goto TAIL_RECURSE;        {
711          condition = FALSE;
712          ecode += GET(ecode, 1);
713        }        }
714    
715      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
716      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
717        assertion. */
718    
719      else      else
720        {        {
721        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
722            match_condassert | match_isgroup);            match_condassert);
723        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
724          {          {
725          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
726            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
727          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
728          }          }
729        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH)
730          {          {
731          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
732          }          }
733        else ecode += GET(ecode, 1);        else
734            {
735            condition = FALSE;
736            ecode += GET(ecode, 1);
737            }
738          }
739    
740        /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
741        we can use tail recursion to avoid using another stack frame. */      we can use tail recursion to avoid using another stack frame. If the second
742        alternative doesn't exist, we can just plough on. */
743    
744        if (condition || *ecode == OP_ALT)
745          {
746        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
747        flags = match_isgroup;        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
748        goto TAIL_RECURSE;        goto TAIL_RECURSE;
749        }        }
750      /* Control never reaches here */      else
751          {
752      /* Skip over conditional reference or large extraction number data if        ecode += 1 + LINK_SIZE;
753      encountered. */        }
   
     case OP_CREF:  
     case OP_BRANUMBER:  
     ecode += 3;  
754      break;      break;
755    
756      /* End of the pattern. If we are in a recursion, we should restore the  
757      offsets appropriately and continue from after the call. */      /* End of the pattern. If we are in a top-level recursion, we should
758        restore the offsets appropriately and continue from after the call. */
759    
760      case OP_END:      case OP_END:
761      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
# Line 777  for (;;) Line 797  for (;;)
797      case OP_ASSERTBACK:      case OP_ASSERTBACK:
798      do      do
799        {        {
800        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
         match_isgroup);  
801        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
802        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
803        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 804  for (;;) Line 823  for (;;)
823      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
824      do      do
825        {        {
826        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
         match_isgroup);  
827        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
828        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
829        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 826  for (;;) Line 844  for (;;)
844  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
845      if (utf8)      if (utf8)
846        {        {
847        c = GET(ecode,1);        i = GET(ecode, 1);
848        for (i = 0; i < c; i++)        while (i-- > 0)
849          {          {
850          eptr--;          eptr--;
851          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
# Line 840  for (;;) Line 858  for (;;)
858      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
859    
860        {        {
861        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
862        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
863        }        }
864    
# Line 897  for (;;) Line 915  for (;;)
915      case OP_RECURSE:      case OP_RECURSE:
916        {        {
917        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
918        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
919            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
920    
921        /* Add to "recursing stack" */        /* Add to "recursing stack" */
922    
# Line 936  for (;;) Line 949  for (;;)
949        restore the offset and recursion data. */        restore the offset and recursion data. */
950    
951        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
952          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
953        do        do
954          {          {
955          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
956              eptrb, match_isgroup);            md, ims, eptrb, flags);
957          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
958            {            {
959            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
# Line 983  for (;;) Line 997  for (;;)
997      do      do
998        {        {
999        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
1000          eptrb, match_isgroup);          eptrb, 0);
1001        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1002        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1003        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 997  for (;;) Line 1011  for (;;)
1011      /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1012      mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1013    
1014      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1015    
1016      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1017      eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
# Line 1031  for (;;) Line 1045  for (;;)
1045        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
1046        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1047        ecode = prev;        ecode = prev;
1048        flags = match_isgroup;        flags = match_tail_recursed;
1049        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1050        }        }
1051      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1052        {        {
1053        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);
1054        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1055        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1056        flags = 0;        flags = match_tail_recursed;
1057        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1058        }        }
1059      /* Control never gets here */      /* Control never gets here */
# Line 1060  for (;;) Line 1074  for (;;)
1074      case OP_BRAZERO:      case OP_BRAZERO:
1075        {        {
1076        next = ecode+1;        next = ecode+1;
1077        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);
1078        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1079        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1080        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1081        }        }
1082      break;      break;
1083    
1084      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1085        {        {
1086        next = ecode+1;        next = ecode+1;
1087        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1088        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
         match_isgroup);  
1089        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1090        ecode++;        ecode++;
1091        }        }
1092      break;      break;
1093    
1094      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. */
     an assertion "group", stop matching and return MATCH_MATCH, but record the  
     current high water mark for use by positive assertions. Do this also  
     for the "once" (not-backup up) groups. */  
1095    
1096      case OP_KET:      case OP_KET:
1097      case OP_KETRMIN:      case OP_KETRMIN:
1098      case OP_KETRMAX:      case OP_KETRMAX:
1099      prev = ecode - GET(ecode, 1);      prev = ecode - GET(ecode, 1);
     saved_eptr = eptrb->epb_saved_eptr;  
1100    
1101      /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1102        infinite repeats of empty string matches, retrieve the subject start from
1103        the chain. Otherwise, set it NULL. */
1104    
1105        if (*prev >= OP_SBRA)
1106          {
1107          saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1108          eptrb = eptrb->epb_prev;              /* Backup to previous group */
1109          }
1110        else saved_eptr = NULL;
1111    
1112      eptrb = eptrb->epb_prev;      /* If we are at the end of an assertion group, stop matching and return
1113        MATCH_MATCH, but record the current high water mark for use by positive
1114        assertions. Do this also for the "once" (atomic) groups. */
1115    
1116      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1117          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1102  for (;;) Line 1122  for (;;)
1122        RRETURN(MATCH_MATCH);        RRETURN(MATCH_MATCH);
1123        }        }
1124    
1125      /* In all other cases except a conditional group we have to check the      /* For capturing groups we have to check the group number back at the start
1126      group number back at the start and if necessary complete handling an      and if necessary complete handling an extraction by setting the offsets and
1127      extraction by setting the offsets and bumping the high water mark. */      bumping the high water mark. Note that whole-pattern recursion is coded as
1128        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1129        when the OP_END is reached. Other recursion is handled here. */
1130    
1131      if (*prev != OP_COND)      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1132        {        {
1133        number = *prev - OP_BRA;        number = GET2(prev, 1+LINK_SIZE);
   
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);  
1134        offset = number << 1;        offset = number << 1;
1135    
1136  #ifdef DEBUG  #ifdef DEBUG
# Line 1121  for (;;) Line 1138  for (;;)
1138        printf("\n");        printf("\n");
1139  #endif  #endif
1140    
1141        /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1142        of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
       into group 0, so it won't be picked up here. Instead, we catch it when  
       the OP_END is reached. */  
   
       if (number > 0)  
1143          {          {
1144          md->capture_last = number;          md->offset_vector[offset] =
1145          if (offset >= md->offset_max) md->offset_overflow = TRUE; else            md->offset_vector[md->offset_end - number];
1146            {          md->offset_vector[offset+1] = eptr - md->start_subject;
1147            md->offset_vector[offset] =          if (offset_top <= offset) offset_top = offset + 2;
1148              md->offset_vector[md->offset_end - number];          }
1149            md->offset_vector[offset+1] = eptr - md->start_subject;  
1150            if (offset_top <= offset) offset_top = offset + 2;        /* Handle a recursively called group. Restore the offsets
1151            }        appropriately and continue from after the call. */
1152    
1153          /* Handle a recursively called group. Restore the offsets        if (md->recursive != NULL && md->recursive->group_num == number)
1154          appropriately and continue from after the call. */          {
1155            recursion_info *rec = md->recursive;
1156          if (md->recursive != NULL && md->recursive->group_num == number)          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1157            {          md->recursive = rec->prevrec;
1158            recursion_info *rec = md->recursive;          md->start_match = rec->save_start;
1159            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          memcpy(md->offset_vector, rec->offset_save,
1160            md->recursive = rec->prevrec;            rec->saved_max * sizeof(int));
1161            md->start_match = rec->save_start;          ecode = rec->after_call;
1162            memcpy(md->offset_vector, rec->offset_save,          ims = original_ims;
1163              rec->saved_max * sizeof(int));          break;
           ecode = rec->after_call;  
           ims = original_ims;  
           break;  
           }  
1164          }          }
1165        }        }
1166    
1167      /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1168      the group. */      flags, in case they got changed during the group. */
1169    
1170      ims = original_ims;      ims = original_ims;
1171      DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
# Line 1177  for (;;) Line 1186  for (;;)
1186      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1187      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame. */
1188    
1189        flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1190    
1191      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1192        {        {
1193        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1194        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1195        ecode = prev;        ecode = prev;
1196        flags = match_isgroup;        flags |= match_tail_recursed;
1197        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1198        }        }
1199      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1200        {        {
1201        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);
1202        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1203        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1204        flags = 0;        flags = match_tail_recursed;
1205        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1206        }        }
1207      /* Control never gets here */      /* Control never gets here */
# Line 1202  for (;;) Line 1213  for (;;)
1213      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1214        {        {
1215        if (eptr != md->start_subject &&        if (eptr != md->start_subject &&
1216            (eptr == md->end_subject ||            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
            eptr < md->start_subject + md->nllen ||  
            !IS_NEWLINE(eptr - md->nllen)))  
1217          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1218        ecode++;        ecode++;
1219        break;        break;
# Line 1244  for (;;) Line 1253  for (;;)
1253        if (!md->endonly)        if (!md->endonly)
1254          {          {
1255          if (eptr != md->end_subject &&          if (eptr != md->end_subject &&
1256              (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1257            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1258          ecode++;          ecode++;
1259          break;          break;
# Line 1263  for (;;) Line 1272  for (;;)
1272    
1273      case OP_EODN:      case OP_EODN:
1274      if (eptr != md->end_subject &&      if (eptr != md->end_subject &&
1275          (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1276        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1277      ecode++;      ecode++;
1278      break;      break;
# Line 1319  for (;;) Line 1328  for (;;)
1328      case OP_ANY:      case OP_ANY:
1329      if ((ims & PCRE_DOTALL) == 0)      if ((ims & PCRE_DOTALL) == 0)
1330        {        {
1331        if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
         RRETURN(MATCH_NOMATCH);  
1332        }        }
1333      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1334      if (utf8)      if (utf8)
# Line 1414  for (;;) Line 1422  for (;;)
1422      ecode++;      ecode++;
1423      break;      break;
1424    
1425        case OP_ANYNL:
1426        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1427        GETCHARINCTEST(c, eptr);
1428        switch(c)
1429          {
1430          default: RRETURN(MATCH_NOMATCH);
1431          case 0x000d:
1432          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1433          break;
1434          case 0x000a:
1435          case 0x000b:
1436          case 0x000c:
1437          case 0x0085:
1438          case 0x2028:
1439          case 0x2029:
1440          break;
1441          }
1442        ecode++;
1443        break;
1444    
1445  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1446      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1447      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1456  for (;;) Line 1484  for (;;)
1484    
1485          default:          default:
1486          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
         break;  
1487          }          }
1488    
1489        ecode += 3;        ecode += 3;
# Line 1926  for (;;) Line 1953  for (;;)
1953    
1954        else        else
1955          {          {
1956          int dc;          unsigned int dc;
1957          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
1958          ecode += length;          ecode += length;
1959    
# Line 1953  for (;;) Line 1980  for (;;)
1980        }        }
1981      break;      break;
1982    
1983      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
1984    
1985      case OP_EXACT:      case OP_EXACT:
1986      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
1987      ecode += 3;      ecode += 3;
1988      goto REPEATCHAR;      goto REPEATCHAR;
1989    
1990        case OP_POSUPTO:
1991        possessive = TRUE;
1992        /* Fall through */
1993    
1994      case OP_UPTO:      case OP_UPTO:
1995      case OP_MINUPTO:      case OP_MINUPTO:
1996      min = 0;      min = 0;
# Line 1968  for (;;) Line 1999  for (;;)
1999      ecode += 3;      ecode += 3;
2000      goto REPEATCHAR;      goto REPEATCHAR;
2001    
2002        case OP_POSSTAR:
2003        possessive = TRUE;
2004        min = 0;
2005        max = INT_MAX;
2006        ecode++;
2007        goto REPEATCHAR;
2008    
2009        case OP_POSPLUS:
2010        possessive = TRUE;
2011        min = 1;
2012        max = INT_MAX;
2013        ecode++;
2014        goto REPEATCHAR;
2015    
2016        case OP_POSQUERY:
2017        possessive = TRUE;
2018        min = 0;
2019        max = 1;
2020        ecode++;
2021        goto REPEATCHAR;
2022    
2023      case OP_STAR:      case OP_STAR:
2024      case OP_MINSTAR:      case OP_MINSTAR:
2025      case OP_PLUS:      case OP_PLUS:
# Line 1999  for (;;) Line 2051  for (;;)
2051    
2052        if (length > 1)        if (length > 1)
2053          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2054  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2055          int othercase;          unsigned int othercase;
2056          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2057              (othercase = _pcre_ucp_othercase(fc)) >= 0 &&              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
              othercase >= 0)  
2058            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2059            else oclength = 0;
2060  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2061    
2062          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2063            {            {
2064            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (memcmp(eptr, charptr, length) == 0) eptr += length;
2065    #ifdef SUPPORT_UCP
2066            /* Need braces because of following else */            /* Need braces because of following else */
2067            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2068            else            else
# Line 2020  for (;;) Line 2070  for (;;)
2070              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2071              eptr += oclength;              eptr += oclength;
2072              }              }
2073    #else   /* without SUPPORT_UCP */
2074              else { RRETURN(MATCH_NOMATCH); }
2075    #endif  /* SUPPORT_UCP */
2076            }            }
2077    
2078          if (min == max) continue;          if (min == max) continue;
# Line 2032  for (;;) Line 2085  for (;;)
2085              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2086              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2087              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2088    #ifdef SUPPORT_UCP
2089              /* Need braces because of following else */              /* Need braces because of following else */
2090              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2091              else              else
# Line 2039  for (;;) Line 2093  for (;;)
2093                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2094                eptr += oclength;                eptr += oclength;
2095                }                }
2096    #else   /* without SUPPORT_UCP */
2097                else { RRETURN (MATCH_NOMATCH); }
2098    #endif  /* SUPPORT_UCP */
2099              }              }
2100            /* Control never gets here */            /* Control never gets here */
2101            }            }
2102          else  
2103            else  /* Maximize */
2104            {            {
2105            pp = eptr;            pp = eptr;
2106            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2107              {              {
2108              if (eptr > md->end_subject - length) break;              if (eptr > md->end_subject - length) break;
2109              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2110    #ifdef SUPPORT_UCP
2111              else if (oclength == 0) break;              else if (oclength == 0) break;
2112              else              else
2113                {                {
2114                if (memcmp(eptr, occhars, oclength) != 0) break;                if (memcmp(eptr, occhars, oclength) != 0) break;
2115                eptr += oclength;                eptr += oclength;
2116                }                }
2117    #else   /* without SUPPORT_UCP */
2118                else break;
2119    #endif  /* SUPPORT_UCP */
2120              }              }
2121    
2122              if (possessive) continue;
2123            while (eptr >= pp)            while (eptr >= pp)
2124             {             {
2125             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2126             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2127    #ifdef SUPPORT_UCP
2128               eptr--;
2129               BACKCHAR(eptr);
2130    #else   /* without SUPPORT_UCP */
2131             eptr -= length;             eptr -= length;
2132    #endif  /* SUPPORT_UCP */
2133             }             }
2134            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2135            }            }
# Line 2110  for (;;) Line 2179  for (;;)
2179            }            }
2180          /* Control never gets here */          /* Control never gets here */
2181          }          }
2182        else        else  /* Maximize */
2183          {          {
2184          pp = eptr;          pp = eptr;
2185          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2118  for (;;) Line 2187  for (;;)
2187            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2188            eptr++;            eptr++;
2189            }            }
2190            if (possessive) continue;
2191          while (eptr >= pp)          while (eptr >= pp)
2192            {            {
2193            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2146  for (;;) Line 2216  for (;;)
2216            }            }
2217          /* Control never gets here */          /* Control never gets here */
2218          }          }
2219        else        else  /* Maximize */
2220          {          {
2221          pp = eptr;          pp = eptr;
2222          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2154  for (;;) Line 2224  for (;;)
2224            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2225            eptr++;            eptr++;
2226            }            }
2227            if (possessive) continue;
2228          while (eptr >= pp)          while (eptr >= pp)
2229            {            {
2230            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2206  for (;;) Line 2277  for (;;)
2277      ecode += 3;      ecode += 3;
2278      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2279    
2280        case OP_NOTPOSSTAR:
2281        possessive = TRUE;
2282        min = 0;
2283        max = INT_MAX;
2284        ecode++;
2285        goto REPEATNOTCHAR;
2286    
2287        case OP_NOTPOSPLUS:
2288        possessive = TRUE;
2289        min = 1;
2290        max = INT_MAX;
2291        ecode++;
2292        goto REPEATNOTCHAR;
2293    
2294        case OP_NOTPOSQUERY:
2295        possessive = TRUE;
2296        min = 0;
2297        max = 1;
2298        ecode++;
2299        goto REPEATNOTCHAR;
2300    
2301        case OP_NOTPOSUPTO:
2302        possessive = TRUE;
2303        min = 0;
2304        max = GET2(ecode, 1);
2305        ecode += 3;
2306        goto REPEATNOTCHAR;
2307    
2308      case OP_NOTSTAR:      case OP_NOTSTAR:
2309      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2310      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2245  for (;;) Line 2344  for (;;)
2344        /* UTF-8 mode */        /* UTF-8 mode */
2345        if (utf8)        if (utf8)
2346          {          {
2347          register int d;          register unsigned int d;
2348          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2349            {            {
2350            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2270  for (;;) Line 2369  for (;;)
2369          /* UTF-8 mode */          /* UTF-8 mode */
2370          if (utf8)          if (utf8)
2371            {            {
2372            register int d;            register unsigned int d;
2373            for (fi = min;; fi++)            for (fi = min;; fi++)
2374              {              {
2375              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2306  for (;;) Line 2405  for (;;)
2405          /* UTF-8 mode */          /* UTF-8 mode */
2406          if (utf8)          if (utf8)
2407            {            {
2408            register int d;            register unsigned int d;
2409            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2410              {              {
2411              int len = 1;              int len = 1;
# Line 2316  for (;;) Line 2415  for (;;)
2415              if (fc == d) break;              if (fc == d) break;
2416              eptr += len;              eptr += len;
2417              }              }
2418            for(;;)          if (possessive) continue;
2419            for(;;)
2420              {              {
2421              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2422              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 2333  for (;;) Line 2433  for (;;)
2433              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2434              eptr++;              eptr++;
2435              }              }
2436              if (possessive) continue;
2437            while (eptr >= pp)            while (eptr >= pp)
2438              {              {
2439              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2354  for (;;) Line 2455  for (;;)
2455        /* UTF-8 mode */        /* UTF-8 mode */
2456        if (utf8)        if (utf8)
2457          {          {
2458          register int d;          register unsigned int d;
2459          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2460            {            {
2461            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2377  for (;;) Line 2478  for (;;)
2478          /* UTF-8 mode */          /* UTF-8 mode */
2479          if (utf8)          if (utf8)
2480            {            {
2481            register int d;            register unsigned int d;
2482            for (fi = min;; fi++)            for (fi = min;; fi++)
2483              {              {
2484              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2412  for (;;) Line 2513  for (;;)
2513          /* UTF-8 mode */          /* UTF-8 mode */
2514          if (utf8)          if (utf8)
2515            {            {
2516            register int d;            register unsigned int d;
2517            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2518              {              {
2519              int len = 1;              int len = 1;
# Line 2421  for (;;) Line 2522  for (;;)
2522              if (fc == d) break;              if (fc == d) break;
2523              eptr += len;              eptr += len;
2524              }              }
2525              if (possessive) continue;
2526            for(;;)            for(;;)
2527              {              {
2528              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2438  for (;;) Line 2540  for (;;)
2540              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2541              eptr++;              eptr++;
2542              }              }
2543              if (possessive) continue;
2544            while (eptr >= pp)            while (eptr >= pp)
2545              {              {
2546              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2469  for (;;) Line 2572  for (;;)
2572      ecode += 3;      ecode += 3;
2573      goto REPEATTYPE;      goto REPEATTYPE;
2574    
2575        case OP_TYPEPOSSTAR:
2576        possessive = TRUE;
2577        min = 0;
2578        max = INT_MAX;
2579        ecode++;
2580        goto REPEATTYPE;
2581    
2582        case OP_TYPEPOSPLUS:
2583        possessive = TRUE;
2584        min = 1;
2585        max = INT_MAX;
2586        ecode++;
2587        goto REPEATTYPE;
2588    
2589        case OP_TYPEPOSQUERY:
2590        possessive = TRUE;
2591        min = 0;
2592        max = 1;
2593        ecode++;
2594        goto REPEATTYPE;
2595    
2596        case OP_TYPEPOSUPTO:
2597        possessive = TRUE;
2598        min = 0;
2599        max = GET2(ecode, 1);
2600        ecode += 3;
2601        goto REPEATTYPE;
2602    
2603      case OP_TYPESTAR:      case OP_TYPESTAR:
2604      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
2605      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2571  for (;;) Line 2702  for (;;)
2702    
2703            default:            default:
2704            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
2705            }            }
2706          }          }
2707    
# Line 2611  for (;;) Line 2741  for (;;)
2741          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2742            {            {
2743            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
2744                 ((ims & PCRE_DOTALL) == 0 &&                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
                  eptr <= md->end_subject - md->nllen &&  
                  IS_NEWLINE(eptr)))  
2745              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2746            eptr++;            eptr++;
2747            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
# Line 2624  for (;;) Line 2752  for (;;)
2752          eptr += min;          eptr += min;
2753          break;          break;
2754    
2755            case OP_ANYNL:
2756            for (i = 1; i <= min; i++)
2757              {
2758              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2759              GETCHARINC(c, eptr);
2760              switch(c)
2761                {
2762                default: RRETURN(MATCH_NOMATCH);
2763                case 0x000d:
2764                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2765                break;
2766                case 0x000a:
2767                case 0x000b:
2768                case 0x000c:
2769                case 0x0085:
2770                case 0x2028:
2771                case 0x2029:
2772                break;
2773                }
2774              }
2775            break;
2776    
2777          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2778          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2779            {            {
# Line 2692  for (;;) Line 2842  for (;;)
2842  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
2843    
2844        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
2845        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
2846          number of bytes present, as this was tested above. */
2847    
2848        switch(ctype)        switch(ctype)
2849          {          {
# Line 2701  for (;;) Line 2852  for (;;)
2852            {            {
2853            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2854              {              {
2855              if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2856              eptr++;              eptr++;
2857              }              }
2858            }            }
# Line 2713  for (;;) Line 2863  for (;;)
2863          eptr += min;          eptr += min;
2864          break;          break;
2865    
2866            /* Because of the CRLF case, we can't assume the minimum number of
2867            bytes are present in this case. */
2868    
2869            case OP_ANYNL:
2870            for (i = 1; i <= min; i++)
2871              {
2872              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2873              switch(*eptr++)
2874                {
2875                default: RRETURN(MATCH_NOMATCH);
2876                case 0x000d:
2877                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2878                break;
2879                case 0x000a:
2880                case 0x000b:
2881                case 0x000c:
2882                case 0x0085:
2883                break;
2884                }
2885              }
2886            break;
2887    
2888          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2889          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2890            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2774  for (;;) Line 2946  for (;;)
2946              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2947              if (prop_fail_result) RRETURN(MATCH_NOMATCH);              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2948              }              }
2949            break;            /* Control never gets here */
2950    
2951            case PT_LAMP:            case PT_LAMP:
2952            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 2789  for (;;) Line 2961  for (;;)
2961                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
2962                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2963              }              }
2964            break;            /* Control never gets here */
2965    
2966            case PT_GC:            case PT_GC:
2967            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 2802  for (;;) Line 2974  for (;;)
2974              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2975                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2976              }              }
2977            break;            /* Control never gets here */
2978    
2979            case PT_PC:            case PT_PC:
2980            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 2815  for (;;) Line 2987  for (;;)
2987              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2988                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2989              }              }
2990            break;            /* Control never gets here */
2991    
2992            case PT_SC:            case PT_SC:
2993            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 2828  for (;;) Line 3000  for (;;)
3000              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3001                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3002              }              }
3003            break;            /* Control never gets here */
3004    
3005            default:            default:
3006            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
3007            }            }
3008          }          }
3009    
# Line 2876  for (;;) Line 3047  for (;;)
3047            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3048            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3049                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3050                  eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))                  IS_NEWLINE(eptr)))
3051              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3052    
3053            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
# Line 2888  for (;;) Line 3059  for (;;)
3059              case OP_ANYBYTE:              case OP_ANYBYTE:
3060              break;              break;
3061    
3062                case OP_ANYNL:
3063                switch(c)
3064                  {
3065                  default: RRETURN(MATCH_NOMATCH);
3066                  case 0x000d:
3067                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3068                  break;
3069                  case 0x000a:
3070                  case 0x000b:
3071                  case 0x000c:
3072                  case 0x0085:
3073                  case 0x2028:
3074                  case 0x2029:
3075                  break;
3076                  }
3077                break;
3078    
3079              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3080              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3081                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2932  for (;;) Line 3120  for (;;)
3120            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3121            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3122            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3123                 ((ims & PCRE_DOTALL) == 0 &&                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
                  eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))  
3124              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3125    
3126            c = *eptr++;            c = *eptr++;
# Line 2945  for (;;) Line 3132  for (;;)
3132              case OP_ANYBYTE:              case OP_ANYBYTE:
3133              break;              break;
3134    
3135                case OP_ANYNL:
3136                switch(c)
3137                  {
3138                  default: RRETURN(MATCH_NOMATCH);
3139                  case 0x000d:
3140                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3141                  break;
3142                  case 0x000a:
3143                  case 0x000b:
3144                  case 0x000c:
3145                  case 0x0085:
3146                  break;
3147                  }
3148                break;
3149    
3150              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3151              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3152              break;              break;
# Line 2977  for (;;) Line 3179  for (;;)
3179        /* Control never gets here */        /* Control never gets here */
3180        }        }
3181    
3182      /* If maximizing it is worth using inline code for speed, doing the type      /* If maximizing, it is worth using inline code for speed, doing the type
3183      test once at the start (i.e. keep it out of the loop). Again, keep the      test once at the start (i.e. keep it out of the loop). Again, keep the
3184      UTF-8 and UCP stuff separate. */      UTF-8 and UCP stuff separate. */
3185    
# Line 3058  for (;;) Line 3260  for (;;)
3260    
3261          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3262    
3263            if (possessive) continue;
3264          for(;;)          for(;;)
3265            {            {
3266            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 3093  for (;;) Line 3296  for (;;)
3296    
3297          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3298    
3299            if (possessive) continue;
3300          for(;;)          for(;;)
3301            {            {
3302            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 3135  for (;;) Line 3339  for (;;)
3339                {                {
3340                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3341                  {                  {
3342                  if (eptr >= md->end_subject ||                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
                     (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))  
                   break;  
3343                  eptr++;                  eptr++;
3344                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3345                  }                  }
# Line 3161  for (;;) Line 3363  for (;;)
3363                {                {
3364                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3365                  {                  {
3366                  if (eptr >= md->end_subject ||                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
                     (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))  
                   break;  
3367                  eptr++;                  eptr++;
3368                  }                  }
3369                break;                break;
# Line 3171  for (;;) Line 3371  for (;;)
3371              else              else
3372                {                {
3373                c = max - min;                c = max - min;
3374                if (c > md->end_subject - eptr) c = md->end_subject - eptr;                if (c > (unsigned int)(md->end_subject - eptr))
3375                    c = md->end_subject - eptr;
3376                eptr += c;                eptr += c;
3377                }                }
3378              }              }
# Line 3181  for (;;) Line 3382  for (;;)
3382    
3383            case OP_ANYBYTE:            case OP_ANYBYTE:
3384            c = max - min;            c = max - min;
3385            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3386                c = md->end_subject - eptr;
3387            eptr += c;            eptr += c;
3388            break;            break;
3389    
3390              case OP_ANYNL:
3391              for (i = min; i < max; i++)
3392                {
3393                int len = 1;
3394                if (eptr >= md->end_subject) break;
3395                GETCHARLEN(c, eptr, len);
3396                if (c == 0x000d)
3397                  {
3398                  if (++eptr >= md->end_subject) break;
3399                  if (*eptr == 0x000a) eptr++;
3400                  }
3401                else
3402                  {
3403                  if (c != 0x000a && c != 0x000b && c != 0x000c &&
3404                      c != 0x0085 && c != 0x2028 && c != 0x2029)
3405                    break;
3406                  eptr += len;
3407                  }
3408                }
3409              break;
3410    
3411            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3412            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3413              {              {
# Line 3257  for (;;) Line 3480  for (;;)
3480    
3481          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3482    
3483            if (possessive) continue;
3484          for(;;)          for(;;)
3485            {            {
3486            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 3277  for (;;) Line 3501  for (;;)
3501              {              {
3502              for (i = min; i < max; i++)              for (i = min; i < max; i++)
3503                {                {
3504                if (eptr >= md->end_subject ||                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
                   (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))  
                 break;  
3505                eptr++;                eptr++;
3506                }                }
3507              break;              break;
# Line 3288  for (;;) Line 3510  for (;;)
3510    
3511            case OP_ANYBYTE:            case OP_ANYBYTE:
3512            c = max - min;            c = max - min;
3513            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3514                c = md->end_subject - eptr;
3515            eptr += c;            eptr += c;
3516            break;            break;
3517    
3518              case OP_ANYNL:
3519              for (i = min; i < max; i++)
3520                {
3521                if (eptr >= md->end_subject) break;
3522                c = *eptr;
3523                if (c == 0x000d)
3524                  {
3525                  if (++eptr >= md->end_subject) break;
3526                  if (*eptr == 0x000a) eptr++;
3527                  }
3528                else
3529                  {
3530                  if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
3531                    break;
3532                  eptr++;
3533                  }
3534                }
3535              break;
3536    
3537            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3538            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3539              {              {
# Line 3352  for (;;) Line 3594  for (;;)
3594    
3595          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3596    
3597            if (possessive) continue;
3598          while (eptr >= pp)          while (eptr >= pp)
3599            {            {
3600            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 3366  for (;;) Line 3609  for (;;)
3609        }        }
3610      /* Control never gets here */      /* Control never gets here */
3611    
3612      /* There's been some horrible disaster. Since all codes > OP_BRA are      /* There's been some horrible disaster. Arrival here can only mean there is
3613      for capturing brackets, and there shouldn't be any gaps between 0 and      something seriously wrong in the code above or the OP_xxx definitions. */
     OP_BRA, arrival here can only mean there is something seriously wrong  
     in the code above or the OP_xxx definitions. */  
3614    
3615      default:      default:
3616      DPRINTF(("Unknown opcode %d\n", *ecode));      DPRINTF(("Unknown opcode %d\n", *ecode));
3617      RRETURN(PCRE_ERROR_UNKNOWN_NODE);      RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
3618      }      }
3619    
3620    /* Do not stick any code in here without much thought; it is assumed    /* Do not stick any code in here without much thought; it is assumed
# Line 3411  Undefine all the macros that were define Line 3652  Undefine all the macros that were define
3652    
3653  #undef cur_is_word  #undef cur_is_word
3654  #undef condition  #undef condition
 #undef minimize  
3655  #undef prev_is_word  #undef prev_is_word
3656    
3657  #undef original_ims  #undef original_ims
# Line 3484  BOOL startline; Line 3724  BOOL startline;
3724  BOOL firstline;  BOOL firstline;
3725  BOOL first_byte_caseless = FALSE;  BOOL first_byte_caseless = FALSE;
3726  BOOL req_byte_caseless = FALSE;  BOOL req_byte_caseless = FALSE;
3727    BOOL utf8;
3728  match_data match_block;  match_data match_block;
3729  match_data *md = &match_block;  match_data *md = &match_block;
3730  const uschar *tables;  const uschar *tables;
# Line 3491  const uschar *start_bits = NULL; Line 3732  const uschar *start_bits = NULL;
3732  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
3733  USPTR end_subject;  USPTR end_subject;
3734  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
3735    eptrblock eptrchain[EPTR_WORK_SIZE];
3736    
3737  pcre_study_data internal_study;  pcre_study_data internal_study;
3738  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3567  md->end_subject = md->start_subject + le Line 3809  md->end_subject = md->start_subject + le
3809  end_subject = md->end_subject;  end_subject = md->end_subject;
3810    
3811  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
3812  md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
3813    
3814  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
3815  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
# Line 3576  md->partial = (options & PCRE_PARTIAL) ! Line 3818  md->partial = (options & PCRE_PARTIAL) !
3818  md->hitend = FALSE;  md->hitend = FALSE;
3819    
3820  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
3821    md->eptrchain = eptrchain;              /* Make workspace generally available */
3822    
3823  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
3824  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
3825    
3826  /* Handle different types of newline. The two bits give four cases. If nothing  /* Handle different types of newline. The three bits give eight cases. If
3827  is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
3828    
3829  switch ((((options & PCRE_NEWLINE_CRLF) == 0)? re->options : options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
3830           PCRE_NEWLINE_CRLF)         PCRE_NEWLINE_BITS)
3831    {    {
3832    default:              newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
3833    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = '\r'; break;
3834    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = '\n'; break;
3835    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
3836         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
3837      case PCRE_NEWLINE_ANY: newline = -1; break;
3838      default: return PCRE_ERROR_BADNEWLINE;
3839    }    }
3840    
3841  if (newline > 255)  if (newline < 0)
3842    {    {
3843    md->nllen = 2;    md->nltype = NLTYPE_ANY;
   md->nl[0] = (newline >> 8) & 255;  
   md->nl[1] = newline & 255;  
3844    }    }
3845  else  else
3846    {    {
3847    md->nllen = 1;    md->nltype = NLTYPE_FIXED;
3848    md->nl[0] = newline;    if (newline > 255)
3849        {
3850        md->nllen = 2;
3851        md->nl[0] = (newline >> 8) & 255;
3852        md->nl[1] = newline & 255;
3853        }
3854      else
3855        {
3856        md->nllen = 1;
3857        md->nl[0] = newline;
3858        }
3859    }    }
3860    
3861  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
# Line 3615  if (md->partial && (re->options & PCRE_N Line 3868  if (md->partial && (re->options & PCRE_N
3868  back the character offset. */  back the character offset. */
3869    
3870  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3871  if (md->utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3872    {    {
3873    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
3874      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
# Line 3707  if ((re->options & PCRE_REQCHSET) != 0) Line 3960  if ((re->options & PCRE_REQCHSET) != 0)
3960    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
3961    }    }
3962    
3963    
3964    /* ==========================================================================*/
3965    
3966  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
3967  the loop runs just once. */  the loop runs just once. */
3968    
3969  do  for(;;)
3970    {    {
3971    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
3972    
# Line 3725  do Line 3981  do
3981    
3982    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* Advance to a unique first char if possible. If firstline is TRUE, the
3983    start of the match is constrained to the first line of a multiline string.    start of the match is constrained to the first line of a multiline string.
3984    Implement this by temporarily adjusting end_subject so that we stop scanning    That is, the match must be before or at the first newline. Implement this by
3985    at a newline. If the match fails at the newline, later code breaks this loop.    temporarily adjusting end_subject so that we stop scanning at a newline. If
3986    */    the match fails at the newline, later code breaks this loop. */
3987    
3988    if (firstline)    if (firstline)
3989      {      {
3990      USPTR t = start_match;      USPTR t = start_match;
3991      while (t <= save_end_subject - md->nllen && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3992      end_subject = t;      end_subject = t;
3993      }      }
3994    
# Line 3753  do Line 4009  do
4009    
4010    else if (startline)    else if (startline)
4011      {      {
4012      if (start_match >= md->start_subject + md->nllen +      if (start_match > md->start_subject + start_offset)
           start_offset)  
4013        {        {
4014        while (start_match <= end_subject &&        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
              !IS_NEWLINE(start_match - md->nllen))  
4015          start_match++;          start_match++;
4016        }        }
4017      }      }
# Line 3793  do Line 4047  do
4047    
4048    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end can
4049    take a long time, and give bad performance on quite ordinary patterns. This    take a long time, and give bad performance on quite ordinary patterns. This
4050    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4051    don't do this when the string is sufficiently long.    string... so we don't do this when the string is sufficiently long.
4052    
4053    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested.
4054    */    */
# Line 3826  do Line 4080  do
4080            }            }
4081          }          }
4082    
4083        /* If we can't find the required character, break the matching loop */        /* If we can't find the required character, break the matching loop,
4084          forcing a match failure. */
4085    
4086        if (p >= end_subject) break;        if (p >= end_subject)
4087            {
4088            rc = MATCH_NOMATCH;
4089            break;
4090            }
4091    
4092        /* If we have found the required character, save the point where we        /* If we have found the required character, save the point where we
4093        found it, so that we don't search again next time round the loop if        found it, so that we don't search again next time round the loop if
# Line 3838  do Line 4097  do
4097        }        }
4098      }      }
4099    
4100    /* When a match occurs, substrings will be set for all internal extractions;    /* OK, we can now run the match. */
   we just need to set up the whole thing as substring 0 before returning. If  
   there were too many extractions, set the return code to zero. In the case  
   where we had to get some local store to hold offsets for backreferences, copy  
   those back references that we can. In this case there need not be overflow  
   if certain parts of the pattern were not used. */  
4101    
4102    md->start_match = start_match;    md->start_match = start_match;
4103    md->match_call_count = 0;    md->match_call_count = 0;
4104      md->eptrn = 0;                          /* Next free eptrchain slot */
4105      rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);
4106    
4107    rc = match(start_match, md->start_code, 2, md, ims, NULL, match_isgroup, 0);    /* Any return other than MATCH_NOMATCH breaks the loop. */
4108    
4109    /* When the result is no match, if the subject's first character was a    if (rc != MATCH_NOMATCH) break;
   newline and the PCRE_FIRSTLINE option is set, break (which will return  
   PCRE_ERROR_NOMATCH). The option requests that a match occur before the first  
   newline in the subject. Otherwise, advance the pointer to the next character  
   and continue - but the continuation will actually happen only when the  
   pattern is not anchored. */  
4110    
4111    if (rc == MATCH_NOMATCH)    /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4112      {    newline in the subject (though it may continue over the newline). Therefore,
4113      if (firstline &&    if we have just failed to match, starting at a newline, do not continue. */
4114          start_match <= md->end_subject - md->nllen &&  
4115          IS_NEWLINE(start_match))    if (firstline && IS_NEWLINE(start_match)) break;
4116        break;  
4117      start_match++;    /* Advance the match position by one character. */
4118    
4119      start_match++;
4120  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4121      if (md->utf8)    if (utf8)
4122        while(start_match < end_subject && (*start_match & 0xc0) == 0x80)      while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4123          start_match++;        start_match++;
4124  #endif  #endif
     continue;  
     }  
4125    
4126    if (rc != MATCH_MATCH)    /* Break the loop if the pattern is anchored or if we have passed the end of
4127      {    the subject. */
4128      DPRINTF((">>>> error: returning %d\n", rc));  
4129      return rc;    if (anchored || start_match > end_subject) break;
4130      }  
4131      /* If we have just passed a CR and the newline option is CRLF or ANY, and we
4132      are now at a LF, advance the match position by one more character. */
4133    
4134      if (start_match[-1] == '\r' &&
4135           (md->nltype == NLTYPE_ANY || md->nllen == 2) &&
4136           start_match < end_subject &&
4137           *start_match == '\n')
4138        start_match++;
4139    
4140      }   /* End of for(;;) "bumpalong" loop */
4141    
4142    /* ==========================================================================*/
4143    
4144    /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4145    conditions is true:
4146    
4147    /* We have a match! Copy the offset information from temporary store if  (1) The pattern is anchored;
   necessary */  
4148    
4149    (2) We are past the end of the subject;
4150    
4151    (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4152        this option requests that a match occur at or before the first newline in
4153        the subject.
4154    
4155    When we have a match and the offset vector is big enough to deal with any
4156    backreferences, captured substring offsets will already be set up. In the case
4157    where we had to get some local store to hold offsets for backreference
4158    processing, copy those that we can. In this case there need not be overflow if
4159    certain parts of the pattern were not used, even though there are more
4160    capturing parentheses than vector slots. */
4161    
4162    if (rc == MATCH_MATCH)
4163      {
4164    if (using_temporary_offsets)    if (using_temporary_offsets)
4165      {      {
4166      if (offsetcount >= 4)      if (offsetcount >= 4)
# Line 3889  do Line 4169  do
4169          (offsetcount - 2) * sizeof(int));          (offsetcount - 2) * sizeof(int));
4170        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
4171        }        }
4172      if (md->end_offset_top > offsetcount)      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
       md->offset_overflow = TRUE;  
   
4173      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
4174      (pcre_free)(md->offset_vector);      (pcre_free)(md->offset_vector);
4175      }      }
4176    
4177      /* Set the return code to the number of captured strings, or 0 if there are
4178      too many to fit into the vector. */
4179    
4180    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = md->offset_overflow? 0 : md->end_offset_top/2;
4181    
4182      /* If there is space, set up the whole thing as substring 0. */
4183    
4184    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4185      {      {
4186      offsets[0] = start_match - md->start_subject;      offsets[0] = start_match - md->start_subject;
# Line 3908  do Line 4191  do
4191    return rc;    return rc;
4192    }    }
4193    
4194  /* This "while" is the end of the "do" above */  /* Control gets here if there has been an error, or if the overall match
4195    attempt has failed at all permitted starting positions. */
 while (!anchored && start_match <= end_subject);  
4196    
4197  if (using_temporary_offsets)  if (using_temporary_offsets)
4198    {    {
# Line 3918  if (using_temporary_offsets) Line 4200  if (using_temporary_offsets)
4200    (pcre_free)(md->offset_vector);    (pcre_free)(md->offset_vector);
4201    }    }
4202    
4203  if (md->partial && md->hitend)  if (rc != MATCH_NOMATCH)
4204      {
4205      DPRINTF((">>>> error: returning %d\n", rc));
4206      return rc;
4207      }
4208    else if (md->partial && md->hitend)
4209    {    {
4210    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4211    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;

Legend:
Removed from v.91  
changed lines
  Added in v.115

  ViewVC Help
Powered by ViewVC 1.1.5