/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 130 by ph10, Mon Mar 26 15:09:47 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #define NLBLOCK md             /* Block containing newline information */
46    #define PSSTART start_subject  /* Field containing processed string start */
47    #define PSEND   end_subject    /* Field containing processed string end */
48    
49  #include "pcre_internal.h"  #include "pcre_internal.h"
50    
51    /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
52    obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
53    
54  /* Structure for building a chain of data that actually lives on the  #define EPTR_WORK_SIZE (1000)
 stack, for holding the values of the subject pointer at the start of each  
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   USPTR epb_saved_eptr;  
 } eptrblock;  
55    
56  /* Flag bits for the match() function */  /* Flag bits for the match() function */
57    
58  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
59  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
60    #define match_tail_recursed  0x04  /* Tail recursive call */
61    
62  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
63  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 101  Returns:     nothing Line 98  Returns:     nothing
98  static void  static void
99  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
100  {  {
101  int c;  unsigned int c;
102  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
103  while (length-- > 0)  while (length-- > 0)
104    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 275  typedef struct heapframe { Line 272  typedef struct heapframe {
272    long int Xims;    long int Xims;
273    eptrblock *Xeptrb;    eptrblock *Xeptrb;
274    int Xflags;    int Xflags;
275    int Xrdepth;    unsigned int Xrdepth;
276    
277    /* Function local variables */    /* Function local variables */
278    
# Line 291  typedef struct heapframe { Line 288  typedef struct heapframe {
288    
289    BOOL Xcur_is_word;    BOOL Xcur_is_word;
290    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
291    BOOL Xprev_is_word;    BOOL Xprev_is_word;
292    
293    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
# Line 303  typedef struct heapframe { Line 299  typedef struct heapframe {
299    int Xprop_category;    int Xprop_category;
300    int Xprop_chartype;    int Xprop_chartype;
301    int Xprop_script;    int Xprop_script;
302    int *Xprop_test_variable;    int Xoclength;
303      uschar Xocchars[8];
304  #endif  #endif
305    
306    int Xctype;    int Xctype;
307    int Xfc;    unsigned int Xfc;
308    int Xfi;    int Xfi;
309    int Xlength;    int Xlength;
310    int Xmax;    int Xmax;
# Line 340  typedef struct heapframe { Line 337  typedef struct heapframe {
337  *         Match from current position            *  *         Match from current position            *
338  *************************************************/  *************************************************/
339    
340  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
341  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
342  same response.  same response.
343    
# Line 353  performance. Tests using gcc on a SPARC Line 347  performance. Tests using gcc on a SPARC
347  made performance worse.  made performance worse.
348    
349  Arguments:  Arguments:
350     eptr        pointer in subject     eptr        pointer to current character in subject
351     ecode       position in code     ecode       pointer to current position in compiled code
352     offset_top  current top pointer     offset_top  current top pointer
353     md          pointer to "static" info for the match     md          pointer to "static" info for the match
354     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 362  Arguments: Line 356  Arguments:
356                   brackets - for testing for empty matches                   brackets - for testing for empty matches
357     flags       can contain     flags       can contain
358                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
359                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
360                       group that can match an empty string
361                     match_tail_recursed - this is a tail_recursed group
362     rdepth      the recursion depth     rdepth      the recursion depth
363    
364  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 374  Returns:       MATCH_MATCH if matched Line 370  Returns:       MATCH_MATCH if matched
370  static int  static int
371  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
372    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
373    int flags, int rdepth)    int flags, unsigned int rdepth)
374  {  {
375  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
376  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
377  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
378    
379  register int  rrc;    /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
380  register int  i;      /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
381  register int  c;      /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
382  register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
383    
384    BOOL minimize, possessive; /* Quantifier options */
385    
386  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
387  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 434  HEAP_RECURSE: Line 432  HEAP_RECURSE:
432    
433  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
434  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
435  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
436    
437  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
# Line 446  HEAP_RECURSE: Line 443  HEAP_RECURSE:
443  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
444  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
445  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
446  #define prop_test_variable frame->Xprop_test_variable  #define oclength           frame->Xoclength
447    #define occhars            frame->Xocchars
448  #endif  #endif
449    
450  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 470  HEAP_RECURSE: Line 468  HEAP_RECURSE:
468  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
469  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
470    
471  #else  #else         /* NO_RECURSE not defined */
472  #define fi i  #define fi i
473  #define fc c  #define fc c
474    
# Line 489  recursion_info new_recursive;      /* wi Line 487  recursion_info new_recursive;      /* wi
487                                     /* that do not have to be preserved over  */                                     /* that do not have to be preserved over  */
488  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
489  BOOL condition;  BOOL condition;
 BOOL minimize;  
490  BOOL prev_is_word;  BOOL prev_is_word;
491    
492  unsigned long int original_ims;  unsigned long int original_ims;
# Line 501  int prop_fail_result; Line 498  int prop_fail_result;
498  int prop_category;  int prop_category;
499  int prop_chartype;  int prop_chartype;
500  int prop_script;  int prop_script;
501  int *prop_test_variable;  int oclength;
502    uschar occhars[8];
503  #endif  #endif
504    
505  int ctype;  int ctype;
# Line 516  int save_offset1, save_offset2, save_off Line 514  int save_offset1, save_offset2, save_off
514  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
515    
516  eptrblock newptrb;  eptrblock newptrb;
517  #endif  #endif     /* NO_RECURSE */
518    
519  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
520  variables. */  variables. */
# Line 524  variables. */ Line 522  variables. */
522  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
523  prop_value = 0;  prop_value = 0;
524  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_variable = NULL;  
525  #endif  #endif
526    
527    
528    /* This label is used for tail recursion, which is used in a few cases even
529    when NO_RECURSE is not defined, in order to reduce the amount of stack that is
530    used. Thanks to Ian Taylor for noticing this possibility and sending the
531    original patch. */
532    
533    TAIL_RECURSE:
534    
535  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
536  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
537  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
# Line 542  if (md->match_call_count++ >= md->match_ Line 547  if (md->match_call_count++ >= md->match_
547  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
548    
549  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
550    
551    #ifdef SUPPORT_UTF8
552  utf8 = md->utf8;       /* Local copy of the flag */  utf8 = md->utf8;       /* Local copy of the flag */
553    #else
554    utf8 = FALSE;
555    #endif
556    
557  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
558  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
559  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
560  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
561    When match() is called in other circumstances, don't add to the chain. If this
562    is a tail recursion, use a block from the workspace, as the one on the stack is
563    already used. */
564    
565  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
566    {    {
567    newptrb.epb_prev = eptrb;    eptrblock *p;
568    newptrb.epb_saved_eptr = eptr;    if ((flags & match_tail_recursed) != 0)
569    eptrb = &newptrb;      {
570        if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
571        p = md->eptrchain + md->eptrn++;
572        }
573      else p = &newptrb;
574      p->epb_saved_eptr = eptr;
575      p->epb_prev = eptrb;
576      eptrb = p;
577    }    }
578    
579  /* Now start processing the operations. */  /* Now start processing the opcodes. */
580    
581  for (;;)  for (;;)
582    {    {
583      minimize = possessive = FALSE;
584    op = *ecode;    op = *ecode;
   minimize = FALSE;  
585    
586    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
587    matching at least one subject character. */    matching at least one subject character. */
# Line 571  for (;;) Line 591  for (;;)
591        eptr > md->start_match)        eptr > md->start_match)
592      md->hitend = TRUE;      md->hitend = TRUE;
593    
594    /* Opening capturing bracket. If there is space in the offset vector, save    switch(op)
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
595      {      {
596      number = op - OP_BRA;      /* Handle a capturing bracket. If there is space in the offset vector, save
597        the current subject position in the working slot at the top of the vector.
598      /* For extended extraction brackets (large number), we have to fish out the      We mustn't change the current values of the data slot, because they may be
599      number from a dummy opcode at the start. */      set from a previous iteration of this group, and be referred to by a
600        reference inside the group.
601      if (number > EXTRACT_BASIC_MAX)  
602        number = GET2(ecode, 2+LINK_SIZE);      If the bracket fails to match, we need to restore this value and also the
603        values of the final offsets, in case they were set by a previous iteration
604        of the same bracket.
605    
606        If there isn't enough space in the offset vector, treat this as if it were
607        a non-capturing bracket. Don't worry about setting the flag for the error
608        case here; that is handled in the code for KET. */
609    
610        case OP_CBRA:
611        case OP_SCBRA:
612        number = GET2(ecode, 1+LINK_SIZE);
613      offset = number << 1;      offset = number << 1;
614    
615  #ifdef DEBUG  #ifdef DEBUG
616      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
617        printf("subject=");
618      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
619      printf("\n");      printf("\n");
620  #endif  #endif
# Line 612  for (;;) Line 629  for (;;)
629        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
630        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
631    
632          flags = (op == OP_SCBRA)? match_cbegroup : 0;
633        do        do
634          {          {
635          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
636            match_isgroup);            ims, eptrb, flags);
637          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
638          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
639          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
# Line 631  for (;;) Line 649  for (;;)
649        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
650        }        }
651    
652      /* Insufficient room for saving captured contents */      /* Insufficient room for saving captured contents. Treat as a non-capturing
653        bracket. */
654    
655      else op = OP_BRA;      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
     }  
656    
657    /* Other types of node can be handled by a switch */      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
658        final alternative within the brackets, we would return the result of a
659        recursive call to match() whatever happened. We can reduce stack usage by
660        turning this into a tail recursion. */
661    
662    switch(op)      case OP_BRA:
663      {      case OP_SBRA:
664      case OP_BRA:     /* Non-capturing bracket: optimized */      DPRINTF(("start non-capturing bracket\n"));
665      DPRINTF(("start bracket 0\n"));      flags = (op >= OP_SBRA)? match_cbegroup : 0;
666      do      for (;;)
667        {        {
668        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)
669          match_isgroup);          {
670            ecode += _pcre_OP_lengths[*ecode];
671            flags |= match_tail_recursed;
672            DPRINTF(("bracket 0 tail recursion\n"));
673            goto TAIL_RECURSE;
674            }
675    
676          /* For non-final alternatives, continue the loop for a NOMATCH result;
677          otherwise return. */
678    
679          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
680            eptrb, flags);
681        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
682        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
683        }        }
684      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
685    
686      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
687      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
688      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
689      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
690        obeyed, we can use tail recursion to avoid using another stack frame. */
691    
692      case OP_COND:      case OP_COND:
693      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
694        if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
695          {
696          offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
697          condition = md->recursive != NULL &&
698            (offset == RREF_ANY || offset == md->recursive->group_num);
699          ecode += condition? 3 : GET(ecode, 1);
700          }
701    
702        else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
703        {        {
704        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
705        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
706          (md->recursive != NULL) :        ecode += condition? 3 : GET(ecode, 1);
707          (offset < offset_top && md->offset_vector[offset] >= 0);        }
708        RMATCH(rrc, eptr, ecode + (condition?  
709          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
710          offset_top, md, ims, eptrb, match_isgroup);        {
711        RRETURN(rrc);        condition = FALSE;
712          ecode += GET(ecode, 1);
713        }        }
714    
715      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
716      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
717        assertion. */
718    
719      else      else
720        {        {
721        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
722            match_condassert | match_isgroup);            match_condassert);
723        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
724          {          {
725          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
726            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
727          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
728          }          }
729        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH)
730          {          {
731          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
732          }          }
733        else ecode += GET(ecode, 1);        else
734        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
735          match_isgroup);          condition = FALSE;
736        RRETURN(rrc);          ecode += GET(ecode, 1);
737            }
738        }        }
     /* Control never reaches here */  
739    
740      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
741      encountered. */      we can use tail recursion to avoid using another stack frame. If the second
742        alternative doesn't exist, we can just plough on. */
743    
744      case OP_CREF:      if (condition || *ecode == OP_ALT)
745      case OP_BRANUMBER:        {
746      ecode += 3;        ecode += 1 + LINK_SIZE;
747          flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
748          goto TAIL_RECURSE;
749          }
750        else
751          {
752          ecode += 1 + LINK_SIZE;
753          }
754      break;      break;
755    
756      /* End of the pattern. If we are in a recursion, we should restore the  
757      offsets appropriately and continue from after the call. */      /* End of the pattern. If we are in a top-level recursion, we should
758        restore the offsets appropriately and continue from after the call. */
759    
760      case OP_END:      case OP_END:
761      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
# Line 745  for (;;) Line 797  for (;;)
797      case OP_ASSERTBACK:      case OP_ASSERTBACK:
798      do      do
799        {        {
800        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
         match_isgroup);  
801        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
802        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
803        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 772  for (;;) Line 823  for (;;)
823      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
824      do      do
825        {        {
826        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
         match_isgroup);  
827        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
828        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
829        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 794  for (;;) Line 844  for (;;)
844  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
845      if (utf8)      if (utf8)
846        {        {
847        c = GET(ecode,1);        i = GET(ecode, 1);
848        for (i = 0; i < c; i++)        while (i-- > 0)
849          {          {
850          eptr--;          eptr--;
851          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
# Line 808  for (;;) Line 858  for (;;)
858      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
859    
860        {        {
861        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
862        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
863        }        }
864    
# Line 865  for (;;) Line 915  for (;;)
915      case OP_RECURSE:      case OP_RECURSE:
916        {        {
917        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
918        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
919            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
920    
921        /* Add to "recursing stack" */        /* Add to "recursing stack" */
922    
# Line 904  for (;;) Line 949  for (;;)
949        restore the offset and recursion data. */        restore the offset and recursion data. */
950    
951        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
952          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
953        do        do
954          {          {
955          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
956              eptrb, match_isgroup);            md, ims, eptrb, flags);
957          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
958            {            {
959            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
# Line 945  for (;;) Line 991  for (;;)
991      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer. */
992    
993      case OP_ONCE:      case OP_ONCE:
994        {      prev = ecode;
995        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
996    
997        do      do
998          {        {
999          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
1000            eptrb, match_isgroup);          eptrb, 0);
1001          if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1002          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1003          ecode += GET(ecode,1);        ecode += GET(ecode,1);
1004          }        }
1005        while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1006    
1007        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1008    
1009        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1010    
1011        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1012        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1013    
1014        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1015    
1016        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1017        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1018    
1019        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1020        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1021        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1022        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1023        course of events. */      course of events. */
1024    
1025        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1026          {        {
1027          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1028          break;        break;
1029          }        }
1030    
1031        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1032        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1033        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1034        opcode. */      any options that changed within the bracket before re-running it, so
1035        check the next opcode. */
1036    
1037        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1038          {        {
1039          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1040          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1041          }        }
1042    
1043        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1044          {        {
1045          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
1046          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1047          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1048          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = match_tail_recursed;
1049          }        goto TAIL_RECURSE;
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
1050        }        }
1051      RRETURN(MATCH_NOMATCH);      else  /* OP_KETRMAX */
1052          {
1053          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);
1054          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1055          ecode += 1 + LINK_SIZE;
1056          flags = match_tail_recursed;
1057          goto TAIL_RECURSE;
1058          }
1059        /* Control never gets here */
1060    
1061      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1062      bracketed group and go to there. */      bracketed group and go to there. */
# Line 1027  for (;;) Line 1074  for (;;)
1074      case OP_BRAZERO:      case OP_BRAZERO:
1075        {        {
1076        next = ecode+1;        next = ecode+1;
1077        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);
1078        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1079        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1080        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1081        }        }
1082      break;      break;
1083    
1084      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1085        {        {
1086        next = ecode+1;        next = ecode+1;
1087        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1088        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
         match_isgroup);  
1089        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1090        ecode++;        ecode++;
1091        }        }
1092      break;      break;
1093    
1094      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. */
     an assertion "group", stop matching and return MATCH_MATCH, but record the  
     current high water mark for use by positive assertions. Do this also  
     for the "once" (not-backup up) groups. */  
1095    
1096      case OP_KET:      case OP_KET:
1097      case OP_KETRMIN:      case OP_KETRMIN:
1098      case OP_KETRMAX:      case OP_KETRMAX:
1099        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
   
       /* Back up the stack of bracket start pointers. */  
1100    
1101        eptrb = eptrb->epb_prev;      /* If this was a group that remembered the subject start, in order to break
1102        infinite repeats of empty string matches, retrieve the subject start from
1103        the chain. Otherwise, set it NULL. */
1104    
1105        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev >= OP_SBRA)
1106            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        {
1107            *prev == OP_ONCE)        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1108          {        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1109          md->end_match_ptr = eptr;      /* For ONCE */        }
1110          md->end_offset_top = offset_top;      else saved_eptr = NULL;
         RRETURN(MATCH_MATCH);  
         }  
1111    
1112        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group, stop matching and return
1113        group number back at the start and if necessary complete handling an      MATCH_MATCH, but record the current high water mark for use by positive
1114        extraction by setting the offsets and bumping the high water mark. */      assertions. Do this also for the "once" (atomic) groups. */
1115    
1116        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1117          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1118          number = *prev - OP_BRA;          *prev == OP_ONCE)
1119          {
1120          md->end_match_ptr = eptr;      /* For ONCE */
1121          md->end_offset_top = offset_top;
1122          RRETURN(MATCH_MATCH);
1123          }
1124    
1125          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1126          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1127        bumping the high water mark. Note that whole-pattern recursion is coded as
1128        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1129        when the OP_END is reached. Other recursion is handled here. */
1130    
1131          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1132          offset = number << 1;        {
1133          number = GET2(prev, 1+LINK_SIZE);
1134          offset = number << 1;
1135    
1136  #ifdef DEBUG  #ifdef DEBUG
1137          printf("end bracket %d", number);        printf("end bracket %d", number);
1138          printf("\n");        printf("\n");
1139  #endif  #endif
1140    
1141          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1142          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1143          into group 0, so it won't be picked up here. Instead, we catch it when          {
1144          the OP_END is reached. */          md->offset_vector[offset] =
1145              md->offset_vector[md->offset_end - number];
1146          if (number > 0)          md->offset_vector[offset+1] = eptr - md->start_subject;
1147            {          if (offset_top <= offset) offset_top = offset + 2;
           md->capture_last = number;  
           if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
             {  
             md->offset_vector[offset] =  
               md->offset_vector[md->offset_end - number];  
             md->offset_vector[offset+1] = eptr - md->start_subject;  
             if (offset_top <= offset) offset_top = offset + 2;  
             }  
   
           /* Handle a recursively called group. Restore the offsets  
           appropriately and continue from after the call. */  
   
           if (md->recursive != NULL && md->recursive->group_num == number)  
             {  
             recursion_info *rec = md->recursive;  
             DPRINTF(("Recursion (%d) succeeded - continuing\n", number));  
             md->recursive = rec->prevrec;  
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
1148          }          }
1149    
1150        /* Reset the value of the ims flags, in case they got changed during        /* Handle a recursively called group. Restore the offsets
1151        the group. */        appropriately and continue from after the call. */
1152    
1153        ims = original_ims;        if (md->recursive != NULL && md->recursive->group_num == number)
       DPRINTF(("ims reset to %02lx\n", ims));  
   
       /* For a non-repeating ket, just continue at this level. This also  
       happens for a repeating ket if no characters were matched in the group.  
       This is the forcible breaking of infinite loops as implemented in Perl  
       5.005. If there is an options reset, it will get obeyed in the normal  
       course of events. */  
   
       if (*ecode == OP_KET || eptr == saved_eptr)  
1154          {          {
1155          ecode += 1 + LINK_SIZE;          recursion_info *rec = md->recursive;
1156            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1157            md->recursive = rec->prevrec;
1158            md->start_match = rec->save_start;
1159            memcpy(md->offset_vector, rec->offset_save,
1160              rec->saved_max * sizeof(int));
1161            ecode = rec->after_call;
1162            ims = original_ims;
1163          break;          break;
1164          }          }
1165          }
1166    
1167        /* The repeating kets try the rest of the pattern or restart from the      /* For both capturing and non-capturing groups, reset the value of the ims
1168        preceding bracket, in the appropriate order. */      flags, in case they got changed during the group. */
1169    
1170        if (*ecode == OP_KETRMIN)      ims = original_ims;
1171          {      DPRINTF(("ims reset to %02lx\n", ims));
1172          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
1173          if (rrc != MATCH_NOMATCH) RRETURN(rrc);      /* For a non-repeating ket, just continue at this level. This also
1174          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);      happens for a repeating ket if no characters were matched in the group.
1175          if (rrc != MATCH_NOMATCH) RRETURN(rrc);      This is the forcible breaking of infinite loops as implemented in Perl
1176          }      5.005. If there is an options reset, it will get obeyed in the normal
1177        else  /* OP_KETRMAX */      course of events. */
1178          {  
1179          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);      if (*ecode == OP_KET || eptr == saved_eptr)
1180          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        {
1181          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        ecode += 1 + LINK_SIZE;
1182          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        break;
         }  
1183        }        }
1184    
1185      RRETURN(MATCH_NOMATCH);      /* The repeating kets try the rest of the pattern or restart from the
1186        preceding bracket, in the appropriate order. In the second case, we can use
1187        tail recursion to avoid using another stack frame. */
1188    
1189        flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1190    
1191        if (*ecode == OP_KETRMIN)
1192          {
1193          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1194          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1195          ecode = prev;
1196          flags |= match_tail_recursed;
1197          goto TAIL_RECURSE;
1198          }
1199        else  /* OP_KETRMAX */
1200          {
1201          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);
1202          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1203          ecode += 1 + LINK_SIZE;
1204          flags = match_tail_recursed;
1205          goto TAIL_RECURSE;
1206          }
1207        /* Control never gets here */
1208    
1209      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1210    
# Line 1168  for (;;) Line 1212  for (;;)
1212      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1213      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1214        {        {
1215        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1216              (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1217          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1218        ecode++;        ecode++;
1219        break;        break;
# Line 1196  for (;;) Line 1241  for (;;)
1241      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1242        {        {
1243        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1244          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1245        else        else
1246          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1247        ecode++;        ecode++;
# Line 1207  for (;;) Line 1252  for (;;)
1252        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
1253        if (!md->endonly)        if (!md->endonly)
1254          {          {
1255          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1256             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1257            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1258          ecode++;          ecode++;
1259          break;          break;
1260          }          }
1261        }        }
1262      /* ... else fall through */      /* ... else fall through for endonly */
1263    
1264      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1265    
# Line 1226  for (;;) Line 1271  for (;;)
1271      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1272    
1273      case OP_EODN:      case OP_EODN:
1274      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1275         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1276          RRETURN(MATCH_NOMATCH);
1277      ecode++;      ecode++;
1278      break;      break;
1279    
# Line 1280  for (;;) Line 1326  for (;;)
1326      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1327    
1328      case OP_ANY:      case OP_ANY:
1329      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if ((ims & PCRE_DOTALL) == 0)
1330        RRETURN(MATCH_NOMATCH);        {
1331          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1332          }
1333      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
 #ifdef SUPPORT_UTF8  
1334      if (utf8)      if (utf8)
1335        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 #endif  
1336      ecode++;      ecode++;
1337      break;      break;
1338    
# Line 1376  for (;;) Line 1422  for (;;)
1422      ecode++;      ecode++;
1423      break;      break;
1424    
1425        case OP_ANYNL:
1426        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1427        GETCHARINCTEST(c, eptr);
1428        switch(c)
1429          {
1430          default: RRETURN(MATCH_NOMATCH);
1431          case 0x000d:
1432          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1433          break;
1434          case 0x000a:
1435          case 0x000b:
1436          case 0x000c:
1437          case 0x0085:
1438          case 0x2028:
1439          case 0x2029:
1440          break;
1441          }
1442        ecode++;
1443        break;
1444    
1445  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1446      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1447      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1418  for (;;) Line 1484  for (;;)
1484    
1485          default:          default:
1486          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
         break;  
1487          }          }
1488    
1489        ecode += 3;        ecode += 3;
# Line 1888  for (;;) Line 1953  for (;;)
1953    
1954        else        else
1955          {          {
1956          int dc;          unsigned int dc;
1957          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
1958          ecode += length;          ecode += length;
1959    
# Line 1915  for (;;) Line 1980  for (;;)
1980        }        }
1981      break;      break;
1982    
1983      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
1984    
1985      case OP_EXACT:      case OP_EXACT:
1986      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
1987      ecode += 3;      ecode += 3;
1988      goto REPEATCHAR;      goto REPEATCHAR;
1989    
1990        case OP_POSUPTO:
1991        possessive = TRUE;
1992        /* Fall through */
1993    
1994      case OP_UPTO:      case OP_UPTO:
1995      case OP_MINUPTO:      case OP_MINUPTO:
1996      min = 0;      min = 0;
# Line 1930  for (;;) Line 1999  for (;;)
1999      ecode += 3;      ecode += 3;
2000      goto REPEATCHAR;      goto REPEATCHAR;
2001    
2002        case OP_POSSTAR:
2003        possessive = TRUE;
2004        min = 0;
2005        max = INT_MAX;
2006        ecode++;
2007        goto REPEATCHAR;
2008    
2009        case OP_POSPLUS:
2010        possessive = TRUE;
2011        min = 1;
2012        max = INT_MAX;
2013        ecode++;
2014        goto REPEATCHAR;
2015    
2016        case OP_POSQUERY:
2017        possessive = TRUE;
2018        min = 0;
2019        max = 1;
2020        ecode++;
2021        goto REPEATCHAR;
2022    
2023      case OP_STAR:      case OP_STAR:
2024      case OP_MINSTAR:      case OP_MINSTAR:
2025      case OP_PLUS:      case OP_PLUS:
# Line 1961  for (;;) Line 2051  for (;;)
2051    
2052        if (length > 1)        if (length > 1)
2053          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2054  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2055          int othercase;          unsigned int othercase;
2056          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2057              (othercase = _pcre_ucp_othercase(fc)) >= 0 &&              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
              othercase >= 0)  
2058            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2059            else oclength = 0;
2060  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2061    
2062          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2063            {            {
2064            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (memcmp(eptr, charptr, length) == 0) eptr += length;
2065    #ifdef SUPPORT_UCP
2066            /* Need braces because of following else */            /* Need braces because of following else */
2067            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2068            else            else
# Line 1982  for (;;) Line 2070  for (;;)
2070              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2071              eptr += oclength;              eptr += oclength;
2072              }              }
2073    #else   /* without SUPPORT_UCP */
2074              else { RRETURN(MATCH_NOMATCH); }
2075    #endif  /* SUPPORT_UCP */
2076            }            }
2077    
2078          if (min == max) continue;          if (min == max) continue;
# Line 1994  for (;;) Line 2085  for (;;)
2085              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2086              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2087              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2088    #ifdef SUPPORT_UCP
2089              /* Need braces because of following else */              /* Need braces because of following else */
2090              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2091              else              else
# Line 2001  for (;;) Line 2093  for (;;)
2093                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2094                eptr += oclength;                eptr += oclength;
2095                }                }
2096    #else   /* without SUPPORT_UCP */
2097                else { RRETURN (MATCH_NOMATCH); }
2098    #endif  /* SUPPORT_UCP */
2099              }              }
2100            /* Control never gets here */            /* Control never gets here */
2101            }            }
2102          else  
2103            else  /* Maximize */
2104            {            {
2105            pp = eptr;            pp = eptr;
2106            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2107              {              {
2108              if (eptr > md->end_subject - length) break;              if (eptr > md->end_subject - length) break;
2109              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2110    #ifdef SUPPORT_UCP
2111              else if (oclength == 0) break;              else if (oclength == 0) break;
2112              else              else
2113                {                {
2114                if (memcmp(eptr, occhars, oclength) != 0) break;                if (memcmp(eptr, occhars, oclength) != 0) break;
2115                eptr += oclength;                eptr += oclength;
2116                }                }
2117    #else   /* without SUPPORT_UCP */
2118                else break;
2119    #endif  /* SUPPORT_UCP */
2120              }              }
2121            while (eptr >= pp)  
2122              if (possessive) continue;
2123              for(;;)
2124             {             {
2125             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2126             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2127               if (eptr == pp) RRETURN(MATCH_NOMATCH);
2128    #ifdef SUPPORT_UCP
2129               eptr--;
2130               BACKCHAR(eptr);
2131    #else   /* without SUPPORT_UCP */
2132             eptr -= length;             eptr -= length;
2133    #endif  /* SUPPORT_UCP */
2134             }             }
           RRETURN(MATCH_NOMATCH);  
2135            }            }
2136          /* Control never gets here */          /* Control never gets here */
2137          }          }
# Line 2072  for (;;) Line 2179  for (;;)
2179            }            }
2180          /* Control never gets here */          /* Control never gets here */
2181          }          }
2182        else        else  /* Maximize */
2183          {          {
2184          pp = eptr;          pp = eptr;
2185          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2080  for (;;) Line 2187  for (;;)
2187            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2188            eptr++;            eptr++;
2189            }            }
2190            if (possessive) continue;
2191          while (eptr >= pp)          while (eptr >= pp)
2192            {            {
2193            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2108  for (;;) Line 2216  for (;;)
2216            }            }
2217          /* Control never gets here */          /* Control never gets here */
2218          }          }
2219        else        else  /* Maximize */
2220          {          {
2221          pp = eptr;          pp = eptr;
2222          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2116  for (;;) Line 2224  for (;;)
2224            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2225            eptr++;            eptr++;
2226            }            }
2227            if (possessive) continue;
2228          while (eptr >= pp)          while (eptr >= pp)
2229            {            {
2230            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2168  for (;;) Line 2277  for (;;)
2277      ecode += 3;      ecode += 3;
2278      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2279    
2280        case OP_NOTPOSSTAR:
2281        possessive = TRUE;
2282        min = 0;
2283        max = INT_MAX;
2284        ecode++;
2285        goto REPEATNOTCHAR;
2286    
2287        case OP_NOTPOSPLUS:
2288        possessive = TRUE;
2289        min = 1;
2290        max = INT_MAX;
2291        ecode++;
2292        goto REPEATNOTCHAR;
2293    
2294        case OP_NOTPOSQUERY:
2295        possessive = TRUE;
2296        min = 0;
2297        max = 1;
2298        ecode++;
2299        goto REPEATNOTCHAR;
2300    
2301        case OP_NOTPOSUPTO:
2302        possessive = TRUE;
2303        min = 0;
2304        max = GET2(ecode, 1);
2305        ecode += 3;
2306        goto REPEATNOTCHAR;
2307    
2308      case OP_NOTSTAR:      case OP_NOTSTAR:
2309      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2310      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2207  for (;;) Line 2344  for (;;)
2344        /* UTF-8 mode */        /* UTF-8 mode */
2345        if (utf8)        if (utf8)
2346          {          {
2347          register int d;          register unsigned int d;
2348          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2349            {            {
2350            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2232  for (;;) Line 2369  for (;;)
2369          /* UTF-8 mode */          /* UTF-8 mode */
2370          if (utf8)          if (utf8)
2371            {            {
2372            register int d;            register unsigned int d;
2373            for (fi = min;; fi++)            for (fi = min;; fi++)
2374              {              {
2375              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2268  for (;;) Line 2405  for (;;)
2405          /* UTF-8 mode */          /* UTF-8 mode */
2406          if (utf8)          if (utf8)
2407            {            {
2408            register int d;            register unsigned int d;
2409            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2410              {              {
2411              int len = 1;              int len = 1;
# Line 2278  for (;;) Line 2415  for (;;)
2415              if (fc == d) break;              if (fc == d) break;
2416              eptr += len;              eptr += len;
2417              }              }
2418            for(;;)          if (possessive) continue;
2419            for(;;)
2420              {              {
2421              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2422              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 2295  for (;;) Line 2433  for (;;)
2433              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2434              eptr++;              eptr++;
2435              }              }
2436              if (possessive) continue;
2437            while (eptr >= pp)            while (eptr >= pp)
2438              {              {
2439              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2316  for (;;) Line 2455  for (;;)
2455        /* UTF-8 mode */        /* UTF-8 mode */
2456        if (utf8)        if (utf8)
2457          {          {
2458          register int d;          register unsigned int d;
2459          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2460            {            {
2461            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2339  for (;;) Line 2478  for (;;)
2478          /* UTF-8 mode */          /* UTF-8 mode */
2479          if (utf8)          if (utf8)
2480            {            {
2481            register int d;            register unsigned int d;
2482            for (fi = min;; fi++)            for (fi = min;; fi++)
2483              {              {
2484              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2374  for (;;) Line 2513  for (;;)
2513          /* UTF-8 mode */          /* UTF-8 mode */
2514          if (utf8)          if (utf8)
2515            {            {
2516            register int d;            register unsigned int d;
2517            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2518              {              {
2519              int len = 1;              int len = 1;
# Line 2383  for (;;) Line 2522  for (;;)
2522              if (fc == d) break;              if (fc == d) break;
2523              eptr += len;              eptr += len;
2524              }              }
2525              if (possessive) continue;
2526            for(;;)            for(;;)
2527              {              {
2528              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2400  for (;;) Line 2540  for (;;)
2540              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2541              eptr++;              eptr++;
2542              }              }
2543              if (possessive) continue;
2544            while (eptr >= pp)            while (eptr >= pp)
2545              {              {
2546              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2431  for (;;) Line 2572  for (;;)
2572      ecode += 3;      ecode += 3;
2573      goto REPEATTYPE;      goto REPEATTYPE;
2574    
2575        case OP_TYPEPOSSTAR:
2576        possessive = TRUE;
2577        min = 0;
2578        max = INT_MAX;
2579        ecode++;
2580        goto REPEATTYPE;
2581    
2582        case OP_TYPEPOSPLUS:
2583        possessive = TRUE;
2584        min = 1;
2585        max = INT_MAX;
2586        ecode++;
2587        goto REPEATTYPE;
2588    
2589        case OP_TYPEPOSQUERY:
2590        possessive = TRUE;
2591        min = 0;
2592        max = 1;
2593        ecode++;
2594        goto REPEATTYPE;
2595    
2596        case OP_TYPEPOSUPTO:
2597        possessive = TRUE;
2598        min = 0;
2599        max = GET2(ecode, 1);
2600        ecode += 3;
2601        goto REPEATTYPE;
2602    
2603      case OP_TYPESTAR:      case OP_TYPESTAR:
2604      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
2605      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2533  for (;;) Line 2702  for (;;)
2702    
2703            default:            default:
2704            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
2705            }            }
2706          }          }
2707    
# Line 2573  for (;;) Line 2741  for (;;)
2741          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2742            {            {
2743            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
2744               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2745              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2746              eptr++;
2747            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2748            }            }
2749          break;          break;
# Line 2583  for (;;) Line 2752  for (;;)
2752          eptr += min;          eptr += min;
2753          break;          break;
2754    
2755            case OP_ANYNL:
2756            for (i = 1; i <= min; i++)
2757              {
2758              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2759              GETCHARINC(c, eptr);
2760              switch(c)
2761                {
2762                default: RRETURN(MATCH_NOMATCH);
2763                case 0x000d:
2764                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2765                break;
2766                case 0x000a:
2767                case 0x000b:
2768                case 0x000c:
2769                case 0x0085:
2770                case 0x2028:
2771                case 0x2029:
2772                break;
2773                }
2774              }
2775            break;
2776    
2777          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2778          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2779            {            {
# Line 2651  for (;;) Line 2842  for (;;)
2842  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
2843    
2844        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
2845        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
2846          number of bytes present, as this was tested above. */
2847    
2848        switch(ctype)        switch(ctype)
2849          {          {
# Line 2659  for (;;) Line 2851  for (;;)
2851          if ((ims & PCRE_DOTALL) == 0)          if ((ims & PCRE_DOTALL) == 0)
2852            {            {
2853            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2854              if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);              {
2855                if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2856                eptr++;
2857                }
2858            }            }
2859          else eptr += min;          else eptr += min;
2860          break;          break;
# Line 2668  for (;;) Line 2863  for (;;)
2863          eptr += min;          eptr += min;
2864          break;          break;
2865    
2866            /* Because of the CRLF case, we can't assume the minimum number of
2867            bytes are present in this case. */
2868    
2869            case OP_ANYNL:
2870            for (i = 1; i <= min; i++)
2871              {
2872              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2873              switch(*eptr++)
2874                {
2875                default: RRETURN(MATCH_NOMATCH);
2876                case 0x000d:
2877                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2878                break;
2879                case 0x000a:
2880                case 0x000b:
2881                case 0x000c:
2882                case 0x0085:
2883                break;
2884                }
2885              }
2886            break;
2887    
2888          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2889          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2890            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2729  for (;;) Line 2946  for (;;)
2946              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2947              if (prop_fail_result) RRETURN(MATCH_NOMATCH);              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2948              }              }
2949            break;            /* Control never gets here */
2950    
2951            case PT_LAMP:            case PT_LAMP:
2952            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 2744  for (;;) Line 2961  for (;;)
2961                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
2962                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2963              }              }
2964            break;            /* Control never gets here */
2965    
2966            case PT_GC:            case PT_GC:
2967            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 2757  for (;;) Line 2974  for (;;)
2974              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2975                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2976              }              }
2977            break;            /* Control never gets here */
2978    
2979            case PT_PC:            case PT_PC:
2980            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 2770  for (;;) Line 2987  for (;;)
2987              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2988                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2989              }              }
2990            break;            /* Control never gets here */
2991    
2992            case PT_SC:            case PT_SC:
2993            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 2783  for (;;) Line 3000  for (;;)
3000              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3001                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3002              }              }
3003            break;            /* Control never gets here */
3004    
3005            default:            default:
3006            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
3007            }            }
3008          }          }
3009    
# Line 2829  for (;;) Line 3045  for (;;)
3045            {            {
3046            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3047            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3048            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3049                   (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3050                    IS_NEWLINE(eptr)))
3051                RRETURN(MATCH_NOMATCH);
3052    
3053            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3054            switch(ctype)            switch(ctype)
3055              {              {
3056              case OP_ANY:              case OP_ANY:        /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3057              break;              break;
3058    
3059              case OP_ANYBYTE:              case OP_ANYBYTE:
3060              break;              break;
3061    
3062                case OP_ANYNL:
3063                switch(c)
3064                  {
3065                  default: RRETURN(MATCH_NOMATCH);
3066                  case 0x000d:
3067                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3068                  break;
3069                  case 0x000a:
3070                  case 0x000b:
3071                  case 0x000c:
3072                  case 0x0085:
3073                  case 0x2028:
3074                  case 0x2029:
3075                  break;
3076                  }
3077                break;
3078    
3079              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3080              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3081                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2884  for (;;) Line 3119  for (;;)
3119            {            {
3120            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3121            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3122            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3123                   ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3124                RRETURN(MATCH_NOMATCH);
3125    
3126            c = *eptr++;            c = *eptr++;
3127            switch(ctype)            switch(ctype)
3128              {              {
3129              case OP_ANY:              case OP_ANY:   /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3130              break;              break;
3131    
3132              case OP_ANYBYTE:              case OP_ANYBYTE:
3133              break;              break;
3134    
3135                case OP_ANYNL:
3136                switch(c)
3137                  {
3138                  default: RRETURN(MATCH_NOMATCH);
3139                  case 0x000d:
3140                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3141                  break;
3142                  case 0x000a:
3143                  case 0x000b:
3144                  case 0x000c:
3145                  case 0x0085:
3146                  break;
3147                  }
3148                break;
3149    
3150              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3151              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3152              break;              break;
# Line 2927  for (;;) Line 3179  for (;;)
3179        /* Control never gets here */        /* Control never gets here */
3180        }        }
3181    
3182      /* If maximizing it is worth using inline code for speed, doing the type      /* If maximizing, it is worth using inline code for speed, doing the type
3183      test once at the start (i.e. keep it out of the loop). Again, keep the      test once at the start (i.e. keep it out of the loop). Again, keep the
3184      UTF-8 and UCP stuff separate. */      UTF-8 and UCP stuff separate. */
3185    
# Line 3008  for (;;) Line 3260  for (;;)
3260    
3261          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3262    
3263            if (possessive) continue;
3264          for(;;)          for(;;)
3265            {            {
3266            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 3043  for (;;) Line 3296  for (;;)
3296    
3297          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3298    
3299            if (possessive) continue;
3300          for(;;)          for(;;)
3301            {            {
3302            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 3075  for (;;) Line 3329  for (;;)
3329            {            {
3330            case OP_ANY:            case OP_ANY:
3331    
3332            /* Special code is required for UTF8, but when the maximum is unlimited            /* Special code is required for UTF8, but when the maximum is
3333            we don't need it, so we repeat the non-UTF8 code. This is probably            unlimited we don't need it, so we repeat the non-UTF8 code. This is
3334            worth it, because .* is quite a common idiom. */            probably worth it, because .* is quite a common idiom. */
3335    
3336            if (max < INT_MAX)            if (max < INT_MAX)
3337              {              {
# Line 3085  for (;;) Line 3339  for (;;)
3339                {                {
3340                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3341                  {                  {
3342                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3343                  eptr++;                  eptr++;
3344                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3345                  }                  }
# Line 3094  for (;;) Line 3348  for (;;)
3348                {                {
3349                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3350                  {                  {
3351                    if (eptr >= md->end_subject) break;
3352                  eptr++;                  eptr++;
3353                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3354                  }                  }
# Line 3108  for (;;) Line 3363  for (;;)
3363                {                {
3364                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3365                  {                  {
3366                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3367                  eptr++;                  eptr++;
3368                  }                  }
3369                break;                break;
# Line 3116  for (;;) Line 3371  for (;;)
3371              else              else
3372                {                {
3373                c = max - min;                c = max - min;
3374                if (c > md->end_subject - eptr) c = md->end_subject - eptr;                if (c > (unsigned int)(md->end_subject - eptr))
3375                    c = md->end_subject - eptr;
3376                eptr += c;                eptr += c;
3377                }                }
3378              }              }
# Line 3126  for (;;) Line 3382  for (;;)
3382    
3383            case OP_ANYBYTE:            case OP_ANYBYTE:
3384            c = max - min;            c = max - min;
3385            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3386                c = md->end_subject - eptr;
3387            eptr += c;            eptr += c;
3388            break;            break;
3389    
3390              case OP_ANYNL:
3391              for (i = min; i < max; i++)
3392                {
3393                int len = 1;
3394                if (eptr >= md->end_subject) break;
3395                GETCHARLEN(c, eptr, len);
3396                if (c == 0x000d)
3397                  {
3398                  if (++eptr >= md->end_subject) break;
3399                  if (*eptr == 0x000a) eptr++;
3400                  }
3401                else
3402                  {
3403                  if (c != 0x000a && c != 0x000b && c != 0x000c &&
3404                      c != 0x0085 && c != 0x2028 && c != 0x2029)
3405                    break;
3406                  eptr += len;
3407                  }
3408                }
3409              break;
3410    
3411            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3412            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3413              {              {
# Line 3202  for (;;) Line 3480  for (;;)
3480    
3481          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3482    
3483            if (possessive) continue;
3484          for(;;)          for(;;)
3485            {            {
3486            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 3222  for (;;) Line 3501  for (;;)
3501              {              {
3502              for (i = min; i < max; i++)              for (i = min; i < max; i++)
3503                {                {
3504                if (eptr >= md->end_subject || *eptr == NEWLINE) break;                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3505                eptr++;                eptr++;
3506                }                }
3507              break;              break;
# Line 3231  for (;;) Line 3510  for (;;)
3510    
3511            case OP_ANYBYTE:            case OP_ANYBYTE:
3512            c = max - min;            c = max - min;
3513            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3514                c = md->end_subject - eptr;
3515            eptr += c;            eptr += c;
3516            break;            break;
3517    
3518              case OP_ANYNL:
3519              for (i = min; i < max; i++)
3520                {
3521                if (eptr >= md->end_subject) break;
3522                c = *eptr;
3523                if (c == 0x000d)
3524                  {
3525                  if (++eptr >= md->end_subject) break;
3526                  if (*eptr == 0x000a) eptr++;
3527                  }
3528                else
3529                  {
3530                  if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
3531                    break;
3532                  eptr++;
3533                  }
3534                }
3535              break;
3536    
3537            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3538            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3539              {              {
# Line 3295  for (;;) Line 3594  for (;;)
3594    
3595          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3596    
3597            if (possessive) continue;
3598          while (eptr >= pp)          while (eptr >= pp)
3599            {            {
3600            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 3309  for (;;) Line 3609  for (;;)
3609        }        }
3610      /* Control never gets here */      /* Control never gets here */
3611    
3612      /* There's been some horrible disaster. Since all codes > OP_BRA are      /* There's been some horrible disaster. Arrival here can only mean there is
3613      for capturing brackets, and there shouldn't be any gaps between 0 and      something seriously wrong in the code above or the OP_xxx definitions. */
     OP_BRA, arrival here can only mean there is something seriously wrong  
     in the code above or the OP_xxx definitions. */  
3614    
3615      default:      default:
3616      DPRINTF(("Unknown opcode %d\n", *ecode));      DPRINTF(("Unknown opcode %d\n", *ecode));
3617      RRETURN(PCRE_ERROR_UNKNOWN_NODE);      RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
3618      }      }
3619    
3620    /* Do not stick any code in here without much thought; it is assumed    /* Do not stick any code in here without much thought; it is assumed
# Line 3354  Undefine all the macros that were define Line 3652  Undefine all the macros that were define
3652    
3653  #undef cur_is_word  #undef cur_is_word
3654  #undef condition  #undef condition
 #undef minimize  
3655  #undef prev_is_word  #undef prev_is_word
3656    
3657  #undef original_ims  #undef original_ims
# Line 3419  int rc, resetcount, ocount; Line 3716  int rc, resetcount, ocount;
3716  int first_byte = -1;  int first_byte = -1;
3717  int req_byte = -1;  int req_byte = -1;
3718  int req_byte2 = -1;  int req_byte2 = -1;
3719  unsigned long int ims = 0;  int newline;
3720    unsigned long int ims;
3721  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
3722  BOOL anchored;  BOOL anchored;
3723  BOOL startline;  BOOL startline;
3724  BOOL firstline;  BOOL firstline;
3725  BOOL first_byte_caseless = FALSE;  BOOL first_byte_caseless = FALSE;
3726  BOOL req_byte_caseless = FALSE;  BOOL req_byte_caseless = FALSE;
3727    BOOL utf8;
3728  match_data match_block;  match_data match_block;
3729    match_data *md = &match_block;
3730  const uschar *tables;  const uschar *tables;
3731  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3732  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
3733  USPTR end_subject;  USPTR end_subject;
3734  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
3735    eptrblock eptrchain[EPTR_WORK_SIZE];
3736    
3737  pcre_study_data internal_study;  pcre_study_data internal_study;
3738  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3451  if (offsetcount < 0) return PCRE_ERROR_B Line 3752  if (offsetcount < 0) return PCRE_ERROR_B
3752  the default values. */  the default values. */
3753    
3754  study = NULL;  study = NULL;
3755  match_block.match_limit = MATCH_LIMIT;  md->match_limit = MATCH_LIMIT;
3756  match_block.match_limit_recursion = MATCH_LIMIT_RECURSION;  md->match_limit_recursion = MATCH_LIMIT_RECURSION;
3757  match_block.callout_data = NULL;  md->callout_data = NULL;
3758    
3759  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
3760    
# Line 3465  if (extra_data != NULL) Line 3766  if (extra_data != NULL)
3766    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3767      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
3768    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
3769      match_block.match_limit = extra_data->match_limit;      md->match_limit = extra_data->match_limit;
3770    if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3771      match_block.match_limit_recursion = extra_data->match_limit_recursion;      md->match_limit_recursion = extra_data->match_limit_recursion;
3772    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3773      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
3774    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
3775    }    }
3776    
# Line 3499  firstline = (re->options & PCRE_FIRSTLIN Line 3800  firstline = (re->options & PCRE_FIRSTLIN
3800    
3801  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
3802    
3803  match_block.start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const uschar *)external_re + re->name_table_offset +
3804    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
3805    
3806  match_block.start_subject = (USPTR)subject;  md->start_subject = (USPTR)subject;
3807  match_block.start_offset = start_offset;  md->start_offset = start_offset;
3808  match_block.end_subject = match_block.start_subject + length;  md->end_subject = md->start_subject + length;
3809  end_subject = match_block.end_subject;  end_subject = md->end_subject;
3810    
3811  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
3812  match_block.utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
3813    
3814  match_block.notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
3815  match_block.noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
3816  match_block.notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
3817  match_block.partial = (options & PCRE_PARTIAL) != 0;  md->partial = (options & PCRE_PARTIAL) != 0;
3818  match_block.hitend = FALSE;  md->hitend = FALSE;
3819    
3820    md->recursive = NULL;                   /* No recursion at top level */
3821    md->eptrchain = eptrchain;              /* Make workspace generally available */
3822    
3823  match_block.recursive = NULL;                   /* No recursion at top level */  md->lcc = tables + lcc_offset;
3824    md->ctypes = tables + ctypes_offset;
3825    
3826  match_block.lcc = tables + lcc_offset;  /* Handle different types of newline. The three bits give eight cases. If
3827  match_block.ctypes = tables + ctypes_offset;  nothing is set at run time, whatever was used at compile time applies. */
3828    
3829    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
3830           PCRE_NEWLINE_BITS)
3831      {
3832      case 0: newline = NEWLINE; break;   /* Compile-time default */
3833      case PCRE_NEWLINE_CR: newline = '\r'; break;
3834      case PCRE_NEWLINE_LF: newline = '\n'; break;
3835      case PCRE_NEWLINE_CR+
3836           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
3837      case PCRE_NEWLINE_ANY: newline = -1; break;
3838      default: return PCRE_ERROR_BADNEWLINE;
3839      }
3840    
3841    if (newline < 0)
3842      {
3843      md->nltype = NLTYPE_ANY;
3844      }
3845    else
3846      {
3847      md->nltype = NLTYPE_FIXED;
3848      if (newline > 255)
3849        {
3850        md->nllen = 2;
3851        md->nl[0] = (newline >> 8) & 255;
3852        md->nl[1] = newline & 255;
3853        }
3854      else
3855        {
3856        md->nllen = 1;
3857        md->nl[0] = newline;
3858        }
3859      }
3860    
3861  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
3862  moment. */  moment. */
3863    
3864  if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
3865    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
3866    
3867  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3868  back the character offset. */  back the character offset. */
3869    
3870  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3871  if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3872    {    {
3873    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
3874      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
# Line 3563  ocount = offsetcount - (offsetcount % 3) Line 3900  ocount = offsetcount - (offsetcount % 3)
3900  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
3901    {    {
3902    ocount = re->top_backref * 3 + 3;    ocount = re->top_backref * 3 + 3;
3903    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3904    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3905    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
3906    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
3907    }    }
3908  else match_block.offset_vector = offsets;  else md->offset_vector = offsets;
3909    
3910  match_block.offset_end = ocount;  md->offset_end = ocount;
3911  match_block.offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
3912  match_block.offset_overflow = FALSE;  md->offset_overflow = FALSE;
3913  match_block.capture_last = -1;  md->capture_last = -1;
3914    
3915  /* Compute the minimum number of offsets that we need to reset each time. Doing  /* Compute the minimum number of offsets that we need to reset each time. Doing
3916  this makes a huge difference to execution time when there aren't many brackets  this makes a huge difference to execution time when there aren't many brackets
# Line 3586  if (resetcount > offsetcount) resetcount Line 3923  if (resetcount > offsetcount) resetcount
3923  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
3924  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. */
3925    
3926  if (match_block.offset_vector != NULL)  if (md->offset_vector != NULL)
3927    {    {
3928    register int *iptr = match_block.offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
3929    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - resetcount/2 + 1;
3930    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
3931    }    }
# Line 3605  if (!anchored) Line 3942  if (!anchored)
3942      {      {
3943      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
3944      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
3945        first_byte = match_block.lcc[first_byte];        first_byte = md->lcc[first_byte];
3946      }      }
3947    else    else
3948      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 3623  if ((re->options & PCRE_REQCHSET) != 0) Line 3960  if ((re->options & PCRE_REQCHSET) != 0)
3960    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
3961    }    }
3962    
3963    
3964    /* ==========================================================================*/
3965    
3966  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
3967  the loop runs just once. */  the loop runs just once. */
3968    
3969  do  for(;;)
3970    {    {
3971    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
3972    
3973    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
3974    
3975    if (match_block.offset_vector != NULL)    if (md->offset_vector != NULL)
3976      {      {
3977      register int *iptr = match_block.offset_vector;      register int *iptr = md->offset_vector;
3978      register int *iend = iptr + resetcount;      register int *iend = iptr + resetcount;
3979      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
3980      }      }
3981    
3982    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* Advance to a unique first char if possible. If firstline is TRUE, the
3983    start of the match is constrained to the first line of a multiline string.    start of the match is constrained to the first line of a multiline string.
3984    Implement this by temporarily adjusting end_subject so that we stop scanning    That is, the match must be before or at the first newline. Implement this by
3985    at a newline. If the match fails at the newline, later code breaks this loop.    temporarily adjusting end_subject so that we stop scanning at a newline. If
3986    */    the match fails at the newline, later code breaks this loop. */
3987    
3988    if (firstline)    if (firstline)
3989      {      {
3990      USPTR t = start_match;      USPTR t = start_match;
3991      while (t < save_end_subject && *t != '\n') t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3992      end_subject = t;      end_subject = t;
3993      }      }
3994    
# Line 3658  do Line 3998  do
3998      {      {
3999      if (first_byte_caseless)      if (first_byte_caseless)
4000        while (start_match < end_subject &&        while (start_match < end_subject &&
4001               match_block.lcc[*start_match] != first_byte)               md->lcc[*start_match] != first_byte)
4002          start_match++;          start_match++;
4003      else      else
4004        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4005          start_match++;          start_match++;
4006      }      }
4007    
4008    /* Or to just after \n for a multiline match if possible */    /* Or to just after a linebreak for a multiline match if possible */
4009    
4010    else if (startline)    else if (startline)
4011      {      {
4012      if (start_match > match_block.start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4013        {        {
4014        while (start_match < end_subject && start_match[-1] != NEWLINE)        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4015            start_match++;
4016    
4017          /* If we have just passed a CR and the newline option is ANY, and we are
4018          now at a LF, advance the match position by one more character. */
4019    
4020          if (start_match[-1] == '\r' &&
4021               md->nltype == NLTYPE_ANY &&
4022               start_match < end_subject &&
4023               *start_match == '\n')
4024          start_match++;          start_match++;
4025        }        }
4026      }      }
# Line 3693  do Line 4042  do
4042    
4043  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4044    printf(">>>> Match against: ");    printf(">>>> Match against: ");
4045    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, md);
4046    printf("\n");    printf("\n");
4047  #endif  #endif
4048    
# Line 3707  do Line 4056  do
4056    
4057    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end can
4058    take a long time, and give bad performance on quite ordinary patterns. This    take a long time, and give bad performance on quite ordinary patterns. This
4059    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4060    don't do this when the string is sufficiently long.    string... so we don't do this when the string is sufficiently long.
4061    
4062    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested.
4063    */    */
4064    
4065    if (req_byte >= 0 &&    if (req_byte >= 0 &&
4066        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
4067        !match_block.partial)        !md->partial)
4068      {      {
4069      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4070    
# Line 3740  do Line 4089  do
4089            }            }
4090          }          }
4091    
4092        /* If we can't find the required character, break the matching loop */        /* If we can't find the required character, break the matching loop,
4093          forcing a match failure. */
4094    
4095        if (p >= end_subject) break;        if (p >= end_subject)
4096            {
4097            rc = MATCH_NOMATCH;
4098            break;
4099            }
4100    
4101        /* If we have found the required character, save the point where we        /* If we have found the required character, save the point where we
4102        found it, so that we don't search again next time round the loop if        found it, so that we don't search again next time round the loop if
# Line 3752  do Line 4106  do
4106        }        }
4107      }      }
4108    
4109    /* When a match occurs, substrings will be set for all internal extractions;    /* OK, we can now run the match. */
   we just need to set up the whole thing as substring 0 before returning. If  
   there were too many extractions, set the return code to zero. In the case  
   where we had to get some local store to hold offsets for backreferences, copy  
   those back references that we can. In this case there need not be overflow  
   if certain parts of the pattern were not used. */  
   
   match_block.start_match = start_match;  
   match_block.match_call_count = 0;  
   
   rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,  
     match_isgroup, 0);  
   
   /* When the result is no match, if the subject's first character was a  
   newline and the PCRE_FIRSTLINE option is set, break (which will return  
   PCRE_ERROR_NOMATCH). The option requests that a match occur before the first  
   newline in the subject. Otherwise, advance the pointer to the next character  
   and continue - but the continuation will actually happen only when the  
   pattern is not anchored. */  
4110    
4111    if (rc == MATCH_NOMATCH)    md->start_match = start_match;
4112      {    md->match_call_count = 0;
4113      if (firstline && *start_match == NEWLINE) break;    md->eptrn = 0;                          /* Next free eptrchain slot */
4114      start_match++;    rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);
4115    
4116      /* Any return other than MATCH_NOMATCH breaks the loop. */
4117    
4118      if (rc != MATCH_NOMATCH) break;
4119    
4120      /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4121      newline in the subject (though it may continue over the newline). Therefore,
4122      if we have just failed to match, starting at a newline, do not continue. */
4123    
4124      if (firstline && IS_NEWLINE(start_match)) break;
4125    
4126      /* Advance the match position by one character. */
4127    
4128      start_match++;
4129  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4130      if (match_block.utf8)    if (utf8)
4131        while(start_match < end_subject && (*start_match & 0xc0) == 0x80)      while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4132          start_match++;        start_match++;
4133  #endif  #endif
     continue;  
     }  
4134    
4135    if (rc != MATCH_MATCH)    /* Break the loop if the pattern is anchored or if we have passed the end of
4136      {    the subject. */
4137      DPRINTF((">>>> error: returning %d\n", rc));  
4138      return rc;    if (anchored || start_match > end_subject) break;
4139      }  
4140      /* If we have just passed a CR and the newline option is CRLF or ANY, and we
4141      are now at a LF, advance the match position by one more character. */
4142    
4143      if (start_match[-1] == '\r' &&
4144           (md->nltype == NLTYPE_ANY || md->nllen == 2) &&
4145           start_match < end_subject &&
4146           *start_match == '\n')
4147        start_match++;
4148    
4149      }   /* End of for(;;) "bumpalong" loop */
4150    
4151    /* ==========================================================================*/
4152    
4153    /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4154    conditions is true:
4155    
4156    /* We have a match! Copy the offset information from temporary store if  (1) The pattern is anchored;
   necessary */  
4157    
4158    (2) We are past the end of the subject;
4159    
4160    (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4161        this option requests that a match occur at or before the first newline in
4162        the subject.
4163    
4164    When we have a match and the offset vector is big enough to deal with any
4165    backreferences, captured substring offsets will already be set up. In the case
4166    where we had to get some local store to hold offsets for backreference
4167    processing, copy those that we can. In this case there need not be overflow if
4168    certain parts of the pattern were not used, even though there are more
4169    capturing parentheses than vector slots. */
4170    
4171    if (rc == MATCH_MATCH)
4172      {
4173    if (using_temporary_offsets)    if (using_temporary_offsets)
4174      {      {
4175      if (offsetcount >= 4)      if (offsetcount >= 4)
4176        {        {
4177        memcpy(offsets + 2, match_block.offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
4178          (offsetcount - 2) * sizeof(int));          (offsetcount - 2) * sizeof(int));
4179        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
4180        }        }
4181      if (match_block.end_offset_top > offsetcount)      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
       match_block.offset_overflow = TRUE;  
   
4182      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
4183      (pcre_free)(match_block.offset_vector);      (pcre_free)(md->offset_vector);
4184      }      }
4185    
4186    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;    /* Set the return code to the number of captured strings, or 0 if there are
4187      too many to fit into the vector. */
4188    
4189      rc = md->offset_overflow? 0 : md->end_offset_top/2;
4190    
4191      /* If there is space, set up the whole thing as substring 0. */
4192    
4193    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4194      {      {
4195      offsets[0] = start_match - match_block.start_subject;      offsets[0] = start_match - md->start_subject;
4196      offsets[1] = match_block.end_match_ptr - match_block.start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
4197      }      }
4198    
4199    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
4200    return rc;    return rc;
4201    }    }
4202    
4203  /* This "while" is the end of the "do" above */  /* Control gets here if there has been an error, or if the overall match
4204    attempt has failed at all permitted starting positions. */
 while (!anchored && start_match <= end_subject);  
4205    
4206  if (using_temporary_offsets)  if (using_temporary_offsets)
4207    {    {
4208    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
4209    (pcre_free)(match_block.offset_vector);    (pcre_free)(md->offset_vector);
4210    }    }
4211    
4212  if (match_block.partial && match_block.hitend)  if (rc != MATCH_NOMATCH)
4213      {
4214      DPRINTF((">>>> error: returning %d\n", rc));
4215      return rc;
4216      }
4217    else if (md->partial && md->hitend)
4218    {    {
4219    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4220    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;

Legend:
Removed from v.87  
changed lines
  Added in v.130

  ViewVC Help
Powered by ViewVC 1.1.5