/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2006 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #define NLBLOCK md           /* The block containing newline information */
46  #include "pcre_internal.h"  #include "pcre_internal.h"
47    
48    
# Line 54  are on the heap, not on the stack. */ Line 54  are on the heap, not on the stack. */
54    
55  typedef struct eptrblock {  typedef struct eptrblock {
56    struct eptrblock *epb_prev;    struct eptrblock *epb_prev;
57    const uschar *epb_saved_eptr;    USPTR epb_saved_eptr;
58  } eptrblock;  } eptrblock;
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
# Line 128  Returns:      TRUE if matched Line 128  Returns:      TRUE if matched
128  */  */
129    
130  static BOOL  static BOOL
131  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
132    unsigned long int ims)    unsigned long int ims)
133  {  {
134  const uschar *p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
135    
136  #ifdef DEBUG  #ifdef DEBUG
137  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 169  return TRUE; Line 169  return TRUE;
169  ****************************************************************************  ****************************************************************************
170                     RECURSION IN THE match() FUNCTION                     RECURSION IN THE match() FUNCTION
171    
172  The match() function is highly recursive. Some regular expressions can cause  The match() function is highly recursive, though not every recursive call
173  it to recurse thousands of times. I was writing for Unix, so I just let it  increases the recursive depth. Nevertheless, some regular expressions can cause
174  call itself recursively. This uses the stack for saving everything that has  it to recurse to a great depth. I was writing for Unix, so I just let it call
175  to be saved for a recursive call. On Unix, the stack can be large, and this  itself recursively. This uses the stack for saving everything that has to be
176  works fine.  saved for a recursive call. On Unix, the stack can be large, and this works
177    fine.
178  It turns out that on non-Unix systems there are problems with programs that  
179  use a lot of stack. (This despite the fact that every last chip has oodles  It turns out that on some non-Unix-like systems there are problems with
180  of memory these days, and techniques for extending the stack have been known  programs that use a lot of stack. (This despite the fact that every last chip
181  for decades.) So....  has oodles of memory these days, and techniques for extending the stack have
182    been known for decades.) So....
183    
184  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
185  calls by keeping local variables that need to be preserved in blocks of memory  calls by keeping local variables that need to be preserved in blocks of memory
186  obtained from malloc instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
187  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
188  always used to.  always used to.
189  ****************************************************************************  ****************************************************************************
190  ***************************************************************************/  ***************************************************************************/
191    
192    
193  /* These versions of the macros use the stack, as normal */  /* These versions of the macros use the stack, as normal. There are debugging
194    versions and production versions. */
195    
196  #ifndef NO_RECURSE  #ifndef NO_RECURSE
197  #define REGISTER register  #define REGISTER register
198  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)  #ifdef DEBUG
199    #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
200      { \
201      printf("match() called in line %d\n", __LINE__); \
202      rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \
203      printf("to line %d\n", __LINE__); \
204      }
205    #define RRETURN(ra) \
206      { \
207      printf("match() returned %d from line %d ", ra, __LINE__); \
208      return ra; \
209      }
210    #else
211    #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
212      rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)
213  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
214    #endif
215    
216  #else  #else
217    
218    
# Line 215  match(), which never changes. */ Line 233  match(), which never changes. */
233      newframe->Xims = re;\      newframe->Xims = re;\
234      newframe->Xeptrb = rf;\      newframe->Xeptrb = rf;\
235      newframe->Xflags = rg;\      newframe->Xflags = rg;\
236        newframe->Xrdepth = frame->Xrdepth + 1;\
237      newframe->Xprevframe = frame;\      newframe->Xprevframe = frame;\
238      frame = newframe;\      frame = newframe;\
239      DPRINTF(("restarting from line %d\n", __LINE__));\      DPRINTF(("restarting from line %d\n", __LINE__));\
# Line 256  typedef struct heapframe { Line 275  typedef struct heapframe {
275    long int Xims;    long int Xims;
276    eptrblock *Xeptrb;    eptrblock *Xeptrb;
277    int Xflags;    int Xflags;
278      unsigned int Xrdepth;
279    
280    /* Function local variables */    /* Function local variables */
281    
# Line 278  typedef struct heapframe { Line 298  typedef struct heapframe {
298    
299  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
300    int Xprop_type;    int Xprop_type;
301      int Xprop_value;
302    int Xprop_fail_result;    int Xprop_fail_result;
303    int Xprop_category;    int Xprop_category;
304    int Xprop_chartype;    int Xprop_chartype;
305    int Xprop_othercase;    int Xprop_script;
   int Xprop_test_against;  
306    int *Xprop_test_variable;    int *Xprop_test_variable;
307  #endif  #endif
308    
# Line 343  Arguments: Line 363  Arguments:
363     flags       can contain     flags       can contain
364                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
365                   match_isgroup - this is the start of a bracketed group                   match_isgroup - this is the start of a bracketed group
366       rdepth      the recursion depth
367    
368  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
369                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
370                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
371                   (e.g. stopped by recursion limit)                   (e.g. stopped by repeated call or recursion limit)
372  */  */
373    
374  static int  static int
375  match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
376    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
377    int flags)    int flags, unsigned int rdepth)
378  {  {
379  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
380  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark them with "register"
381  because they are used a lot in loops. */  because they are used a lot in loops. */
382    
383  register int  rrc;    /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
384  register int  i;      /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
385  register int  c;      /* Character values not kept over RMATCH() calls */  register unsigned int  c;  /* Character values not kept over RMATCH() calls */
386  register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
387    
388  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
389  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 381  frame->Xoffset_top = offset_top; Line 402  frame->Xoffset_top = offset_top;
402  frame->Xims = ims;  frame->Xims = ims;
403  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
404  frame->Xflags = flags;  frame->Xflags = flags;
405    frame->Xrdepth = rdepth;
406    
407  /* This is where control jumps back to to effect "recursion" */  /* This is where control jumps back to to effect "recursion" */
408    
# Line 394  HEAP_RECURSE: Line 416  HEAP_RECURSE:
416  #define ims                frame->Xims  #define ims                frame->Xims
417  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
418  #define flags              frame->Xflags  #define flags              frame->Xflags
419    #define rdepth             frame->Xrdepth
420    
421  /* Ditto for the local variables */  /* Ditto for the local variables */
422    
# Line 418  HEAP_RECURSE: Line 441  HEAP_RECURSE:
441    
442  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
443  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
444    #define prop_value         frame->Xprop_value
445  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
446  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
447  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
448  #define prop_othercase     frame->Xprop_othercase  #define prop_script        frame->Xprop_script
 #define prop_test_against  frame->Xprop_test_against  
449  #define prop_test_variable frame->Xprop_test_variable  #define prop_test_variable frame->Xprop_test_variable
450  #endif  #endif
451    
# Line 452  i, and fc and c, can be the same variabl Line 475  i, and fc and c, can be the same variabl
475  #define fc c  #define fc c
476    
477    
478  #ifdef SUPPORT_UTF8                /* Many of these variables are used ony */  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
479  const uschar *charptr;             /* small blocks of the code. My normal  */  const uschar *charptr;             /* in small blocks of the code. My normal */
480  #endif                             /* style of coding would have declared  */  #endif                             /* style of coding would have declared    */
481  const uschar *callpat;             /* them within each of those blocks.    */  const uschar *callpat;             /* them within each of those blocks.      */
482  const uschar *data;                /* However, in order to accommodate the */  const uschar *data;                /* However, in order to accommodate the   */
483  const uschar *next;                /* version of this code that uses an    */  const uschar *next;                /* version of this code that uses an      */
484  const uschar *pp;                  /* external "stack" implemented on the  */  USPTR         pp;                  /* external "stack" implemented on the    */
485  const uschar *prev;                /* heap, it is easier to declare them   */  const uschar *prev;                /* heap, it is easier to declare them all */
486  const uschar *saved_eptr;          /* all here, so the declarations can    */  USPTR         saved_eptr;          /* here, so the declarations can be cut   */
487                                     /* be cut out in a block. The only      */                                     /* out in a block. The only declarations  */
488  recursion_info new_recursive;      /* declarations within blocks below are */  recursion_info new_recursive;      /* within blocks below are for variables  */
489                                     /* for variables that do not have to    */                                     /* that do not have to be preserved over  */
490  BOOL cur_is_word;                  /* be preserved over a recursive call   */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
491  BOOL condition;                    /* to RMATCH().                         */  BOOL condition;
492  BOOL minimize;  BOOL minimize;
493  BOOL prev_is_word;  BOOL prev_is_word;
494    
# Line 473  unsigned long int original_ims; Line 496  unsigned long int original_ims;
496    
497  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
498  int prop_type;  int prop_type;
499    int prop_value;
500  int prop_fail_result;  int prop_fail_result;
501  int prop_category;  int prop_category;
502  int prop_chartype;  int prop_chartype;
503  int prop_othercase;  int prop_script;
 int prop_test_against;  
504  int *prop_test_variable;  int *prop_test_variable;
505  #endif  #endif
506    
# Line 499  eptrblock newptrb; Line 522  eptrblock newptrb;
522  variables. */  variables. */
523    
524  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
525    prop_value = 0;
526  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_against = 0;  
527  prop_test_variable = NULL;  prop_test_variable = NULL;
528  #endif  #endif
529    
530  /* OK, now we can get on with the real code of the function. Recursion is  /* This label is used for tail recursion, which is used in a few cases even
531  specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
532  these just turn into a recursive call to match() and a "return", respectively.  used. Thanks to Ian Taylor for noticing this possibility and sending the
533  However, RMATCH isn't like a function call because it's quite a complicated  original patch. */
534  macro. It has to be used in one particular way. This shouldn't, however, impact  
535  performance when true recursion is being used. */  TAIL_RECURSE:
536    
537    /* OK, now we can get on with the real code of the function. Recursive calls
538    are specified by the macro RMATCH and RRETURN is used to return. When
539    NO_RECURSE is *not* defined, these just turn into a recursive call to match()
540    and a "return", respectively (possibly with some debugging if DEBUG is
541    defined). However, RMATCH isn't like a function call because it's quite a
542    complicated macro. It has to be used in one particular way. This shouldn't,
543    however, impact performance when true recursion is being used. */
544    
545    /* First check that we haven't called match() too many times, or that we
546    haven't exceeded the recursive call limit. */
547    
548  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
549    if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
550    
551  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
552    
553    #ifdef SUPPORT_UTF8
554  utf8 = md->utf8;       /* Local copy of the flag */  utf8 = md->utf8;       /* Local copy of the flag */
555    #else
556    utf8 = FALSE;
557    #endif
558    
559  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a bracketed group, add the current subject pointer to the
560  stack of such pointers, to be re-instated at the end of the group when we hit  stack of such pointers, to be re-instated at the end of the group when we hit
# Line 614  for (;;) Line 654  for (;;)
654      {      {
655      case OP_BRA:     /* Non-capturing bracket: optimized */      case OP_BRA:     /* Non-capturing bracket: optimized */
656      DPRINTF(("start bracket 0\n"));      DPRINTF(("start bracket 0\n"));
657      do  
658        /* Loop for all the alternatives */
659    
660        for (;;)
661        {        {
662          /* When we get to the final alternative within the brackets, we would
663          return the result of a recursive call to match() whatever happened. We
664          can reduce stack usage by turning this into a tail recursion. */
665    
666          if (ecode[GET(ecode, 1)] != OP_ALT)
667           {
668           ecode += 1 + LINK_SIZE;
669           flags = match_isgroup;
670           DPRINTF(("bracket 0 tail recursion\n"));
671           goto TAIL_RECURSE;
672           }
673    
674          /* For non-final alternatives, continue the loop for a NOMATCH result;
675          otherwise return. */
676    
677        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,
678          match_isgroup);          match_isgroup);
679        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
680        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
681        }        }
682      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
683    
684      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
685      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
686      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
687      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
688        obeyed, we can use tail recursion to avoid using another stack frame. */
689    
690      case OP_COND:      case OP_COND:
691      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */
# Line 637  for (;;) Line 694  for (;;)
694        condition = (offset == CREF_RECURSE * 2)?        condition = (offset == CREF_RECURSE * 2)?
695          (md->recursive != NULL) :          (md->recursive != NULL) :
696          (offset < offset_top && md->offset_vector[offset] >= 0);          (offset < offset_top && md->offset_vector[offset] >= 0);
697        RMATCH(rrc, eptr, ecode + (condition?        ecode += condition? (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1));
698          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),        flags = match_isgroup;
699          offset_top, md, ims, eptrb, match_isgroup);        goto TAIL_RECURSE;
       RRETURN(rrc);  
700        }        }
701    
702      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
# Line 660  for (;;) Line 716  for (;;)
716          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
717          }          }
718        else ecode += GET(ecode, 1);        else ecode += GET(ecode, 1);
719        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,  
720          match_isgroup);        /* We are now at the branch that is to be obeyed. As there is only one,
721        RRETURN(rrc);        we can use tail recursion to avoid using another stack frame. */
722    
723          ecode += 1 + LINK_SIZE;
724          flags = match_isgroup;
725          goto TAIL_RECURSE;
726        }        }
727      /* Control never reaches here */      /* Control never reaches here */
728    
# Line 681  for (;;) Line 741  for (;;)
741      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
742        {        {
743        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
744        DPRINTF(("Hit the end in a (?0) recursion\n"));        DPRINTF(("End of pattern in a (?0) recursion\n"));
745        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
746        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
747          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
# Line 800  for (;;) Line 860  for (;;)
860        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 1;   /* Version 1 of the callout block */
861        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
862        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
863        cb.subject          = (const char *)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
864        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
865        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = md->start_match - md->start_subject;
866        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
# Line 882  for (;;) Line 942  for (;;)
942              eptrb, match_isgroup);              eptrb, match_isgroup);
943          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
944            {            {
945              DPRINTF(("Recursion matched\n"));
946            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
947            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
948              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
949            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
950            }            }
951          else if (rrc != MATCH_NOMATCH) RRETURN(rrc);          else if (rrc != MATCH_NOMATCH)
952              {
953              DPRINTF(("Recursion gave error %d\n", rrc));
954              RRETURN(rrc);
955              }
956    
957          md->recursive = &new_recursive;          md->recursive = &new_recursive;
958          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
# Line 912  for (;;) Line 977  for (;;)
977      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer. */
978    
979      case OP_ONCE:      case OP_ONCE:
980        {      prev = ecode;
981        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
982    
983        do      do
984          {        {
985          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
986            eptrb, match_isgroup);          eptrb, match_isgroup);
987          if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
988          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
989          ecode += GET(ecode,1);        ecode += GET(ecode,1);
990          }        }
991        while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
992    
993        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
994    
995        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
996    
997        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
998        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
999    
1000        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1001    
1002        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1003        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1004    
1005        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1006        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1007        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1008        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1009        course of events. */      course of events. */
1010    
1011        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1012          {        {
1013          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1014          break;        break;
1015          }        }
1016    
1017        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1018        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1019        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1020        opcode. */      any options that changed within the bracket before re-running it, so
1021        check the next opcode. */
1022    
1023        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1024          {        {
1025          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1026          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1027          }        }
1028    
1029        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1030          {        {
1031          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
1032          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1033          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1034          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = match_isgroup;
1035          }        goto TAIL_RECURSE;
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
1036        }        }
1037      RRETURN(MATCH_NOMATCH);      else  /* OP_KETRMAX */
1038          {
1039          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
1040          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1041          ecode += 1 + LINK_SIZE;
1042          flags = 0;
1043          goto TAIL_RECURSE;
1044          }
1045        /* Control never gets here */
1046    
1047      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1048      bracketed group and go to there. */      bracketed group and go to there. */
# Line 1020  for (;;) Line 1086  for (;;)
1086      case OP_KET:      case OP_KET:
1087      case OP_KETRMIN:      case OP_KETRMIN:
1088      case OP_KETRMAX:      case OP_KETRMAX:
1089        {      prev = ecode - GET(ecode, 1);
1090        prev = ecode - GET(ecode, 1);      saved_eptr = eptrb->epb_saved_eptr;
       saved_eptr = eptrb->epb_saved_eptr;  
1091    
1092        /* Back up the stack of bracket start pointers. */      /* Back up the stack of bracket start pointers. */
1093    
1094        eptrb = eptrb->epb_prev;      eptrb = eptrb->epb_prev;
1095    
1096        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1097            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1098            *prev == OP_ONCE)          *prev == OP_ONCE)
1099          {        {
1100          md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1101          md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1102          RRETURN(MATCH_MATCH);        RRETURN(MATCH_MATCH);
1103          }        }
1104    
1105        /* In all other cases except a conditional group we have to check the      /* In all other cases except a conditional group we have to check the
1106        group number back at the start and if necessary complete handling an      group number back at the start and if necessary complete handling an
1107        extraction by setting the offsets and bumping the high water mark. */      extraction by setting the offsets and bumping the high water mark. */
1108    
1109        if (*prev != OP_COND)      if (*prev != OP_COND)
1110          {        {
1111          number = *prev - OP_BRA;        number = *prev - OP_BRA;
1112    
1113          /* For extended extraction brackets (large number), we have to fish out        /* For extended extraction brackets (large number), we have to fish out
1114          the number from a dummy opcode at the start. */        the number from a dummy opcode at the start. */
1115    
1116          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);        if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);
1117          offset = number << 1;        offset = number << 1;
1118    
1119  #ifdef DEBUG  #ifdef DEBUG
1120          printf("end bracket %d", number);        printf("end bracket %d", number);
1121          printf("\n");        printf("\n");
1122  #endif  #endif
1123    
1124          /* Test for a numbered group. This includes groups called as a result        /* Test for a numbered group. This includes groups called as a result
1125          of recursion. Note that whole-pattern recursion is coded as a recurse        of recursion. Note that whole-pattern recursion is coded as a recurse
1126          into group 0, so it won't be picked up here. Instead, we catch it when        into group 0, so it won't be picked up here. Instead, we catch it when
1127          the OP_END is reached. */        the OP_END is reached. */
1128    
1129          if (number > 0)        if (number > 0)
1130            {
1131            md->capture_last = number;
1132            if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1133            {            {
1134            md->capture_last = number;            md->offset_vector[offset] =
1135            if (offset >= md->offset_max) md->offset_overflow = TRUE; else              md->offset_vector[md->offset_end - number];
1136              {            md->offset_vector[offset+1] = eptr - md->start_subject;
1137              md->offset_vector[offset] =            if (offset_top <= offset) offset_top = offset + 2;
1138                md->offset_vector[md->offset_end - number];            }
             md->offset_vector[offset+1] = eptr - md->start_subject;  
             if (offset_top <= offset) offset_top = offset + 2;  
             }  
1139    
1140            /* Handle a recursively called group. Restore the offsets          /* Handle a recursively called group. Restore the offsets
1141            appropriately and continue from after the call. */          appropriately and continue from after the call. */
1142    
1143            if (md->recursive != NULL && md->recursive->group_num == number)          if (md->recursive != NULL && md->recursive->group_num == number)
1144              {            {
1145              recursion_info *rec = md->recursive;            recursion_info *rec = md->recursive;
1146              DPRINTF(("Recursion (%d) succeeded - continuing\n", number));            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1147              md->recursive = rec->prevrec;            md->recursive = rec->prevrec;
1148              md->start_match = rec->save_start;            md->start_match = rec->save_start;
1149              memcpy(md->offset_vector, rec->offset_save,            memcpy(md->offset_vector, rec->offset_save,
1150                rec->saved_max * sizeof(int));              rec->saved_max * sizeof(int));
1151              ecode = rec->after_call;            ecode = rec->after_call;
1152              ims = original_ims;            ims = original_ims;
1153              break;            break;
             }  
1154            }            }
1155          }          }
1156          }
1157    
1158        /* Reset the value of the ims flags, in case they got changed during      /* Reset the value of the ims flags, in case they got changed during
1159        the group. */      the group. */
1160    
1161        ims = original_ims;      ims = original_ims;
1162        DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
1163    
1164        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1165        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1166        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1167        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1168        course of events. */      course of events. */
1169    
1170        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1171          {        {
1172          ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1173          break;        break;
1174          }        }
1175    
1176        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1177        preceding bracket, in the appropriate order. */      preceding bracket, in the appropriate order. In the second case, we can use
1178        tail recursion to avoid using another stack frame. */
1179    
1180        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1181          {        {
1182          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1183          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1184          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1185          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = match_isgroup;
1186          }        goto TAIL_RECURSE;
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
1187        }        }
1188        else  /* OP_KETRMAX */
1189      RRETURN(MATCH_NOMATCH);        {
1190          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
1191          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1192          ecode += 1 + LINK_SIZE;
1193          flags = 0;
1194          goto TAIL_RECURSE;
1195          }
1196        /* Control never gets here */
1197    
1198      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1199    
# Line 1135  for (;;) Line 1201  for (;;)
1201      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1202      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1203        {        {
1204        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1205              (eptr == md->end_subject ||
1206               eptr < md->start_subject + md->nllen ||
1207               !IS_NEWLINE(eptr - md->nllen)))
1208          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1209        ecode++;        ecode++;
1210        break;        break;
# Line 1163  for (;;) Line 1232  for (;;)
1232      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1233        {        {
1234        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1235          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1236        else        else
1237          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1238        ecode++;        ecode++;
# Line 1174  for (;;) Line 1243  for (;;)
1243        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
1244        if (!md->endonly)        if (!md->endonly)
1245          {          {
1246          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1247             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))
1248            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1249          ecode++;          ecode++;
1250          break;          break;
1251          }          }
1252        }        }
1253      /* ... else fall through */      /* ... else fall through for endonly */
1254    
1255      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1256    
# Line 1193  for (;;) Line 1262  for (;;)
1262      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1263    
1264      case OP_EODN:      case OP_EODN:
1265      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1266         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))
1267          RRETURN(MATCH_NOMATCH);
1268      ecode++;      ecode++;
1269      break;      break;
1270    
# Line 1247  for (;;) Line 1317  for (;;)
1317      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1318    
1319      case OP_ANY:      case OP_ANY:
1320      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if ((ims & PCRE_DOTALL) == 0)
1321        RRETURN(MATCH_NOMATCH);        {
1322          if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))
1323            RRETURN(MATCH_NOMATCH);
1324          }
1325      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
 #ifdef SUPPORT_UTF8  
1326      if (utf8)      if (utf8)
1327        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 #endif  
1328      ecode++;      ecode++;
1329      break;      break;
1330    
# Line 1352  for (;;) Line 1423  for (;;)
1423      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1424      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1425        {        {
1426        int chartype, rqdtype;        int chartype, script;
1427        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = _pcre_ucp_findchar(c, &chartype, &othercase);  
1428    
1429        rqdtype = *(++ecode);        switch(ecode[1])
       ecode++;  
   
       if (rqdtype >= 128)  
1430          {          {
1431          if ((rqdtype - 128 != category) == (op == OP_PROP))          case PT_ANY:
1432            if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1433            break;
1434    
1435            case PT_LAMP:
1436            if ((chartype == ucp_Lu ||
1437                 chartype == ucp_Ll ||
1438                 chartype == ucp_Lt) == (op == OP_NOTPROP))
1439            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1440          }           break;
1441        else  
1442          {          case PT_GC:
1443          if ((rqdtype != chartype) == (op == OP_PROP))          if ((ecode[2] != category) == (op == OP_PROP))
1444              RRETURN(MATCH_NOMATCH);
1445            break;
1446    
1447            case PT_PC:
1448            if ((ecode[2] != chartype) == (op == OP_PROP))
1449              RRETURN(MATCH_NOMATCH);
1450            break;
1451    
1452            case PT_SC:
1453            if ((ecode[2] != script) == (op == OP_PROP))
1454            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1455            break;
1456    
1457            default:
1458            RRETURN(PCRE_ERROR_INTERNAL);
1459            break;
1460          }          }
1461    
1462          ecode += 3;
1463        }        }
1464      break;      break;
1465    
# Line 1379  for (;;) Line 1470  for (;;)
1470      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1471      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1472        {        {
1473        int chartype;        int chartype, script;
1474        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = _pcre_ucp_findchar(c, &chartype, &othercase);  
1475        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1476        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1477          {          {
# Line 1390  for (;;) Line 1480  for (;;)
1480            {            {
1481            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1482            }            }
1483          category = _pcre_ucp_findchar(c, &chartype, &othercase);          category = _pcre_ucp_findprop(c, &chartype, &script);
1484          if (category != ucp_M) break;          if (category != ucp_M) break;
1485          eptr += len;          eptr += len;
1486          }          }
# Line 1683  for (;;) Line 1773  for (;;)
1773            while (eptr >= pp)            while (eptr >= pp)
1774              {              {
1775              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
             eptr--;  
1776              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1777                eptr--;
1778              }              }
1779            }            }
1780    
# Line 1841  for (;;) Line 1931  for (;;)
1931          ecode += length;          ecode += length;
1932    
1933          /* If we have Unicode property support, we can use it to test the other          /* If we have Unicode property support, we can use it to test the other
1934          case of the character, if there is one. The result of _pcre_ucp_findchar() is          case of the character, if there is one. */
         < 0 if the char isn't found, and othercase is returned as zero if there  
         isn't one. */  
1935    
1936          if (fc != dc)          if (fc != dc)
1937            {            {
1938  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1939            int chartype;            if (dc != _pcre_ucp_othercase(fc))
           int othercase;  
           if (_pcre_ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)  
1940  #endif  #endif
1941              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
1942            }            }
# Line 1918  for (;;) Line 2004  for (;;)
2004    
2005  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2006          int othercase;          int othercase;
         int chartype;  
2007          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2008               _pcre_ucp_findchar(fc, &chartype, &othercase) >= 0 &&              (othercase = _pcre_ucp_othercase(fc)) >= 0 &&
2009               othercase > 0)               othercase >= 0)
2010            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2011  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2012    
# Line 2408  for (;;) Line 2493  for (;;)
2493        {        {
2494        prop_fail_result = ctype == OP_NOTPROP;        prop_fail_result = ctype == OP_NOTPROP;
2495        prop_type = *ecode++;        prop_type = *ecode++;
2496        if (prop_type >= 128)        prop_value = *ecode++;
         {  
         prop_test_against = prop_type - 128;  
         prop_test_variable = &prop_category;  
         }  
       else  
         {  
         prop_test_against = prop_type;  
         prop_test_variable = &prop_chartype;  
         }  
2497        }        }
2498      else prop_type = -1;      else prop_type = -1;
2499  #endif  #endif
# Line 2434  for (;;) Line 2510  for (;;)
2510      if (min > 0)      if (min > 0)
2511        {        {
2512  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2513        if (prop_type > 0)        if (prop_type >= 0)
2514          {          {
2515          for (i = 1; i <= min; i++)          switch(prop_type)
2516            {            {
2517            GETCHARINC(c, eptr);            case PT_ANY:
2518            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2519            if ((*prop_test_variable == prop_test_against) == prop_fail_result)            for (i = 1; i <= min; i++)
2520              RRETURN(MATCH_NOMATCH);              {
2521                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2522                GETCHARINC(c, eptr);
2523                }
2524              break;
2525    
2526              case PT_LAMP:
2527              for (i = 1; i <= min; i++)
2528                {
2529                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2530                GETCHARINC(c, eptr);
2531                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2532                if ((prop_chartype == ucp_Lu ||
2533                     prop_chartype == ucp_Ll ||
2534                     prop_chartype == ucp_Lt) == prop_fail_result)
2535                  RRETURN(MATCH_NOMATCH);
2536                }
2537              break;
2538    
2539              case PT_GC:
2540              for (i = 1; i <= min; i++)
2541                {
2542                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2543                GETCHARINC(c, eptr);
2544                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2545                if ((prop_category == prop_value) == prop_fail_result)
2546                  RRETURN(MATCH_NOMATCH);
2547                }
2548              break;
2549    
2550              case PT_PC:
2551              for (i = 1; i <= min; i++)
2552                {
2553                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2554                GETCHARINC(c, eptr);
2555                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2556                if ((prop_chartype == prop_value) == prop_fail_result)
2557                  RRETURN(MATCH_NOMATCH);
2558                }
2559              break;
2560    
2561              case PT_SC:
2562              for (i = 1; i <= min; i++)
2563                {
2564                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2565                GETCHARINC(c, eptr);
2566                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2567                if ((prop_script == prop_value) == prop_fail_result)
2568                  RRETURN(MATCH_NOMATCH);
2569                }
2570              break;
2571    
2572              default:
2573              RRETURN(PCRE_ERROR_INTERNAL);
2574              break;
2575            }            }
2576          }          }
2577    
# Line 2453  for (;;) Line 2583  for (;;)
2583          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2584            {            {
2585            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2586            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2587            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2588            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2589              {              {
# Line 2462  for (;;) Line 2592  for (;;)
2592                {                {
2593                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2594                }                }
2595              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2596              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2597              eptr += len;              eptr += len;
2598              }              }
# Line 2481  for (;;) Line 2611  for (;;)
2611          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2612            {            {
2613            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
2614               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))                 ((ims & PCRE_DOTALL) == 0 &&
2615                     eptr <= md->end_subject - md->nllen &&
2616                     IS_NEWLINE(eptr)))
2617              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2618              eptr++;
2619            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2620            }            }
2621          break;          break;
# Line 2567  for (;;) Line 2700  for (;;)
2700          if ((ims & PCRE_DOTALL) == 0)          if ((ims & PCRE_DOTALL) == 0)
2701            {            {
2702            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2703              if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);              {
2704                if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))
2705                  RRETURN(MATCH_NOMATCH);
2706                eptr++;
2707                }
2708            }            }
2709          else eptr += min;          else eptr += min;
2710          break;          break;
# Line 2624  for (;;) Line 2761  for (;;)
2761      if (minimize)      if (minimize)
2762        {        {
2763  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2764        if (prop_type > 0)        if (prop_type >= 0)
2765          {          {
2766          for (fi = min;; fi++)          switch(prop_type)
2767            {            {
2768            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            case PT_ANY:
2769            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for (fi = min;; fi++)
2770            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              {
2771            GETCHARINC(c, eptr);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2772            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2773            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2774              RRETURN(MATCH_NOMATCH);              GETCHARINC(c, eptr);
2775                if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2776                }
2777              break;
2778    
2779              case PT_LAMP:
2780              for (fi = min;; fi++)
2781                {
2782                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2783                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2784                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2785                GETCHARINC(c, eptr);
2786                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2787                if ((prop_chartype == ucp_Lu ||
2788                     prop_chartype == ucp_Ll ||
2789                     prop_chartype == ucp_Lt) == prop_fail_result)
2790                  RRETURN(MATCH_NOMATCH);
2791                }
2792              break;
2793    
2794              case PT_GC:
2795              for (fi = min;; fi++)
2796                {
2797                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2798                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2799                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2800                GETCHARINC(c, eptr);
2801                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2802                if ((prop_category == prop_value) == prop_fail_result)
2803                  RRETURN(MATCH_NOMATCH);
2804                }
2805              break;
2806    
2807              case PT_PC:
2808              for (fi = min;; fi++)
2809                {
2810                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2811                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2812                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2813                GETCHARINC(c, eptr);
2814                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2815                if ((prop_chartype == prop_value) == prop_fail_result)
2816                  RRETURN(MATCH_NOMATCH);
2817                }
2818              break;
2819    
2820              case PT_SC:
2821              for (fi = min;; fi++)
2822                {
2823                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2824                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2825                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2826                GETCHARINC(c, eptr);
2827                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2828                if ((prop_script == prop_value) == prop_fail_result)
2829                  RRETURN(MATCH_NOMATCH);
2830                }
2831              break;
2832    
2833              default:
2834              RRETURN(PCRE_ERROR_INTERNAL);
2835              break;
2836            }            }
2837          }          }
2838    
# Line 2649  for (;;) Line 2847  for (;;)
2847            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2848            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2849            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2850            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2851            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2852            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2853              {              {
# Line 2658  for (;;) Line 2856  for (;;)
2856                {                {
2857                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2858                }                }
2859              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2860              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2861              eptr += len;              eptr += len;
2862              }              }
# Line 2676  for (;;) Line 2874  for (;;)
2874            {            {
2875            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2876            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2877            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
2878                   (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
2879                    eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))
2880                RRETURN(MATCH_NOMATCH);
2881    
2882            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2883            switch(ctype)            switch(ctype)
2884              {              {
2885              case OP_ANY:              case OP_ANY:        /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
2886              break;              break;
2887    
2888              case OP_ANYBYTE:              case OP_ANYBYTE:
# Line 2731  for (;;) Line 2931  for (;;)
2931            {            {
2932            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2933            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2934            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
2935                   ((ims & PCRE_DOTALL) == 0 &&
2936                     eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))
2937                RRETURN(MATCH_NOMATCH);
2938    
2939            c = *eptr++;            c = *eptr++;
2940            switch(ctype)            switch(ctype)
2941              {              {
2942              case OP_ANY:              case OP_ANY:   /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
2943              break;              break;
2944    
2945              case OP_ANYBYTE:              case OP_ANYBYTE:
# Line 2783  for (;;) Line 2986  for (;;)
2986        pp = eptr;  /* Remember where we started */        pp = eptr;  /* Remember where we started */
2987    
2988  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2989        if (prop_type > 0)        if (prop_type >= 0)
2990          {          {
2991          for (i = min; i < max; i++)          switch(prop_type)
2992            {            {
2993            int len = 1;            case PT_ANY:
2994            if (eptr >= md->end_subject) break;            for (i = min; i < max; i++)
2995            GETCHARLEN(c, eptr, len);              {
2996            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              int len = 1;
2997            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (eptr >= md->end_subject) break;
2998              break;              GETCHARLEN(c, eptr, len);
2999            eptr+= len;              if (prop_fail_result) break;
3000                eptr+= len;
3001                }
3002              break;
3003    
3004              case PT_LAMP:
3005              for (i = min; i < max; i++)
3006                {
3007                int len = 1;
3008                if (eptr >= md->end_subject) break;
3009                GETCHARLEN(c, eptr, len);
3010                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3011                if ((prop_chartype == ucp_Lu ||
3012                     prop_chartype == ucp_Ll ||
3013                     prop_chartype == ucp_Lt) == prop_fail_result)
3014                  break;
3015                eptr+= len;
3016                }
3017              break;
3018    
3019              case PT_GC:
3020              for (i = min; i < max; i++)
3021                {
3022                int len = 1;
3023                if (eptr >= md->end_subject) break;
3024                GETCHARLEN(c, eptr, len);
3025                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3026                if ((prop_category == prop_value) == prop_fail_result)
3027                  break;
3028                eptr+= len;
3029                }
3030              break;
3031    
3032              case PT_PC:
3033              for (i = min; i < max; i++)
3034                {
3035                int len = 1;
3036                if (eptr >= md->end_subject) break;
3037                GETCHARLEN(c, eptr, len);
3038                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3039                if ((prop_chartype == prop_value) == prop_fail_result)
3040                  break;
3041                eptr+= len;
3042                }
3043              break;
3044    
3045              case PT_SC:
3046              for (i = min; i < max; i++)
3047                {
3048                int len = 1;
3049                if (eptr >= md->end_subject) break;
3050                GETCHARLEN(c, eptr, len);
3051                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3052                if ((prop_script == prop_value) == prop_fail_result)
3053                  break;
3054                eptr+= len;
3055                }
3056              break;
3057            }            }
3058    
3059          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
# Line 2816  for (;;) Line 3076  for (;;)
3076            {            {
3077            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3078            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3079            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3080            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3081            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3082              {              {
# Line 2825  for (;;) Line 3085  for (;;)
3085                {                {
3086                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3087                }                }
3088              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3089              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3090              eptr += len;              eptr += len;
3091              }              }
# Line 2846  for (;;) Line 3106  for (;;)
3106                {                {
3107                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3108                }                }
3109              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3110              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3111              eptr--;              eptr--;
3112              }              }
# Line 2865  for (;;) Line 3125  for (;;)
3125            {            {
3126            case OP_ANY:            case OP_ANY:
3127    
3128            /* Special code is required for UTF8, but when the maximum is unlimited            /* Special code is required for UTF8, but when the maximum is
3129            we don't need it, so we repeat the non-UTF8 code. This is probably            unlimited we don't need it, so we repeat the non-UTF8 code. This is
3130            worth it, because .* is quite a common idiom. */            probably worth it, because .* is quite a common idiom. */
3131    
3132            if (max < INT_MAX)            if (max < INT_MAX)
3133              {              {
# Line 2875  for (;;) Line 3135  for (;;)
3135                {                {
3136                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3137                  {                  {
3138                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject ||
3139                        (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))
3140                      break;
3141                  eptr++;                  eptr++;
3142                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3143                  }                  }
# Line 2884  for (;;) Line 3146  for (;;)
3146                {                {
3147                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3148                  {                  {
3149                    if (eptr >= md->end_subject) break;
3150                  eptr++;                  eptr++;
3151                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3152                  }                  }
# Line 2898  for (;;) Line 3161  for (;;)
3161                {                {
3162                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3163                  {                  {
3164                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject ||
3165                        (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))
3166                      break;
3167                  eptr++;                  eptr++;
3168                  }                  }
3169                break;                break;
# Line 3012  for (;;) Line 3277  for (;;)
3277              {              {
3278              for (i = min; i < max; i++)              for (i = min; i < max; i++)
3279                {                {
3280                if (eptr >= md->end_subject || *eptr == NEWLINE) break;                if (eptr >= md->end_subject ||
3281                      (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))
3282                    break;
3283                eptr++;                eptr++;
3284                }                }
3285              break;              break;
# Line 3200  Returns:          > 0 => success; value Line 3467  Returns:          > 0 => success; value
3467                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3468  */  */
3469    
3470  PCRE_EXPORT int  PCRE_DATA_SCOPE int
3471  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
3472    const char *subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
3473    int offsetcount)    int offsetcount)
3474  {  {
3475  int rc, resetcount, ocount;  int rc, resetcount, ocount;
3476  int first_byte = -1;  int first_byte = -1;
3477  int req_byte = -1;  int req_byte = -1;
3478  int req_byte2 = -1;  int req_byte2 = -1;
3479  unsigned long int ims = 0;  int newline;
3480    unsigned long int ims;
3481  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
3482  BOOL anchored;  BOOL anchored;
3483  BOOL startline;  BOOL startline;
# Line 3217  BOOL firstline; Line 3485  BOOL firstline;
3485  BOOL first_byte_caseless = FALSE;  BOOL first_byte_caseless = FALSE;
3486  BOOL req_byte_caseless = FALSE;  BOOL req_byte_caseless = FALSE;
3487  match_data match_block;  match_data match_block;
3488    match_data *md = &match_block;
3489  const uschar *tables;  const uschar *tables;
3490  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3491  const uschar *start_match = (const uschar *)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
3492  const uschar *end_subject;  USPTR end_subject;
3493  const uschar *req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
3494    
3495  pcre_study_data internal_study;  pcre_study_data internal_study;
3496  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3241  if (offsetcount < 0) return PCRE_ERROR_B Line 3510  if (offsetcount < 0) return PCRE_ERROR_B
3510  the default values. */  the default values. */
3511    
3512  study = NULL;  study = NULL;
3513  match_block.match_limit = MATCH_LIMIT;  md->match_limit = MATCH_LIMIT;
3514  match_block.callout_data = NULL;  md->match_limit_recursion = MATCH_LIMIT_RECURSION;
3515    md->callout_data = NULL;
3516    
3517  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
3518    
# Line 3254  if (extra_data != NULL) Line 3524  if (extra_data != NULL)
3524    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3525      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
3526    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
3527      match_block.match_limit = extra_data->match_limit;      md->match_limit = extra_data->match_limit;
3528      if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3529        md->match_limit_recursion = extra_data->match_limit_recursion;
3530    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3531      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
3532    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
3533    }    }
3534    
# Line 3286  firstline = (re->options & PCRE_FIRSTLIN Line 3558  firstline = (re->options & PCRE_FIRSTLIN
3558    
3559  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
3560    
3561  match_block.start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const uschar *)external_re + re->name_table_offset +
3562    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
3563    
3564  match_block.start_subject = (const uschar *)subject;  md->start_subject = (USPTR)subject;
3565  match_block.start_offset = start_offset;  md->start_offset = start_offset;
3566  match_block.end_subject = match_block.start_subject + length;  md->end_subject = md->start_subject + length;
3567  end_subject = match_block.end_subject;  end_subject = md->end_subject;
3568    
3569  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
3570  match_block.utf8 = (re->options & PCRE_UTF8) != 0;  md->utf8 = (re->options & PCRE_UTF8) != 0;
3571    
3572  match_block.notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
3573  match_block.noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
3574  match_block.notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
3575  match_block.partial = (options & PCRE_PARTIAL) != 0;  md->partial = (options & PCRE_PARTIAL) != 0;
3576  match_block.hitend = FALSE;  md->hitend = FALSE;
3577    
3578    md->recursive = NULL;                   /* No recursion at top level */
3579    
3580  match_block.recursive = NULL;                   /* No recursion at top level */  md->lcc = tables + lcc_offset;
3581    md->ctypes = tables + ctypes_offset;
3582    
3583  match_block.lcc = tables + lcc_offset;  /* Handle different types of newline. The two bits give four cases. If nothing
3584  match_block.ctypes = tables + ctypes_offset;  is set at run time, whatever was used at compile time applies. */
3585    
3586    switch ((((options & PCRE_NEWLINE_CRLF) == 0)? re->options : options) &
3587             PCRE_NEWLINE_CRLF)
3588      {
3589      default:              newline = NEWLINE; break;   /* Compile-time default */
3590      case PCRE_NEWLINE_CR: newline = '\r'; break;
3591      case PCRE_NEWLINE_LF: newline = '\n'; break;
3592      case PCRE_NEWLINE_CR+
3593           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
3594      }
3595    
3596    if (newline > 255)
3597      {
3598      md->nllen = 2;
3599      md->nl[0] = (newline >> 8) & 255;
3600      md->nl[1] = newline & 255;
3601      }
3602    else
3603      {
3604      md->nllen = 1;
3605      md->nl[0] = newline;
3606      }
3607    
3608  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
3609  moment. */  moment. */
3610    
3611  if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
3612    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
3613    
3614  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3615  back the character offset. */  back the character offset. */
3616    
3617  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3618  if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (md->utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3619    {    {
3620    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
3621      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
# Line 3350  ocount = offsetcount - (offsetcount % 3) Line 3647  ocount = offsetcount - (offsetcount % 3)
3647  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
3648    {    {
3649    ocount = re->top_backref * 3 + 3;    ocount = re->top_backref * 3 + 3;
3650    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3651    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3652    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
3653    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
3654    }    }
3655  else match_block.offset_vector = offsets;  else md->offset_vector = offsets;
3656    
3657  match_block.offset_end = ocount;  md->offset_end = ocount;
3658  match_block.offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
3659  match_block.offset_overflow = FALSE;  md->offset_overflow = FALSE;
3660  match_block.capture_last = -1;  md->capture_last = -1;
3661    
3662  /* Compute the minimum number of offsets that we need to reset each time. Doing  /* Compute the minimum number of offsets that we need to reset each time. Doing
3663  this makes a huge difference to execution time when there aren't many brackets  this makes a huge difference to execution time when there aren't many brackets
# Line 3373  if (resetcount > offsetcount) resetcount Line 3670  if (resetcount > offsetcount) resetcount
3670  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
3671  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. */
3672    
3673  if (match_block.offset_vector != NULL)  if (md->offset_vector != NULL)
3674    {    {
3675    register int *iptr = match_block.offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
3676    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - resetcount/2 + 1;
3677    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
3678    }    }
# Line 3392  if (!anchored) Line 3689  if (!anchored)
3689      {      {
3690      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
3691      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
3692        first_byte = match_block.lcc[first_byte];        first_byte = md->lcc[first_byte];
3693      }      }
3694    else    else
3695      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 3415  the loop runs just once. */ Line 3712  the loop runs just once. */
3712    
3713  do  do
3714    {    {
3715    const uschar *save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
3716    
3717    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
3718    
3719    if (match_block.offset_vector != NULL)    if (md->offset_vector != NULL)
3720      {      {
3721      register int *iptr = match_block.offset_vector;      register int *iptr = md->offset_vector;
3722      register int *iend = iptr + resetcount;      register int *iend = iptr + resetcount;
3723      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
3724      }      }
# Line 3434  do Line 3731  do
3731    
3732    if (firstline)    if (firstline)
3733      {      {
3734      const uschar *t = start_match;      USPTR t = start_match;
3735      while (t < save_end_subject && *t != '\n') t++;      while (t <= save_end_subject - md->nllen && !IS_NEWLINE(t)) t++;
3736      end_subject = t;      end_subject = t;
3737      }      }
3738    
# Line 3445  do Line 3742  do
3742      {      {
3743      if (first_byte_caseless)      if (first_byte_caseless)
3744        while (start_match < end_subject &&        while (start_match < end_subject &&
3745               match_block.lcc[*start_match] != first_byte)               md->lcc[*start_match] != first_byte)
3746          start_match++;          start_match++;
3747      else      else
3748        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
3749          start_match++;          start_match++;
3750      }      }
3751    
3752    /* Or to just after \n for a multiline match if possible */    /* Or to just after a linebreak for a multiline match if possible */
3753    
3754    else if (startline)    else if (startline)
3755      {      {
3756      if (start_match > match_block.start_subject + start_offset)      if (start_match >= md->start_subject + md->nllen +
3757              start_offset)
3758        {        {
3759        while (start_match < end_subject && start_match[-1] != NEWLINE)        while (start_match <= end_subject &&
3760                 !IS_NEWLINE(start_match - md->nllen))
3761          start_match++;          start_match++;
3762        }        }
3763      }      }
# Line 3480  do Line 3779  do
3779    
3780  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
3781    printf(">>>> Match against: ");    printf(">>>> Match against: ");
3782    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, md);
3783    printf("\n");    printf("\n");
3784  #endif  #endif
3785    
# Line 3502  do Line 3801  do
3801    
3802    if (req_byte >= 0 &&    if (req_byte >= 0 &&
3803        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
3804        !match_block.partial)        !md->partial)
3805      {      {
3806      register const uschar *p = start_match + ((first_byte >= 0)? 1 : 0);      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
3807    
3808      /* We don't need to repeat the search if we haven't yet reached the      /* We don't need to repeat the search if we haven't yet reached the
3809      place we found it at last time. */      place we found it at last time. */
# Line 3546  do Line 3845  do
3845    those back references that we can. In this case there need not be overflow    those back references that we can. In this case there need not be overflow
3846    if certain parts of the pattern were not used. */    if certain parts of the pattern were not used. */
3847    
3848    match_block.start_match = start_match;    md->start_match = start_match;
3849    match_block.match_call_count = 0;    md->match_call_count = 0;
3850    
3851    rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,    rc = match(start_match, md->start_code, 2, md, ims, NULL, match_isgroup, 0);
     match_isgroup);  
3852    
3853    /* When the result is no match, if the subject's first character was a    /* When the result is no match, if the subject's first character was a
3854    newline and the PCRE_FIRSTLINE option is set, break (which will return    newline and the PCRE_FIRSTLINE option is set, break (which will return
# Line 3561  do Line 3859  do
3859    
3860    if (rc == MATCH_NOMATCH)    if (rc == MATCH_NOMATCH)
3861      {      {
3862      if (firstline && *start_match == NEWLINE) break;      if (firstline &&
3863            start_match <= md->end_subject - md->nllen &&
3864            IS_NEWLINE(start_match))
3865          break;
3866      start_match++;      start_match++;
3867  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3868      if (match_block.utf8)      if (md->utf8)
3869        while(start_match < end_subject && (*start_match & 0xc0) == 0x80)        while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
3870          start_match++;          start_match++;
3871  #endif  #endif
# Line 3584  do Line 3885  do
3885      {      {
3886      if (offsetcount >= 4)      if (offsetcount >= 4)
3887        {        {
3888        memcpy(offsets + 2, match_block.offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
3889          (offsetcount - 2) * sizeof(int));          (offsetcount - 2) * sizeof(int));
3890        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
3891        }        }
3892      if (match_block.end_offset_top > offsetcount)      if (md->end_offset_top > offsetcount)
3893        match_block.offset_overflow = TRUE;        md->offset_overflow = TRUE;
3894    
3895      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
3896      (pcre_free)(match_block.offset_vector);      (pcre_free)(md->offset_vector);
3897      }      }
3898    
3899    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;    rc = md->offset_overflow? 0 : md->end_offset_top/2;
3900    
3901    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
3902      {      {
3903      offsets[0] = start_match - match_block.start_subject;      offsets[0] = start_match - md->start_subject;
3904      offsets[1] = match_block.end_match_ptr - match_block.start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
3905      }      }
3906    
3907    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
# Line 3614  while (!anchored && start_match <= end_s Line 3915  while (!anchored && start_match <= end_s
3915  if (using_temporary_offsets)  if (using_temporary_offsets)
3916    {    {
3917    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
3918    (pcre_free)(match_block.offset_vector);    (pcre_free)(md->offset_vector);
3919    }    }
3920    
3921  if (match_block.partial && match_block.hitend)  if (md->partial && md->hitend)
3922    {    {
3923    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
3924    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;

Legend:
Removed from v.85  
changed lines
  Added in v.91

  ViewVC Help
Powered by ViewVC 1.1.5