/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 165 by ph10, Wed May 9 10:50:57 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #define NLBLOCK md             /* Block containing newline information */
46    #define PSSTART start_subject  /* Field containing processed string start */
47    #define PSEND   end_subject    /* Field containing processed string end */
48    
49  #include "pcre_internal.h"  #include "pcre_internal.h"
50    
51    /* Undefine some potentially clashing cpp symbols */
52    
53  /* Structure for building a chain of data that actually lives on the  #undef min
54  stack, for holding the values of the subject pointer at the start of each  #undef max
55  subpattern, so as to detect when an empty string has been matched by a  
56  subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
57  are on the heap, not on the stack. */  obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
58    
59  typedef struct eptrblock {  #define EPTR_WORK_SIZE (1000)
   struct eptrblock *epb_prev;  
   USPTR epb_saved_eptr;  
 } eptrblock;  
60    
61  /* Flag bits for the match() function */  /* Flag bits for the match() function */
62    
63  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
64  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
65    #define match_tail_recursed  0x04  /* Tail recursive call */
66    
67  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
68  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 101  Returns:     nothing Line 103  Returns:     nothing
103  static void  static void
104  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
105  {  {
106  int c;  unsigned int c;
107  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
108  while (length-- > 0)  while (length-- > 0)
109    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 186  calls by keeping local variables that ne Line 188  calls by keeping local variables that ne
188  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
189  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
190  always used to.  always used to.
191    
192    The original heap-recursive code used longjmp(). However, it seems that this
193    can be very slow on some operating systems. Following a suggestion from Stan
194    Switzer, the use of longjmp() has been abolished, at the cost of having to
195    provide a unique number for each call to RMATCH. There is no way of generating
196    a sequence of numbers at compile time in C. I have given them names, to make
197    them stand out more clearly.
198    
199    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
200    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
201    tests. Furthermore, not using longjmp() means that local dynamic variables
202    don't have indeterminate values; this has meant that the frame size can be
203    reduced because the result can be "passed back" by straight setting of the
204    variable instead of being passed in the frame.
205  ****************************************************************************  ****************************************************************************
206  ***************************************************************************/  ***************************************************************************/
207    
208    
209    /* Numbers for RMATCH calls */
210    
211    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
212           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
213           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
214           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
215           RM41,  RM42, RM43, RM44, RM45, RM46, RM47 };
216    
217    
218  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
219  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
220    actually used in this definition. */
221    
222  #ifndef NO_RECURSE  #ifndef NO_RECURSE
223  #define REGISTER register  #define REGISTER register
224    
225  #ifdef DEBUG  #ifdef DEBUG
226  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
227    { \    { \
228    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
229    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \
230    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
231    }    }
232  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 208  versions and production versions. */ Line 235  versions and production versions. */
235    return ra; \    return ra; \
236    }    }
237  #else  #else
238  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
239    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)
240  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
241  #endif  #endif
242    
243  #else  #else
244    
245    
246  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
247  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
248  match(), which never changes. */  argument of match(), which never changes. */
249    
250  #define REGISTER  #define REGISTER
251    
252  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
253    {\    {\
254    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
255    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
256      {\    newframe->Xeptr = ra;\
257      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
258      newframe->Xecode = rb;\    newframe->Xoffset_top = rc;\
259      newframe->Xoffset_top = rc;\    newframe->Xims = re;\
260      newframe->Xims = re;\    newframe->Xeptrb = rf;\
261      newframe->Xeptrb = rf;\    newframe->Xflags = rg;\
262      newframe->Xflags = rg;\    newframe->Xrdepth = frame->Xrdepth + 1;\
263      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xprevframe = frame;\
264      newframe->Xprevframe = frame;\    frame = newframe;\
265      frame = newframe;\    DPRINTF(("restarting from line %d\n", __LINE__));\
266      DPRINTF(("restarting from line %d\n", __LINE__));\    goto HEAP_RECURSE;\
267      goto HEAP_RECURSE;\    L_##rw:\
268      }\    DPRINTF(("jumped back to line %d\n", __LINE__));\
   else\  
     {\  
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
269    }    }
270    
271  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 254  match(), which never changes. */ Line 275  match(), which never changes. */
275    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
276    if (frame != NULL)\    if (frame != NULL)\
277      {\      {\
278      frame->Xresult = ra;\      rrc = ra;\
279      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
280      }\      }\
281    return ra;\    return ra;\
282    }    }
# Line 275  typedef struct heapframe { Line 295  typedef struct heapframe {
295    long int Xims;    long int Xims;
296    eptrblock *Xeptrb;    eptrblock *Xeptrb;
297    int Xflags;    int Xflags;
298    int Xrdepth;    unsigned int Xrdepth;
299    
300    /* Function local variables */    /* Function local variables */
301    
# Line 291  typedef struct heapframe { Line 311  typedef struct heapframe {
311    
312    BOOL Xcur_is_word;    BOOL Xcur_is_word;
313    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
314    BOOL Xprev_is_word;    BOOL Xprev_is_word;
315    
316    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
# Line 303  typedef struct heapframe { Line 322  typedef struct heapframe {
322    int Xprop_category;    int Xprop_category;
323    int Xprop_chartype;    int Xprop_chartype;
324    int Xprop_script;    int Xprop_script;
325    int *Xprop_test_variable;    int Xoclength;
326      uschar Xocchars[8];
327  #endif  #endif
328    
329    int Xctype;    int Xctype;
330    int Xfc;    unsigned int Xfc;
331    int Xfi;    int Xfi;
332    int Xlength;    int Xlength;
333    int Xmax;    int Xmax;
# Line 321  typedef struct heapframe { Line 341  typedef struct heapframe {
341    
342    eptrblock Xnewptrb;    eptrblock Xnewptrb;
343    
344    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
345    
346    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
347    
348  } heapframe;  } heapframe;
349    
# Line 340  typedef struct heapframe { Line 359  typedef struct heapframe {
359  *         Match from current position            *  *         Match from current position            *
360  *************************************************/  *************************************************/
361    
362  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
363  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
364  same response.  same response.
365    
# Line 353  performance. Tests using gcc on a SPARC Line 369  performance. Tests using gcc on a SPARC
369  made performance worse.  made performance worse.
370    
371  Arguments:  Arguments:
372     eptr        pointer in subject     eptr        pointer to current character in subject
373     ecode       position in code     ecode       pointer to current position in compiled code
374     offset_top  current top pointer     offset_top  current top pointer
375     md          pointer to "static" info for the match     md          pointer to "static" info for the match
376     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 362  Arguments: Line 378  Arguments:
378                   brackets - for testing for empty matches                   brackets - for testing for empty matches
379     flags       can contain     flags       can contain
380                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
381                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
382                       group that can match an empty string
383                     match_tail_recursed - this is a tail_recursed group
384     rdepth      the recursion depth     rdepth      the recursion depth
385    
386  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 374  Returns:       MATCH_MATCH if matched Line 392  Returns:       MATCH_MATCH if matched
392  static int  static int
393  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
394    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
395    int flags, int rdepth)    int flags, unsigned int rdepth)
396  {  {
397  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
398  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
399  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
400    
401    register int  rrc;         /* Returns from recursive calls */
402    register int  i;           /* Used for loops not involving calls to RMATCH() */
403    register unsigned int c;   /* Character values not kept over RMATCH() calls */
404    register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
405    
406  register int  rrc;    /* Returns from recursive calls */  BOOL minimize, possessive; /* Quantifier options */
 register int  i;      /* Used for loops not involving calls to RMATCH() */  
 register int  c;      /* Character values not kept over RMATCH() calls */  
 register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  
407    
408  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
409  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 434  HEAP_RECURSE: Line 454  HEAP_RECURSE:
454    
455  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
456  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
457  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
458    
459  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
# Line 446  HEAP_RECURSE: Line 465  HEAP_RECURSE:
465  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
466  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
467  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
468  #define prop_test_variable frame->Xprop_test_variable  #define oclength           frame->Xoclength
469    #define occhars            frame->Xocchars
470  #endif  #endif
471    
472  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 470  HEAP_RECURSE: Line 490  HEAP_RECURSE:
490  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
491  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
492    
493  #else  #else         /* NO_RECURSE not defined */
494  #define fi i  #define fi i
495  #define fc c  #define fc c
496    
# Line 489  recursion_info new_recursive;      /* wi Line 509  recursion_info new_recursive;      /* wi
509                                     /* that do not have to be preserved over  */                                     /* that do not have to be preserved over  */
510  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
511  BOOL condition;  BOOL condition;
 BOOL minimize;  
512  BOOL prev_is_word;  BOOL prev_is_word;
513    
514  unsigned long int original_ims;  unsigned long int original_ims;
# Line 501  int prop_fail_result; Line 520  int prop_fail_result;
520  int prop_category;  int prop_category;
521  int prop_chartype;  int prop_chartype;
522  int prop_script;  int prop_script;
523  int *prop_test_variable;  int oclength;
524    uschar occhars[8];
525  #endif  #endif
526    
527  int ctype;  int ctype;
# Line 516  int save_offset1, save_offset2, save_off Line 536  int save_offset1, save_offset2, save_off
536  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
537    
538  eptrblock newptrb;  eptrblock newptrb;
539  #endif  #endif     /* NO_RECURSE */
540    
541  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
542  variables. */  variables. */
# Line 524  variables. */ Line 544  variables. */
544  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
545  prop_value = 0;  prop_value = 0;
546  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_variable = NULL;  
547  #endif  #endif
548    
549    
550    /* This label is used for tail recursion, which is used in a few cases even
551    when NO_RECURSE is not defined, in order to reduce the amount of stack that is
552    used. Thanks to Ian Taylor for noticing this possibility and sending the
553    original patch. */
554    
555    TAIL_RECURSE:
556    
557  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
558  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
559  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
# Line 535  defined). However, RMATCH isn't like a f Line 562  defined). However, RMATCH isn't like a f
562  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
563  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
564    
565    #ifdef SUPPORT_UTF8
566    utf8 = md->utf8;       /* Local copy of the flag */
567    #else
568    utf8 = FALSE;
569    #endif
570    
571  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
572  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
573    
# Line 542  if (md->match_call_count++ >= md->match_ Line 575  if (md->match_call_count++ >= md->match_
575  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
576    
577  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
 utf8 = md->utf8;       /* Local copy of the flag */  
578    
579  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
580  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
581  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
582  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
583    When match() is called in other circumstances, don't add to the chain. If this
584    is a tail recursion, use a block from the workspace, as the one on the stack is
585    already used. */
586    
587  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
588    {    {
589    newptrb.epb_prev = eptrb;    eptrblock *p;
590    newptrb.epb_saved_eptr = eptr;    if ((flags & match_tail_recursed) != 0)
591    eptrb = &newptrb;      {
592        if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
593        p = md->eptrchain + md->eptrn++;
594        }
595      else p = &newptrb;
596      p->epb_saved_eptr = eptr;
597      p->epb_prev = eptrb;
598      eptrb = p;
599    }    }
600    
601  /* Now start processing the operations. */  /* Now start processing the opcodes. */
602    
603  for (;;)  for (;;)
604    {    {
605      minimize = possessive = FALSE;
606    op = *ecode;    op = *ecode;
   minimize = FALSE;  
607    
608    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
609    matching at least one subject character. */    matching at least one subject character. */
# Line 571  for (;;) Line 613  for (;;)
613        eptr > md->start_match)        eptr > md->start_match)
614      md->hitend = TRUE;      md->hitend = TRUE;
615    
616    /* Opening capturing bracket. If there is space in the offset vector, save    switch(op)
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
617      {      {
618      number = op - OP_BRA;      /* Handle a capturing bracket. If there is space in the offset vector, save
619        the current subject position in the working slot at the top of the vector.
620      /* For extended extraction brackets (large number), we have to fish out the      We mustn't change the current values of the data slot, because they may be
621      number from a dummy opcode at the start. */      set from a previous iteration of this group, and be referred to by a
622        reference inside the group.
623      if (number > EXTRACT_BASIC_MAX)  
624        number = GET2(ecode, 2+LINK_SIZE);      If the bracket fails to match, we need to restore this value and also the
625        values of the final offsets, in case they were set by a previous iteration
626        of the same bracket.
627    
628        If there isn't enough space in the offset vector, treat this as if it were
629        a non-capturing bracket. Don't worry about setting the flag for the error
630        case here; that is handled in the code for KET. */
631    
632        case OP_CBRA:
633        case OP_SCBRA:
634        number = GET2(ecode, 1+LINK_SIZE);
635      offset = number << 1;      offset = number << 1;
636    
637  #ifdef DEBUG  #ifdef DEBUG
638      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
639        printf("subject=");
640      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
641      printf("\n");      printf("\n");
642  #endif  #endif
# Line 612  for (;;) Line 651  for (;;)
651        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
652        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
653    
654          flags = (op == OP_SCBRA)? match_cbegroup : 0;
655        do        do
656          {          {
657          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
658            match_isgroup);            ims, eptrb, flags, RM1);
659          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
660          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
661          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
# Line 631  for (;;) Line 671  for (;;)
671        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
672        }        }
673    
674      /* Insufficient room for saving captured contents */      /* Insufficient room for saving captured contents. Treat as a non-capturing
675        bracket. */
676    
677      else op = OP_BRA;      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
     }  
678    
679    /* Other types of node can be handled by a switch */      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
680        final alternative within the brackets, we would return the result of a
681        recursive call to match() whatever happened. We can reduce stack usage by
682        turning this into a tail recursion. */
683    
684    switch(op)      case OP_BRA:
685      {      case OP_SBRA:
686      case OP_BRA:     /* Non-capturing bracket: optimized */      DPRINTF(("start non-capturing bracket\n"));
687      DPRINTF(("start bracket 0\n"));      flags = (op >= OP_SBRA)? match_cbegroup : 0;
688      do      for (;;)
689        {        {
690        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)
691          match_isgroup);          {
692            ecode += _pcre_OP_lengths[*ecode];
693            flags |= match_tail_recursed;
694            DPRINTF(("bracket 0 tail recursion\n"));
695            goto TAIL_RECURSE;
696            }
697    
698          /* For non-final alternatives, continue the loop for a NOMATCH result;
699          otherwise return. */
700    
701          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
702            eptrb, flags, RM2);
703        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
704        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
705        }        }
706      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
707    
708      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
709      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
710      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
711      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
712        obeyed, we can use tail recursion to avoid using another stack frame. */
713    
714      case OP_COND:      case OP_COND:
715      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
716        if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
717          {
718          offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
719          condition = md->recursive != NULL &&
720            (offset == RREF_ANY || offset == md->recursive->group_num);
721          ecode += condition? 3 : GET(ecode, 1);
722          }
723    
724        else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
725        {        {
726        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
727        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
728          (md->recursive != NULL) :        ecode += condition? 3 : GET(ecode, 1);
729          (offset < offset_top && md->offset_vector[offset] >= 0);        }
730        RMATCH(rrc, eptr, ecode + (condition?  
731          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
732          offset_top, md, ims, eptrb, match_isgroup);        {
733        RRETURN(rrc);        condition = FALSE;
734          ecode += GET(ecode, 1);
735        }        }
736    
737      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
738      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
739        assertion. */
740    
741      else      else
742        {        {
743        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
744            match_condassert | match_isgroup);            match_condassert, RM3);
745        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
746          {          {
747          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
748            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
749          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
750          }          }
751        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH)
752          {          {
753          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
754          }          }
755        else ecode += GET(ecode, 1);        else
756        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
757          match_isgroup);          condition = FALSE;
758        RRETURN(rrc);          ecode += GET(ecode, 1);
759            }
760        }        }
     /* Control never reaches here */  
761    
762      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
763      encountered. */      we can use tail recursion to avoid using another stack frame. If the second
764        alternative doesn't exist, we can just plough on. */
765    
766      case OP_CREF:      if (condition || *ecode == OP_ALT)
767      case OP_BRANUMBER:        {
768      ecode += 3;        ecode += 1 + LINK_SIZE;
769          flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
770          goto TAIL_RECURSE;
771          }
772        else
773          {
774          ecode += 1 + LINK_SIZE;
775          }
776      break;      break;
777    
778      /* End of the pattern. If we are in a recursion, we should restore the  
779      offsets appropriately and continue from after the call. */      /* End of the pattern. If we are in a top-level recursion, we should
780        restore the offsets appropriately and continue from after the call. */
781    
782      case OP_END:      case OP_END:
783      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
# Line 745  for (;;) Line 819  for (;;)
819      case OP_ASSERTBACK:      case OP_ASSERTBACK:
820      do      do
821        {        {
822        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
823          match_isgroup);          RM4);
824        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
825        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
826        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 772  for (;;) Line 846  for (;;)
846      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
847      do      do
848        {        {
849        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
850          match_isgroup);          RM5);
851        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
852        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
853        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 794  for (;;) Line 868  for (;;)
868  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
869      if (utf8)      if (utf8)
870        {        {
871        c = GET(ecode,1);        i = GET(ecode, 1);
872        for (i = 0; i < c; i++)        while (i-- > 0)
873          {          {
874          eptr--;          eptr--;
875          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
# Line 808  for (;;) Line 882  for (;;)
882      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
883    
884        {        {
885        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
886        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
887        }        }
888    
# Line 865  for (;;) Line 939  for (;;)
939      case OP_RECURSE:      case OP_RECURSE:
940        {        {
941        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
942        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
943            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
944    
945        /* Add to "recursing stack" */        /* Add to "recursing stack" */
946    
# Line 904  for (;;) Line 973  for (;;)
973        restore the offset and recursion data. */        restore the offset and recursion data. */
974    
975        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
976          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
977        do        do
978          {          {
979          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
980              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
981          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
982            {            {
983            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
# Line 945  for (;;) Line 1015  for (;;)
1015      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer. */
1016    
1017      case OP_ONCE:      case OP_ONCE:
1018        {      prev = ecode;
1019        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1020    
1021        do      do
1022          {        {
1023          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
1024            eptrb, match_isgroup);          eptrb, 0, RM7);
1025          if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1026          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1027          ecode += GET(ecode,1);        ecode += GET(ecode,1);
1028          }        }
1029        while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1030    
1031        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1032    
1033        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1034    
1035        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1036        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1037    
1038        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1039    
1040        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1041        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1042    
1043        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1044        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1045        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1046        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1047        course of events. */      course of events. */
1048    
1049        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1050          {        {
1051          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1052          break;        break;
1053          }        }
1054    
1055        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1056        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1057        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1058        opcode. */      any options that changed within the bracket before re-running it, so
1059        check the next opcode. */
1060    
1061        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1062          {        {
1063          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1064          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1065          }        }
1066    
1067        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1068          {        {
1069          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,
1070          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          RM8);
1071          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1072          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode = prev;
1073          }        flags = match_tail_recursed;
1074        else  /* OP_KETRMAX */        goto TAIL_RECURSE;
1075          {        }
1076          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);      else  /* OP_KETRMAX */
1077          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        {
1078          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1079          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1080          }        ecode += 1 + LINK_SIZE;
1081          flags = match_tail_recursed;
1082          goto TAIL_RECURSE;
1083        }        }
1084      RRETURN(MATCH_NOMATCH);      /* Control never gets here */
1085    
1086      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1087      bracketed group and go to there. */      bracketed group and go to there. */
# Line 1027  for (;;) Line 1099  for (;;)
1099      case OP_BRAZERO:      case OP_BRAZERO:
1100        {        {
1101        next = ecode+1;        next = ecode+1;
1102        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1103        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1104        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1105        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1106        }        }
1107      break;      break;
1108    
1109      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1110        {        {
1111        next = ecode+1;        next = ecode+1;
1112        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1113        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1114        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1115        ecode++;        ecode++;
1116        }        }
1117      break;      break;
1118    
1119      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. */
     an assertion "group", stop matching and return MATCH_MATCH, but record the  
     current high water mark for use by positive assertions. Do this also  
     for the "once" (not-backup up) groups. */  
1120    
1121      case OP_KET:      case OP_KET:
1122      case OP_KETRMIN:      case OP_KETRMIN:
1123      case OP_KETRMAX:      case OP_KETRMAX:
1124        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
   
       /* Back up the stack of bracket start pointers. */  
1125    
1126        eptrb = eptrb->epb_prev;      /* If this was a group that remembered the subject start, in order to break
1127        infinite repeats of empty string matches, retrieve the subject start from
1128        the chain. Otherwise, set it NULL. */
1129    
1130        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev >= OP_SBRA)
1131            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        {
1132            *prev == OP_ONCE)        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1133          {        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1134          md->end_match_ptr = eptr;      /* For ONCE */        }
1135          md->end_offset_top = offset_top;      else saved_eptr = NULL;
         RRETURN(MATCH_MATCH);  
         }  
1136    
1137        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group, stop matching and return
1138        group number back at the start and if necessary complete handling an      MATCH_MATCH, but record the current high water mark for use by positive
1139        extraction by setting the offsets and bumping the high water mark. */      assertions. Do this also for the "once" (atomic) groups. */
1140    
1141        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1142          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1143          number = *prev - OP_BRA;          *prev == OP_ONCE)
1144          {
1145          md->end_match_ptr = eptr;      /* For ONCE */
1146          md->end_offset_top = offset_top;
1147          RRETURN(MATCH_MATCH);
1148          }
1149    
1150          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1151          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1152        bumping the high water mark. Note that whole-pattern recursion is coded as
1153        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1154        when the OP_END is reached. Other recursion is handled here. */
1155    
1156          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1157          offset = number << 1;        {
1158          number = GET2(prev, 1+LINK_SIZE);
1159          offset = number << 1;
1160    
1161  #ifdef DEBUG  #ifdef DEBUG
1162          printf("end bracket %d", number);        printf("end bracket %d", number);
1163          printf("\n");        printf("\n");
1164  #endif  #endif
1165    
1166          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1167          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1168          into group 0, so it won't be picked up here. Instead, we catch it when          {
1169          the OP_END is reached. */          md->offset_vector[offset] =
1170              md->offset_vector[md->offset_end - number];
1171          if (number > 0)          md->offset_vector[offset+1] = eptr - md->start_subject;
1172            {          if (offset_top <= offset) offset_top = offset + 2;
           md->capture_last = number;  
           if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
             {  
             md->offset_vector[offset] =  
               md->offset_vector[md->offset_end - number];  
             md->offset_vector[offset+1] = eptr - md->start_subject;  
             if (offset_top <= offset) offset_top = offset + 2;  
             }  
   
           /* Handle a recursively called group. Restore the offsets  
           appropriately and continue from after the call. */  
   
           if (md->recursive != NULL && md->recursive->group_num == number)  
             {  
             recursion_info *rec = md->recursive;  
             DPRINTF(("Recursion (%d) succeeded - continuing\n", number));  
             md->recursive = rec->prevrec;  
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
1173          }          }
1174    
1175        /* Reset the value of the ims flags, in case they got changed during        /* Handle a recursively called group. Restore the offsets
1176        the group. */        appropriately and continue from after the call. */
   
       ims = original_ims;  
       DPRINTF(("ims reset to %02lx\n", ims));  
   
       /* For a non-repeating ket, just continue at this level. This also  
       happens for a repeating ket if no characters were matched in the group.  
       This is the forcible breaking of infinite loops as implemented in Perl  
       5.005. If there is an options reset, it will get obeyed in the normal  
       course of events. */  
1177    
1178        if (*ecode == OP_KET || eptr == saved_eptr)        if (md->recursive != NULL && md->recursive->group_num == number)
1179          {          {
1180          ecode += 1 + LINK_SIZE;          recursion_info *rec = md->recursive;
1181            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1182            md->recursive = rec->prevrec;
1183            md->start_match = rec->save_start;
1184            memcpy(md->offset_vector, rec->offset_save,
1185              rec->saved_max * sizeof(int));
1186            ecode = rec->after_call;
1187            ims = original_ims;
1188          break;          break;
1189          }          }
1190          }
1191    
1192        /* The repeating kets try the rest of the pattern or restart from the      /* For both capturing and non-capturing groups, reset the value of the ims
1193        preceding bracket, in the appropriate order. */      flags, in case they got changed during the group. */
1194    
1195        if (*ecode == OP_KETRMIN)      ims = original_ims;
1196          {      DPRINTF(("ims reset to %02lx\n", ims));
1197          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
1198          if (rrc != MATCH_NOMATCH) RRETURN(rrc);      /* For a non-repeating ket, just continue at this level. This also
1199          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);      happens for a repeating ket if no characters were matched in the group.
1200          if (rrc != MATCH_NOMATCH) RRETURN(rrc);      This is the forcible breaking of infinite loops as implemented in Perl
1201          }      5.005. If there is an options reset, it will get obeyed in the normal
1202        else  /* OP_KETRMAX */      course of events. */
1203          {  
1204          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);      if (*ecode == OP_KET || eptr == saved_eptr)
1205          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        {
1206          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        ecode += 1 + LINK_SIZE;
1207          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        break;
         }  
1208        }        }
1209    
1210      RRETURN(MATCH_NOMATCH);      /* The repeating kets try the rest of the pattern or restart from the
1211        preceding bracket, in the appropriate order. In the second case, we can use
1212        tail recursion to avoid using another stack frame. */
1213    
1214        flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1215    
1216        if (*ecode == OP_KETRMIN)
1217          {
1218          RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,
1219            RM12);
1220          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1221          ecode = prev;
1222          flags |= match_tail_recursed;
1223          goto TAIL_RECURSE;
1224          }
1225        else  /* OP_KETRMAX */
1226          {
1227          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1228          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1229          ecode += 1 + LINK_SIZE;
1230          flags = match_tail_recursed;
1231          goto TAIL_RECURSE;
1232          }
1233        /* Control never gets here */
1234    
1235      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1236    
# Line 1168  for (;;) Line 1238  for (;;)
1238      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1239      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1240        {        {
1241        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1242              (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1243          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1244        ecode++;        ecode++;
1245        break;        break;
# Line 1196  for (;;) Line 1267  for (;;)
1267      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1268        {        {
1269        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1270          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1271        else        else
1272          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1273        ecode++;        ecode++;
# Line 1207  for (;;) Line 1278  for (;;)
1278        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
1279        if (!md->endonly)        if (!md->endonly)
1280          {          {
1281          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1282             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1283            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1284          ecode++;          ecode++;
1285          break;          break;
1286          }          }
1287        }        }
1288      /* ... else fall through */      /* ... else fall through for endonly */
1289    
1290      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1291    
# Line 1226  for (;;) Line 1297  for (;;)
1297      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1298    
1299      case OP_EODN:      case OP_EODN:
1300      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1301         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1302          RRETURN(MATCH_NOMATCH);
1303      ecode++;      ecode++;
1304      break;      break;
1305    
# Line 1280  for (;;) Line 1352  for (;;)
1352      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1353    
1354      case OP_ANY:      case OP_ANY:
1355      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if ((ims & PCRE_DOTALL) == 0)
1356        RRETURN(MATCH_NOMATCH);        {
1357          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1358          }
1359      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
 #ifdef SUPPORT_UTF8  
1360      if (utf8)      if (utf8)
1361        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 #endif  
1362      ecode++;      ecode++;
1363      break;      break;
1364    
# Line 1376  for (;;) Line 1448  for (;;)
1448      ecode++;      ecode++;
1449      break;      break;
1450    
1451        case OP_ANYNL:
1452        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1453        GETCHARINCTEST(c, eptr);
1454        switch(c)
1455          {
1456          default: RRETURN(MATCH_NOMATCH);
1457          case 0x000d:
1458          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1459          break;
1460          case 0x000a:
1461          case 0x000b:
1462          case 0x000c:
1463          case 0x0085:
1464          case 0x2028:
1465          case 0x2029:
1466          break;
1467          }
1468        ecode++;
1469        break;
1470    
1471  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1472      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1473      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1418  for (;;) Line 1510  for (;;)
1510    
1511          default:          default:
1512          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
         break;  
1513          }          }
1514    
1515        ecode += 3;        ecode += 3;
# Line 1532  for (;;) Line 1623  for (;;)
1623          {          {
1624          for (fi = min;; fi++)          for (fi = min;; fi++)
1625            {            {
1626            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1627            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1628            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1629              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 1553  for (;;) Line 1644  for (;;)
1644            }            }
1645          while (eptr >= pp)          while (eptr >= pp)
1646            {            {
1647            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1648            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1649            eptr -= length;            eptr -= length;
1650            }            }
# Line 1658  for (;;) Line 1749  for (;;)
1749            {            {
1750            for (fi = min;; fi++)            for (fi = min;; fi++)
1751              {              {
1752              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1753              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1754              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1755              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 1678  for (;;) Line 1769  for (;;)
1769            {            {
1770            for (fi = min;; fi++)            for (fi = min;; fi++)
1771              {              {
1772              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1773              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1774              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1775              c = *eptr++;              c = *eptr++;
# Line 1715  for (;;) Line 1806  for (;;)
1806              }              }
1807            for (;;)            for (;;)
1808              {              {
1809              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1810              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1811              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
1812              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1734  for (;;) Line 1825  for (;;)
1825              }              }
1826            while (eptr >= pp)            while (eptr >= pp)
1827              {              {
1828              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
1829              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1830              eptr--;              eptr--;
1831              }              }
# Line 1805  for (;;) Line 1896  for (;;)
1896          {          {
1897          for (fi = min;; fi++)          for (fi = min;; fi++)
1898            {            {
1899            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
1900            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1901            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1902            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
# Line 1829  for (;;) Line 1920  for (;;)
1920            }            }
1921          for(;;)          for(;;)
1922            {            {
1923            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
1924            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1925            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
1926            BACKCHAR(eptr)            BACKCHAR(eptr)
# Line 1888  for (;;) Line 1979  for (;;)
1979    
1980        else        else
1981          {          {
1982          int dc;          unsigned int dc;
1983          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
1984          ecode += length;          ecode += length;
1985    
# Line 1915  for (;;) Line 2006  for (;;)
2006        }        }
2007      break;      break;
2008    
2009      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2010    
2011      case OP_EXACT:      case OP_EXACT:
2012      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2013      ecode += 3;      ecode += 3;
2014      goto REPEATCHAR;      goto REPEATCHAR;
2015    
2016        case OP_POSUPTO:
2017        possessive = TRUE;
2018        /* Fall through */
2019    
2020      case OP_UPTO:      case OP_UPTO:
2021      case OP_MINUPTO:      case OP_MINUPTO:
2022      min = 0;      min = 0;
# Line 1930  for (;;) Line 2025  for (;;)
2025      ecode += 3;      ecode += 3;
2026      goto REPEATCHAR;      goto REPEATCHAR;
2027    
2028        case OP_POSSTAR:
2029        possessive = TRUE;
2030        min = 0;
2031        max = INT_MAX;
2032        ecode++;
2033        goto REPEATCHAR;
2034    
2035        case OP_POSPLUS:
2036        possessive = TRUE;
2037        min = 1;
2038        max = INT_MAX;
2039        ecode++;
2040        goto REPEATCHAR;
2041    
2042        case OP_POSQUERY:
2043        possessive = TRUE;
2044        min = 0;
2045        max = 1;
2046        ecode++;
2047        goto REPEATCHAR;
2048    
2049      case OP_STAR:      case OP_STAR:
2050      case OP_MINSTAR:      case OP_MINSTAR:
2051      case OP_PLUS:      case OP_PLUS:
# Line 1961  for (;;) Line 2077  for (;;)
2077    
2078        if (length > 1)        if (length > 1)
2079          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2080  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2081          int othercase;          unsigned int othercase;
2082          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2083              (othercase = _pcre_ucp_othercase(fc)) >= 0 &&              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
              othercase >= 0)  
2084            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2085            else oclength = 0;
2086  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2087    
2088          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2089            {            {
2090            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (memcmp(eptr, charptr, length) == 0) eptr += length;
2091    #ifdef SUPPORT_UCP
2092            /* Need braces because of following else */            /* Need braces because of following else */
2093            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2094            else            else
# Line 1982  for (;;) Line 2096  for (;;)
2096              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2097              eptr += oclength;              eptr += oclength;
2098              }              }
2099    #else   /* without SUPPORT_UCP */
2100              else { RRETURN(MATCH_NOMATCH); }
2101    #endif  /* SUPPORT_UCP */
2102            }            }
2103    
2104          if (min == max) continue;          if (min == max) continue;
# Line 1990  for (;;) Line 2107  for (;;)
2107            {            {
2108            for (fi = min;; fi++)            for (fi = min;; fi++)
2109              {              {
2110              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2111              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2112              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2113              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2114    #ifdef SUPPORT_UCP
2115              /* Need braces because of following else */              /* Need braces because of following else */
2116              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2117              else              else
# Line 2001  for (;;) Line 2119  for (;;)
2119                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2120                eptr += oclength;                eptr += oclength;
2121                }                }
2122    #else   /* without SUPPORT_UCP */
2123                else { RRETURN (MATCH_NOMATCH); }
2124    #endif  /* SUPPORT_UCP */
2125              }              }
2126            /* Control never gets here */            /* Control never gets here */
2127            }            }
2128          else  
2129            else  /* Maximize */
2130            {            {
2131            pp = eptr;            pp = eptr;
2132            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2133              {              {
2134              if (eptr > md->end_subject - length) break;              if (eptr > md->end_subject - length) break;
2135              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2136    #ifdef SUPPORT_UCP
2137              else if (oclength == 0) break;              else if (oclength == 0) break;
2138              else              else
2139                {                {
2140                if (memcmp(eptr, occhars, oclength) != 0) break;                if (memcmp(eptr, occhars, oclength) != 0) break;
2141                eptr += oclength;                eptr += oclength;
2142                }                }
2143    #else   /* without SUPPORT_UCP */
2144                else break;
2145    #endif  /* SUPPORT_UCP */
2146              }              }
2147            while (eptr >= pp)  
2148              if (possessive) continue;
2149              for(;;)
2150             {             {
2151             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2152             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2153               if (eptr == pp) RRETURN(MATCH_NOMATCH);
2154    #ifdef SUPPORT_UCP
2155               eptr--;
2156               BACKCHAR(eptr);
2157    #else   /* without SUPPORT_UCP */
2158             eptr -= length;             eptr -= length;
2159    #endif  /* SUPPORT_UCP */
2160             }             }
           RRETURN(MATCH_NOMATCH);  
2161            }            }
2162          /* Control never gets here */          /* Control never gets here */
2163          }          }
# Line 2064  for (;;) Line 2197  for (;;)
2197          {          {
2198          for (fi = min;; fi++)          for (fi = min;; fi++)
2199            {            {
2200            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2201            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2202            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
2203                fc != md->lcc[*eptr++])                fc != md->lcc[*eptr++])
# Line 2072  for (;;) Line 2205  for (;;)
2205            }            }
2206          /* Control never gets here */          /* Control never gets here */
2207          }          }
2208        else        else  /* Maximize */
2209          {          {
2210          pp = eptr;          pp = eptr;
2211          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2080  for (;;) Line 2213  for (;;)
2213            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2214            eptr++;            eptr++;
2215            }            }
2216            if (possessive) continue;
2217          while (eptr >= pp)          while (eptr >= pp)
2218            {            {
2219            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2220            eptr--;            eptr--;
2221            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2222            }            }
# Line 2101  for (;;) Line 2235  for (;;)
2235          {          {
2236          for (fi = min;; fi++)          for (fi = min;; fi++)
2237            {            {
2238            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2239            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2240            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2241              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2242            }            }
2243          /* Control never gets here */          /* Control never gets here */
2244          }          }
2245        else        else  /* Maximize */
2246          {          {
2247          pp = eptr;          pp = eptr;
2248          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2116  for (;;) Line 2250  for (;;)
2250            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2251            eptr++;            eptr++;
2252            }            }
2253            if (possessive) continue;
2254          while (eptr >= pp)          while (eptr >= pp)
2255            {            {
2256            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2257            eptr--;            eptr--;
2258            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2259            }            }
# Line 2168  for (;;) Line 2303  for (;;)
2303      ecode += 3;      ecode += 3;
2304      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2305    
2306        case OP_NOTPOSSTAR:
2307        possessive = TRUE;
2308        min = 0;
2309        max = INT_MAX;
2310        ecode++;
2311        goto REPEATNOTCHAR;
2312    
2313        case OP_NOTPOSPLUS:
2314        possessive = TRUE;
2315        min = 1;
2316        max = INT_MAX;
2317        ecode++;
2318        goto REPEATNOTCHAR;
2319    
2320        case OP_NOTPOSQUERY:
2321        possessive = TRUE;
2322        min = 0;
2323        max = 1;
2324        ecode++;
2325        goto REPEATNOTCHAR;
2326    
2327        case OP_NOTPOSUPTO:
2328        possessive = TRUE;
2329        min = 0;
2330        max = GET2(ecode, 1);
2331        ecode += 3;
2332        goto REPEATNOTCHAR;
2333    
2334      case OP_NOTSTAR:      case OP_NOTSTAR:
2335      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2336      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2207  for (;;) Line 2370  for (;;)
2370        /* UTF-8 mode */        /* UTF-8 mode */
2371        if (utf8)        if (utf8)
2372          {          {
2373          register int d;          register unsigned int d;
2374          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2375            {            {
2376            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2232  for (;;) Line 2395  for (;;)
2395          /* UTF-8 mode */          /* UTF-8 mode */
2396          if (utf8)          if (utf8)
2397            {            {
2398            register int d;            register unsigned int d;
2399            for (fi = min;; fi++)            for (fi = min;; fi++)
2400              {              {
2401              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2402              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2403              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2404              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
# Line 2249  for (;;) Line 2412  for (;;)
2412            {            {
2413            for (fi = min;; fi++)            for (fi = min;; fi++)
2414              {              {
2415              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2416              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2417              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2418                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2268  for (;;) Line 2431  for (;;)
2431          /* UTF-8 mode */          /* UTF-8 mode */
2432          if (utf8)          if (utf8)
2433            {            {
2434            register int d;            register unsigned int d;
2435            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2436              {              {
2437              int len = 1;              int len = 1;
# Line 2278  for (;;) Line 2441  for (;;)
2441              if (fc == d) break;              if (fc == d) break;
2442              eptr += len;              eptr += len;
2443              }              }
2444            for(;;)          if (possessive) continue;
2445            for(;;)
2446              {              {
2447              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2448              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2449              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2450              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2295  for (;;) Line 2459  for (;;)
2459              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2460              eptr++;              eptr++;
2461              }              }
2462              if (possessive) continue;
2463            while (eptr >= pp)            while (eptr >= pp)
2464              {              {
2465              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2466              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2467              eptr--;              eptr--;
2468              }              }
# Line 2316  for (;;) Line 2481  for (;;)
2481        /* UTF-8 mode */        /* UTF-8 mode */
2482        if (utf8)        if (utf8)
2483          {          {
2484          register int d;          register unsigned int d;
2485          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2486            {            {
2487            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2339  for (;;) Line 2504  for (;;)
2504          /* UTF-8 mode */          /* UTF-8 mode */
2505          if (utf8)          if (utf8)
2506            {            {
2507            register int d;            register unsigned int d;
2508            for (fi = min;; fi++)            for (fi = min;; fi++)
2509              {              {
2510              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2511              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2512              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2513              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fi >= max || eptr >= md->end_subject || fc == d)
# Line 2355  for (;;) Line 2520  for (;;)
2520            {            {
2521            for (fi = min;; fi++)            for (fi = min;; fi++)
2522              {              {
2523              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2524              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2525              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2526                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2374  for (;;) Line 2539  for (;;)
2539          /* UTF-8 mode */          /* UTF-8 mode */
2540          if (utf8)          if (utf8)
2541            {            {
2542            register int d;            register unsigned int d;
2543            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2544              {              {
2545              int len = 1;              int len = 1;
# Line 2383  for (;;) Line 2548  for (;;)
2548              if (fc == d) break;              if (fc == d) break;
2549              eptr += len;              eptr += len;
2550              }              }
2551              if (possessive) continue;
2552            for(;;)            for(;;)
2553              {              {
2554              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2555              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2556              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2557              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2400  for (;;) Line 2566  for (;;)
2566              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2567              eptr++;              eptr++;
2568              }              }
2569              if (possessive) continue;
2570            while (eptr >= pp)            while (eptr >= pp)
2571              {              {
2572              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2573              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2574              eptr--;              eptr--;
2575              }              }
# Line 2431  for (;;) Line 2598  for (;;)
2598      ecode += 3;      ecode += 3;
2599      goto REPEATTYPE;      goto REPEATTYPE;
2600    
2601        case OP_TYPEPOSSTAR:
2602        possessive = TRUE;
2603        min = 0;
2604        max = INT_MAX;
2605        ecode++;
2606        goto REPEATTYPE;
2607    
2608        case OP_TYPEPOSPLUS:
2609        possessive = TRUE;
2610        min = 1;
2611        max = INT_MAX;
2612        ecode++;
2613        goto REPEATTYPE;
2614    
2615        case OP_TYPEPOSQUERY:
2616        possessive = TRUE;
2617        min = 0;
2618        max = 1;
2619        ecode++;
2620        goto REPEATTYPE;
2621    
2622        case OP_TYPEPOSUPTO:
2623        possessive = TRUE;
2624        min = 0;
2625        max = GET2(ecode, 1);
2626        ecode += 3;
2627        goto REPEATTYPE;
2628    
2629      case OP_TYPESTAR:      case OP_TYPESTAR:
2630      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
2631      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2533  for (;;) Line 2728  for (;;)
2728    
2729            default:            default:
2730            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
2731            }            }
2732          }          }
2733    
# Line 2573  for (;;) Line 2767  for (;;)
2767          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2768            {            {
2769            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
2770               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2771              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2772              eptr++;
2773            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2774            }            }
2775          break;          break;
# Line 2583  for (;;) Line 2778  for (;;)
2778          eptr += min;          eptr += min;
2779          break;          break;
2780    
2781            case OP_ANYNL:
2782            for (i = 1; i <= min; i++)
2783              {
2784              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2785              GETCHARINC(c, eptr);
2786              switch(c)
2787                {
2788                default: RRETURN(MATCH_NOMATCH);
2789                case 0x000d:
2790                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2791                break;
2792                case 0x000a:
2793                case 0x000b:
2794                case 0x000c:
2795                case 0x0085:
2796                case 0x2028:
2797                case 0x2029:
2798                break;
2799                }
2800              }
2801            break;
2802    
2803          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2804          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2805            {            {
# Line 2651  for (;;) Line 2868  for (;;)
2868  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
2869    
2870        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
2871        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
2872          number of bytes present, as this was tested above. */
2873    
2874        switch(ctype)        switch(ctype)
2875          {          {
# Line 2659  for (;;) Line 2877  for (;;)
2877          if ((ims & PCRE_DOTALL) == 0)          if ((ims & PCRE_DOTALL) == 0)
2878            {            {
2879            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2880              if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);              {
2881                if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2882                eptr++;
2883                }
2884            }            }
2885          else eptr += min;          else eptr += min;
2886          break;          break;
# Line 2668  for (;;) Line 2889  for (;;)
2889          eptr += min;          eptr += min;
2890          break;          break;
2891    
2892            /* Because of the CRLF case, we can't assume the minimum number of
2893            bytes are present in this case. */
2894    
2895            case OP_ANYNL:
2896            for (i = 1; i <= min; i++)
2897              {
2898              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2899              switch(*eptr++)
2900                {
2901                default: RRETURN(MATCH_NOMATCH);
2902                case 0x000d:
2903                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2904                break;
2905                case 0x000a:
2906                case 0x000b:
2907                case 0x000c:
2908                case 0x0085:
2909                break;
2910                }
2911              }
2912            break;
2913    
2914          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2915          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2916            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2723  for (;;) Line 2966  for (;;)
2966            case PT_ANY:            case PT_ANY:
2967            for (fi = min;; fi++)            for (fi = min;; fi++)
2968              {              {
2969              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
2970              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2971              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2972              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2973              if (prop_fail_result) RRETURN(MATCH_NOMATCH);              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2974              }              }
2975            break;            /* Control never gets here */
2976    
2977            case PT_LAMP:            case PT_LAMP:
2978            for (fi = min;; fi++)            for (fi = min;; fi++)
2979              {              {
2980              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
2981              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2982              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2983              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2744  for (;;) Line 2987  for (;;)
2987                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
2988                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2989              }              }
2990            break;            /* Control never gets here */
2991    
2992            case PT_GC:            case PT_GC:
2993            for (fi = min;; fi++)            for (fi = min;; fi++)
2994              {              {
2995              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
2996              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2997              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2998              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2757  for (;;) Line 3000  for (;;)
3000              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3001                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3002              }              }
3003            break;            /* Control never gets here */
3004    
3005            case PT_PC:            case PT_PC:
3006            for (fi = min;; fi++)            for (fi = min;; fi++)
3007              {              {
3008              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3009              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3010              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3011              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2770  for (;;) Line 3013  for (;;)
3013              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3014                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3015              }              }
3016            break;            /* Control never gets here */
3017    
3018            case PT_SC:            case PT_SC:
3019            for (fi = min;; fi++)            for (fi = min;; fi++)
3020              {              {
3021              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3022              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3023              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3024              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2783  for (;;) Line 3026  for (;;)
3026              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3027                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3028              }              }
3029            break;            /* Control never gets here */
3030    
3031            default:            default:
3032            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
3033            }            }
3034          }          }
3035    
# Line 2798  for (;;) Line 3040  for (;;)
3040          {          {
3041          for (fi = min;; fi++)          for (fi = min;; fi++)
3042            {            {
3043            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3044            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3045            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3046            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
# Line 2827  for (;;) Line 3069  for (;;)
3069          {          {
3070          for (fi = min;; fi++)          for (fi = min;; fi++)
3071            {            {
3072            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3073            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3074            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3075                   (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3076                    IS_NEWLINE(eptr)))
3077                RRETURN(MATCH_NOMATCH);
3078    
3079            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3080            switch(ctype)            switch(ctype)
3081              {              {
3082              case OP_ANY:              case OP_ANY:        /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3083              break;              break;
3084    
3085              case OP_ANYBYTE:              case OP_ANYBYTE:
3086              break;              break;
3087    
3088                case OP_ANYNL:
3089                switch(c)
3090                  {
3091                  default: RRETURN(MATCH_NOMATCH);
3092                  case 0x000d:
3093                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3094                  break;
3095                  case 0x000a:
3096                  case 0x000b:
3097                  case 0x000c:
3098                  case 0x0085:
3099                  case 0x2028:
3100                  case 0x2029:
3101                  break;
3102                  }
3103                break;
3104    
3105              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3106              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3107                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2882  for (;;) Line 3143  for (;;)
3143          {          {
3144          for (fi = min;; fi++)          for (fi = min;; fi++)
3145            {            {
3146            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3147            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3148            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3149                   ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3150                RRETURN(MATCH_NOMATCH);
3151    
3152            c = *eptr++;            c = *eptr++;
3153            switch(ctype)            switch(ctype)
3154              {              {
3155              case OP_ANY:              case OP_ANY:   /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3156              break;              break;
3157    
3158              case OP_ANYBYTE:              case OP_ANYBYTE:
3159              break;              break;
3160    
3161                case OP_ANYNL:
3162                switch(c)
3163                  {
3164                  default: RRETURN(MATCH_NOMATCH);
3165                  case 0x000d:
3166                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3167                  break;
3168                  case 0x000a:
3169                  case 0x000b:
3170                  case 0x000c:
3171                  case 0x0085:
3172                  break;
3173                  }
3174                break;
3175    
3176              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3177              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3178              break;              break;
# Line 2927  for (;;) Line 3205  for (;;)
3205        /* Control never gets here */        /* Control never gets here */
3206        }        }
3207    
3208      /* If maximizing it is worth using inline code for speed, doing the type      /* If maximizing, it is worth using inline code for speed, doing the type
3209      test once at the start (i.e. keep it out of the loop). Again, keep the      test once at the start (i.e. keep it out of the loop). Again, keep the
3210      UTF-8 and UCP stuff separate. */      UTF-8 and UCP stuff separate. */
3211    
# Line 3008  for (;;) Line 3286  for (;;)
3286    
3287          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3288    
3289            if (possessive) continue;
3290          for(;;)          for(;;)
3291            {            {
3292            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3293            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3294            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3295            BACKCHAR(eptr);            BACKCHAR(eptr);
# Line 3043  for (;;) Line 3322  for (;;)
3322    
3323          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3324    
3325            if (possessive) continue;
3326          for(;;)          for(;;)
3327            {            {
3328            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3329            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3330            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3331            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
# Line 3075  for (;;) Line 3355  for (;;)
3355            {            {
3356            case OP_ANY:            case OP_ANY:
3357    
3358            /* Special code is required for UTF8, but when the maximum is unlimited            /* Special code is required for UTF8, but when the maximum is
3359            we don't need it, so we repeat the non-UTF8 code. This is probably            unlimited we don't need it, so we repeat the non-UTF8 code. This is
3360            worth it, because .* is quite a common idiom. */            probably worth it, because .* is quite a common idiom. */
3361    
3362            if (max < INT_MAX)            if (max < INT_MAX)
3363              {              {
# Line 3085  for (;;) Line 3365  for (;;)
3365                {                {
3366                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3367                  {                  {
3368                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3369                  eptr++;                  eptr++;
3370                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3371                  }                  }
# Line 3094  for (;;) Line 3374  for (;;)
3374                {                {
3375                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3376                  {                  {
3377                    if (eptr >= md->end_subject) break;
3378                  eptr++;                  eptr++;
3379                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3380                  }                  }
# Line 3108  for (;;) Line 3389  for (;;)
3389                {                {
3390                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3391                  {                  {
3392                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3393                  eptr++;                  eptr++;
3394                  }                  }
3395                break;                break;
# Line 3116  for (;;) Line 3397  for (;;)
3397              else              else
3398                {                {
3399                c = max - min;                c = max - min;
3400                if (c > md->end_subject - eptr) c = md->end_subject - eptr;                if (c > (unsigned int)(md->end_subject - eptr))
3401                    c = md->end_subject - eptr;
3402                eptr += c;                eptr += c;
3403                }                }
3404              }              }
# Line 3126  for (;;) Line 3408  for (;;)
3408    
3409            case OP_ANYBYTE:            case OP_ANYBYTE:
3410            c = max - min;            c = max - min;
3411            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3412                c = md->end_subject - eptr;
3413            eptr += c;            eptr += c;
3414            break;            break;
3415    
3416              case OP_ANYNL:
3417              for (i = min; i < max; i++)
3418                {
3419                int len = 1;
3420                if (eptr >= md->end_subject) break;
3421                GETCHARLEN(c, eptr, len);
3422                if (c == 0x000d)
3423                  {
3424                  if (++eptr >= md->end_subject) break;
3425                  if (*eptr == 0x000a) eptr++;
3426                  }
3427                else
3428                  {
3429                  if (c != 0x000a && c != 0x000b && c != 0x000c &&
3430                      c != 0x0085 && c != 0x2028 && c != 0x2029)
3431                    break;
3432                  eptr += len;
3433                  }
3434                }
3435              break;
3436    
3437            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3438            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3439              {              {
# Line 3202  for (;;) Line 3506  for (;;)
3506    
3507          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3508    
3509            if (possessive) continue;
3510          for(;;)          for(;;)
3511            {            {
3512            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
3513            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3514            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3515            BACKCHAR(eptr);            BACKCHAR(eptr);
# Line 3222  for (;;) Line 3527  for (;;)
3527              {              {
3528              for (i = min; i < max; i++)              for (i = min; i < max; i++)
3529                {                {
3530                if (eptr >= md->end_subject || *eptr == NEWLINE) break;                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3531                eptr++;                eptr++;
3532                }                }
3533              break;              break;
# Line 3231  for (;;) Line 3536  for (;;)
3536    
3537            case OP_ANYBYTE:            case OP_ANYBYTE:
3538            c = max - min;            c = max - min;
3539            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3540                c = md->end_subject - eptr;
3541            eptr += c;            eptr += c;
3542            break;            break;
3543    
3544              case OP_ANYNL:
3545              for (i = min; i < max; i++)
3546                {
3547                if (eptr >= md->end_subject) break;
3548                c = *eptr;
3549                if (c == 0x000d)
3550                  {
3551                  if (++eptr >= md->end_subject) break;
3552                  if (*eptr == 0x000a) eptr++;
3553                  }
3554                else
3555                  {
3556                  if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
3557                    break;
3558                  eptr++;
3559                  }
3560                }
3561              break;
3562    
3563            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3564            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3565              {              {
# Line 3295  for (;;) Line 3620  for (;;)
3620    
3621          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3622    
3623            if (possessive) continue;
3624          while (eptr >= pp)          while (eptr >= pp)
3625            {            {
3626            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
3627            eptr--;            eptr--;
3628            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3629            }            }
# Line 3309  for (;;) Line 3635  for (;;)
3635        }        }
3636      /* Control never gets here */      /* Control never gets here */
3637    
3638      /* There's been some horrible disaster. Since all codes > OP_BRA are      /* There's been some horrible disaster. Arrival here can only mean there is
3639      for capturing brackets, and there shouldn't be any gaps between 0 and      something seriously wrong in the code above or the OP_xxx definitions. */
     OP_BRA, arrival here can only mean there is something seriously wrong  
     in the code above or the OP_xxx definitions. */  
3640    
3641      default:      default:
3642      DPRINTF(("Unknown opcode %d\n", *ecode));      DPRINTF(("Unknown opcode %d\n", *ecode));
3643      RRETURN(PCRE_ERROR_UNKNOWN_NODE);      RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
3644      }      }
3645    
3646    /* Do not stick any code in here without much thought; it is assumed    /* Do not stick any code in here without much thought; it is assumed
# Line 3325  for (;;) Line 3649  for (;;)
3649    
3650    }             /* End of main loop */    }             /* End of main loop */
3651  /* Control never reaches here */  /* Control never reaches here */
3652    
3653    
3654    /* When compiling to use the heap rather than the stack for recursive calls to
3655    match(), the RRETURN() macro jumps here. The number that is saved in
3656    frame->Xwhere indicates which label we actually want to return to. */
3657    
3658    #ifdef NO_RECURSE
3659    #define LBL(val) case val: goto L_RM##val;
3660    HEAP_RETURN:
3661    switch (frame->Xwhere)
3662      {
3663      LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
3664      LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
3665      LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
3666      LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
3667      LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
3668      LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
3669      default:
3670      DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
3671      return PCRE_ERROR_INTERNAL;
3672      }
3673    #undef LBL
3674    #endif  /* NO_RECURSE */
3675  }  }
3676    
3677    
# Line 3354  Undefine all the macros that were define Line 3701  Undefine all the macros that were define
3701    
3702  #undef cur_is_word  #undef cur_is_word
3703  #undef condition  #undef condition
 #undef minimize  
3704  #undef prev_is_word  #undef prev_is_word
3705    
3706  #undef original_ims  #undef original_ims
# Line 3410  Returns:          > 0 => success; value Line 3756  Returns:          > 0 => success; value
3756                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3757  */  */
3758    
3759  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
3760  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
3761    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
3762    int offsetcount)    int offsetcount)
# Line 3419  int rc, resetcount, ocount; Line 3765  int rc, resetcount, ocount;
3765  int first_byte = -1;  int first_byte = -1;
3766  int req_byte = -1;  int req_byte = -1;
3767  int req_byte2 = -1;  int req_byte2 = -1;
3768  unsigned long int ims = 0;  int newline;
3769    unsigned long int ims;
3770  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
3771  BOOL anchored;  BOOL anchored;
3772  BOOL startline;  BOOL startline;
3773  BOOL firstline;  BOOL firstline;
3774  BOOL first_byte_caseless = FALSE;  BOOL first_byte_caseless = FALSE;
3775  BOOL req_byte_caseless = FALSE;  BOOL req_byte_caseless = FALSE;
3776    BOOL utf8;
3777  match_data match_block;  match_data match_block;
3778    match_data *md = &match_block;
3779  const uschar *tables;  const uschar *tables;
3780  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3781  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
3782  USPTR end_subject;  USPTR end_subject;
3783  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
3784    eptrblock eptrchain[EPTR_WORK_SIZE];
3785    
3786  pcre_study_data internal_study;  pcre_study_data internal_study;
3787  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3451  if (offsetcount < 0) return PCRE_ERROR_B Line 3801  if (offsetcount < 0) return PCRE_ERROR_B
3801  the default values. */  the default values. */
3802    
3803  study = NULL;  study = NULL;
3804  match_block.match_limit = MATCH_LIMIT;  md->match_limit = MATCH_LIMIT;
3805  match_block.match_limit_recursion = MATCH_LIMIT_RECURSION;  md->match_limit_recursion = MATCH_LIMIT_RECURSION;
3806  match_block.callout_data = NULL;  md->callout_data = NULL;
3807    
3808  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
3809    
# Line 3465  if (extra_data != NULL) Line 3815  if (extra_data != NULL)
3815    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3816      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
3817    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
3818      match_block.match_limit = extra_data->match_limit;      md->match_limit = extra_data->match_limit;
3819    if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3820      match_block.match_limit_recursion = extra_data->match_limit_recursion;      md->match_limit_recursion = extra_data->match_limit_recursion;
3821    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3822      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
3823    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
3824    }    }
3825    
# Line 3499  firstline = (re->options & PCRE_FIRSTLIN Line 3849  firstline = (re->options & PCRE_FIRSTLIN
3849    
3850  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
3851    
3852  match_block.start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const uschar *)external_re + re->name_table_offset +
3853    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
3854    
3855  match_block.start_subject = (USPTR)subject;  md->start_subject = (USPTR)subject;
3856  match_block.start_offset = start_offset;  md->start_offset = start_offset;
3857  match_block.end_subject = match_block.start_subject + length;  md->end_subject = md->start_subject + length;
3858  end_subject = match_block.end_subject;  end_subject = md->end_subject;
3859    
3860  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
3861  match_block.utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
3862    
3863  match_block.notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
3864  match_block.noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
3865  match_block.notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
3866  match_block.partial = (options & PCRE_PARTIAL) != 0;  md->partial = (options & PCRE_PARTIAL) != 0;
3867  match_block.hitend = FALSE;  md->hitend = FALSE;
3868    
3869    md->recursive = NULL;                   /* No recursion at top level */
3870    md->eptrchain = eptrchain;              /* Make workspace generally available */
3871    
3872  match_block.recursive = NULL;                   /* No recursion at top level */  md->lcc = tables + lcc_offset;
3873    md->ctypes = tables + ctypes_offset;
3874    
3875  match_block.lcc = tables + lcc_offset;  /* Handle different types of newline. The three bits give eight cases. If
3876  match_block.ctypes = tables + ctypes_offset;  nothing is set at run time, whatever was used at compile time applies. */
3877    
3878    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
3879           PCRE_NEWLINE_BITS)
3880      {
3881      case 0: newline = NEWLINE; break;   /* Compile-time default */
3882      case PCRE_NEWLINE_CR: newline = '\r'; break;
3883      case PCRE_NEWLINE_LF: newline = '\n'; break;
3884      case PCRE_NEWLINE_CR+
3885           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
3886      case PCRE_NEWLINE_ANY: newline = -1; break;
3887      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
3888      default: return PCRE_ERROR_BADNEWLINE;
3889      }
3890    
3891    if (newline == -2)
3892      {
3893      md->nltype = NLTYPE_ANYCRLF;
3894      }
3895    else if (newline < 0)
3896      {
3897      md->nltype = NLTYPE_ANY;
3898      }
3899    else
3900      {
3901      md->nltype = NLTYPE_FIXED;
3902      if (newline > 255)
3903        {
3904        md->nllen = 2;
3905        md->nl[0] = (newline >> 8) & 255;
3906        md->nl[1] = newline & 255;
3907        }
3908      else
3909        {
3910        md->nllen = 1;
3911        md->nl[0] = newline;
3912        }
3913      }
3914    
3915  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
3916  moment. */  moment. */
3917    
3918  if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
3919    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
3920    
3921  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3922  back the character offset. */  back the character offset. */
3923    
3924  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3925  if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3926    {    {
3927    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
3928      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
# Line 3563  ocount = offsetcount - (offsetcount % 3) Line 3954  ocount = offsetcount - (offsetcount % 3)
3954  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
3955    {    {
3956    ocount = re->top_backref * 3 + 3;    ocount = re->top_backref * 3 + 3;
3957    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3958    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3959    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
3960    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
3961    }    }
3962  else match_block.offset_vector = offsets;  else md->offset_vector = offsets;
3963    
3964  match_block.offset_end = ocount;  md->offset_end = ocount;
3965  match_block.offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
3966  match_block.offset_overflow = FALSE;  md->offset_overflow = FALSE;
3967  match_block.capture_last = -1;  md->capture_last = -1;
3968    
3969  /* Compute the minimum number of offsets that we need to reset each time. Doing  /* Compute the minimum number of offsets that we need to reset each time. Doing
3970  this makes a huge difference to execution time when there aren't many brackets  this makes a huge difference to execution time when there aren't many brackets
# Line 3586  if (resetcount > offsetcount) resetcount Line 3977  if (resetcount > offsetcount) resetcount
3977  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
3978  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. */
3979    
3980  if (match_block.offset_vector != NULL)  if (md->offset_vector != NULL)
3981    {    {
3982    register int *iptr = match_block.offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
3983    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - resetcount/2 + 1;
3984    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
3985    }    }
# Line 3605  if (!anchored) Line 3996  if (!anchored)
3996      {      {
3997      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
3998      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
3999        first_byte = match_block.lcc[first_byte];        first_byte = md->lcc[first_byte];
4000      }      }
4001    else    else
4002      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 3623  if ((re->options & PCRE_REQCHSET) != 0) Line 4014  if ((re->options & PCRE_REQCHSET) != 0)
4014    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
4015    }    }
4016    
4017    
4018    /* ==========================================================================*/
4019    
4020  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
4021  the loop runs just once. */  the loop runs just once. */
4022    
4023  do  for(;;)
4024    {    {
4025    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
4026    
4027    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
4028    
4029    if (match_block.offset_vector != NULL)    if (md->offset_vector != NULL)
4030      {      {
4031      register int *iptr = match_block.offset_vector;      register int *iptr = md->offset_vector;
4032      register int *iend = iptr + resetcount;      register int *iend = iptr + resetcount;
4033      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
4034      }      }
4035    
4036    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* Advance to a unique first char if possible. If firstline is TRUE, the
4037    start of the match is constrained to the first line of a multiline string.    start of the match is constrained to the first line of a multiline string.
4038    Implement this by temporarily adjusting end_subject so that we stop scanning    That is, the match must be before or at the first newline. Implement this by
4039    at a newline. If the match fails at the newline, later code breaks this loop.    temporarily adjusting end_subject so that we stop scanning at a newline. If
4040    */    the match fails at the newline, later code breaks this loop. */
4041    
4042    if (firstline)    if (firstline)
4043      {      {
4044      USPTR t = start_match;      USPTR t = start_match;
4045      while (t < save_end_subject && *t != '\n') t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4046      end_subject = t;      end_subject = t;
4047      }      }
4048    
# Line 3658  do Line 4052  do
4052      {      {
4053      if (first_byte_caseless)      if (first_byte_caseless)
4054        while (start_match < end_subject &&        while (start_match < end_subject &&
4055               match_block.lcc[*start_match] != first_byte)               md->lcc[*start_match] != first_byte)
4056          start_match++;          start_match++;
4057      else      else
4058        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4059          start_match++;          start_match++;
4060      }      }
4061    
4062    /* Or to just after \n for a multiline match if possible */    /* Or to just after a linebreak for a multiline match if possible */
4063    
4064    else if (startline)    else if (startline)
4065      {      {
4066      if (start_match > match_block.start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4067        {        {
4068        while (start_match < end_subject && start_match[-1] != NEWLINE)        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4069            start_match++;
4070    
4071          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4072          and we are now at a LF, advance the match position by one more character.
4073          */
4074    
4075          if (start_match[-1] == '\r' &&
4076               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4077               start_match < end_subject &&
4078               *start_match == '\n')
4079          start_match++;          start_match++;
4080        }        }
4081      }      }
# Line 3693  do Line 4097  do
4097    
4098  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4099    printf(">>>> Match against: ");    printf(">>>> Match against: ");
4100    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, md);
4101    printf("\n");    printf("\n");
4102  #endif  #endif
4103    
# Line 3707  do Line 4111  do
4111    
4112    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end can
4113    take a long time, and give bad performance on quite ordinary patterns. This    take a long time, and give bad performance on quite ordinary patterns. This
4114    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4115    don't do this when the string is sufficiently long.    string... so we don't do this when the string is sufficiently long.
4116    
4117    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested.
4118    */    */
4119    
4120    if (req_byte >= 0 &&    if (req_byte >= 0 &&
4121        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
4122        !match_block.partial)        !md->partial)
4123      {      {
4124      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4125    
# Line 3740  do Line 4144  do
4144            }            }
4145          }          }
4146    
4147        /* If we can't find the required character, break the matching loop */        /* If we can't find the required character, break the matching loop,
4148          forcing a match failure. */
4149    
4150        if (p >= end_subject) break;        if (p >= end_subject)
4151            {
4152            rc = MATCH_NOMATCH;
4153            break;
4154            }
4155    
4156        /* If we have found the required character, save the point where we        /* If we have found the required character, save the point where we
4157        found it, so that we don't search again next time round the loop if        found it, so that we don't search again next time round the loop if
# Line 3752  do Line 4161  do
4161        }        }
4162      }      }
4163    
4164    /* When a match occurs, substrings will be set for all internal extractions;    /* OK, we can now run the match. */
   we just need to set up the whole thing as substring 0 before returning. If  
   there were too many extractions, set the return code to zero. In the case  
   where we had to get some local store to hold offsets for backreferences, copy  
   those back references that we can. In this case there need not be overflow  
   if certain parts of the pattern were not used. */  
   
   match_block.start_match = start_match;  
   match_block.match_call_count = 0;  
   
   rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,  
     match_isgroup, 0);  
   
   /* When the result is no match, if the subject's first character was a  
   newline and the PCRE_FIRSTLINE option is set, break (which will return  
   PCRE_ERROR_NOMATCH). The option requests that a match occur before the first  
   newline in the subject. Otherwise, advance the pointer to the next character  
   and continue - but the continuation will actually happen only when the  
   pattern is not anchored. */  
4165    
4166    if (rc == MATCH_NOMATCH)    md->start_match = start_match;
4167      {    md->match_call_count = 0;
4168      if (firstline && *start_match == NEWLINE) break;    md->eptrn = 0;                          /* Next free eptrchain slot */
4169      start_match++;    rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);
4170    
4171      /* Any return other than MATCH_NOMATCH breaks the loop. */
4172    
4173      if (rc != MATCH_NOMATCH) break;
4174    
4175      /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4176      newline in the subject (though it may continue over the newline). Therefore,
4177      if we have just failed to match, starting at a newline, do not continue. */
4178    
4179      if (firstline && IS_NEWLINE(start_match)) break;
4180    
4181      /* Advance the match position by one character. */
4182    
4183      start_match++;
4184  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4185      if (match_block.utf8)    if (utf8)
4186        while(start_match < end_subject && (*start_match & 0xc0) == 0x80)      while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4187          start_match++;        start_match++;
4188  #endif  #endif
4189      continue;  
4190      }    /* Break the loop if the pattern is anchored or if we have passed the end of
4191      the subject. */
4192    
4193      if (anchored || start_match > end_subject) break;
4194    
4195      /* If we have just passed a CR and the newline option is CRLF or ANY or
4196      ANYCRLF, and we are now at a LF, advance the match position by one more
4197      character. */
4198    
4199      if (start_match[-1] == '\r' &&
4200           (md->nltype == NLTYPE_ANY ||
4201            md->nltype == NLTYPE_ANYCRLF ||
4202            md->nllen == 2) &&
4203           start_match < end_subject &&
4204           *start_match == '\n')
4205        start_match++;
4206    
4207    if (rc != MATCH_MATCH)    }   /* End of for(;;) "bumpalong" loop */
4208      {  
4209      DPRINTF((">>>> error: returning %d\n", rc));  /* ==========================================================================*/
4210      return rc;  
4211      }  /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4212    conditions is true:
4213    
4214    /* We have a match! Copy the offset information from temporary store if  (1) The pattern is anchored;
   necessary */  
4215    
4216    (2) We are past the end of the subject;
4217    
4218    (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4219        this option requests that a match occur at or before the first newline in
4220        the subject.
4221    
4222    When we have a match and the offset vector is big enough to deal with any
4223    backreferences, captured substring offsets will already be set up. In the case
4224    where we had to get some local store to hold offsets for backreference
4225    processing, copy those that we can. In this case there need not be overflow if
4226    certain parts of the pattern were not used, even though there are more
4227    capturing parentheses than vector slots. */
4228    
4229    if (rc == MATCH_MATCH)
4230      {
4231    if (using_temporary_offsets)    if (using_temporary_offsets)
4232      {      {
4233      if (offsetcount >= 4)      if (offsetcount >= 4)
4234        {        {
4235        memcpy(offsets + 2, match_block.offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
4236          (offsetcount - 2) * sizeof(int));          (offsetcount - 2) * sizeof(int));
4237        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
4238        }        }
4239      if (match_block.end_offset_top > offsetcount)      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
       match_block.offset_overflow = TRUE;  
   
4240      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
4241      (pcre_free)(match_block.offset_vector);      (pcre_free)(md->offset_vector);
4242      }      }
4243    
4244    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;    /* Set the return code to the number of captured strings, or 0 if there are
4245      too many to fit into the vector. */
4246    
4247      rc = md->offset_overflow? 0 : md->end_offset_top/2;
4248    
4249      /* If there is space, set up the whole thing as substring 0. */
4250    
4251    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4252      {      {
4253      offsets[0] = start_match - match_block.start_subject;      offsets[0] = start_match - md->start_subject;
4254      offsets[1] = match_block.end_match_ptr - match_block.start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
4255      }      }
4256    
4257    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
4258    return rc;    return rc;
4259    }    }
4260    
4261  /* This "while" is the end of the "do" above */  /* Control gets here if there has been an error, or if the overall match
4262    attempt has failed at all permitted starting positions. */
 while (!anchored && start_match <= end_subject);  
4263    
4264  if (using_temporary_offsets)  if (using_temporary_offsets)
4265    {    {
4266    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
4267    (pcre_free)(match_block.offset_vector);    (pcre_free)(md->offset_vector);
4268    }    }
4269    
4270  if (match_block.partial && match_block.hitend)  if (rc != MATCH_NOMATCH)
4271      {
4272      DPRINTF((">>>> error: returning %d\n", rc));
4273      return rc;
4274      }
4275    else if (md->partial && md->hitend)
4276    {    {
4277    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4278    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;

Legend:
Removed from v.87  
changed lines
  Added in v.165

  ViewVC Help
Powered by ViewVC 1.1.5