/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 165 by ph10, Wed May 9 10:50:57 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45  #define NLBLOCK md           /* The block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
46    #define PSSTART start_subject  /* Field containing processed string start */
47    #define PSEND   end_subject    /* Field containing processed string end */
48    
49  #include "pcre_internal.h"  #include "pcre_internal.h"
50    
51    /* Undefine some potentially clashing cpp symbols */
52    
53    #undef min
54    #undef max
55    
56    /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
57    obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
58    
59  /* Structure for building a chain of data that actually lives on the  #define EPTR_WORK_SIZE (1000)
 stack, for holding the values of the subject pointer at the start of each  
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   USPTR epb_saved_eptr;  
 } eptrblock;  
60    
61  /* Flag bits for the match() function */  /* Flag bits for the match() function */
62    
63  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
64  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
65    #define match_tail_recursed  0x04  /* Tail recursive call */
66    
67  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
68  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 101  Returns:     nothing Line 103  Returns:     nothing
103  static void  static void
104  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
105  {  {
106  int c;  unsigned int c;
107  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
108  while (length-- > 0)  while (length-- > 0)
109    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 186  calls by keeping local variables that ne Line 188  calls by keeping local variables that ne
188  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
189  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
190  always used to.  always used to.
191    
192    The original heap-recursive code used longjmp(). However, it seems that this
193    can be very slow on some operating systems. Following a suggestion from Stan
194    Switzer, the use of longjmp() has been abolished, at the cost of having to
195    provide a unique number for each call to RMATCH. There is no way of generating
196    a sequence of numbers at compile time in C. I have given them names, to make
197    them stand out more clearly.
198    
199    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
200    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
201    tests. Furthermore, not using longjmp() means that local dynamic variables
202    don't have indeterminate values; this has meant that the frame size can be
203    reduced because the result can be "passed back" by straight setting of the
204    variable instead of being passed in the frame.
205  ****************************************************************************  ****************************************************************************
206  ***************************************************************************/  ***************************************************************************/
207    
208    
209    /* Numbers for RMATCH calls */
210    
211    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
212           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
213           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
214           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
215           RM41,  RM42, RM43, RM44, RM45, RM46, RM47 };
216    
217    
218  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
219  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
220    actually used in this definition. */
221    
222  #ifndef NO_RECURSE  #ifndef NO_RECURSE
223  #define REGISTER register  #define REGISTER register
224    
225  #ifdef DEBUG  #ifdef DEBUG
226  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
227    { \    { \
228    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
229    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \
230    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
231    }    }
232  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 208  versions and production versions. */ Line 235  versions and production versions. */
235    return ra; \    return ra; \
236    }    }
237  #else  #else
238  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
239    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)
240  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
241  #endif  #endif
242    
243  #else  #else
244    
245    
246  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
247  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
248  match(), which never changes. */  argument of match(), which never changes. */
249    
250  #define REGISTER  #define REGISTER
251    
252  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
253    {\    {\
254    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
255    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
256      {\    newframe->Xeptr = ra;\
257      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
258      newframe->Xecode = rb;\    newframe->Xoffset_top = rc;\
259      newframe->Xoffset_top = rc;\    newframe->Xims = re;\
260      newframe->Xims = re;\    newframe->Xeptrb = rf;\
261      newframe->Xeptrb = rf;\    newframe->Xflags = rg;\
262      newframe->Xflags = rg;\    newframe->Xrdepth = frame->Xrdepth + 1;\
263      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xprevframe = frame;\
264      newframe->Xprevframe = frame;\    frame = newframe;\
265      frame = newframe;\    DPRINTF(("restarting from line %d\n", __LINE__));\
266      DPRINTF(("restarting from line %d\n", __LINE__));\    goto HEAP_RECURSE;\
267      goto HEAP_RECURSE;\    L_##rw:\
268      }\    DPRINTF(("jumped back to line %d\n", __LINE__));\
   else\  
     {\  
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
269    }    }
270    
271  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 254  match(), which never changes. */ Line 275  match(), which never changes. */
275    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
276    if (frame != NULL)\    if (frame != NULL)\
277      {\      {\
278      frame->Xresult = ra;\      rrc = ra;\
279      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
280      }\      }\
281    return ra;\    return ra;\
282    }    }
# Line 291  typedef struct heapframe { Line 311  typedef struct heapframe {
311    
312    BOOL Xcur_is_word;    BOOL Xcur_is_word;
313    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
314    BOOL Xprev_is_word;    BOOL Xprev_is_word;
315    
316    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
# Line 303  typedef struct heapframe { Line 322  typedef struct heapframe {
322    int Xprop_category;    int Xprop_category;
323    int Xprop_chartype;    int Xprop_chartype;
324    int Xprop_script;    int Xprop_script;
325    int *Xprop_test_variable;    int Xoclength;
326      uschar Xocchars[8];
327  #endif  #endif
328    
329    int Xctype;    int Xctype;
330    int Xfc;    unsigned int Xfc;
331    int Xfi;    int Xfi;
332    int Xlength;    int Xlength;
333    int Xmax;    int Xmax;
# Line 321  typedef struct heapframe { Line 341  typedef struct heapframe {
341    
342    eptrblock Xnewptrb;    eptrblock Xnewptrb;
343    
344    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
345    
346    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
347    
348  } heapframe;  } heapframe;
349    
# Line 340  typedef struct heapframe { Line 359  typedef struct heapframe {
359  *         Match from current position            *  *         Match from current position            *
360  *************************************************/  *************************************************/
361    
362  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
363  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
364  same response.  same response.
365    
# Line 353  performance. Tests using gcc on a SPARC Line 369  performance. Tests using gcc on a SPARC
369  made performance worse.  made performance worse.
370    
371  Arguments:  Arguments:
372     eptr        pointer in subject     eptr        pointer to current character in subject
373     ecode       position in code     ecode       pointer to current position in compiled code
374     offset_top  current top pointer     offset_top  current top pointer
375     md          pointer to "static" info for the match     md          pointer to "static" info for the match
376     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 362  Arguments: Line 378  Arguments:
378                   brackets - for testing for empty matches                   brackets - for testing for empty matches
379     flags       can contain     flags       can contain
380                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
381                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
382                       group that can match an empty string
383                     match_tail_recursed - this is a tail_recursed group
384     rdepth      the recursion depth     rdepth      the recursion depth
385    
386  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 377  match(REGISTER USPTR eptr, REGISTER cons Line 395  match(REGISTER USPTR eptr, REGISTER cons
395    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
396  {  {
397  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
398  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
399  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
400    
401  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
402  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
403  register unsigned int  c;  /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
404  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
405    
406    BOOL minimize, possessive; /* Quantifier options */
407    
408  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
409  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
410  heap storage. Set up the top-level frame here; others are obtained from the  heap storage. Set up the top-level frame here; others are obtained from the
# Line 434  HEAP_RECURSE: Line 454  HEAP_RECURSE:
454    
455  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
456  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
457  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
458    
459  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
# Line 446  HEAP_RECURSE: Line 465  HEAP_RECURSE:
465  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
466  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
467  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
468  #define prop_test_variable frame->Xprop_test_variable  #define oclength           frame->Xoclength
469    #define occhars            frame->Xocchars
470  #endif  #endif
471    
472  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 470  HEAP_RECURSE: Line 490  HEAP_RECURSE:
490  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
491  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
492    
493  #else  #else         /* NO_RECURSE not defined */
494  #define fi i  #define fi i
495  #define fc c  #define fc c
496    
# Line 489  recursion_info new_recursive;      /* wi Line 509  recursion_info new_recursive;      /* wi
509                                     /* that do not have to be preserved over  */                                     /* that do not have to be preserved over  */
510  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
511  BOOL condition;  BOOL condition;
 BOOL minimize;  
512  BOOL prev_is_word;  BOOL prev_is_word;
513    
514  unsigned long int original_ims;  unsigned long int original_ims;
# Line 501  int prop_fail_result; Line 520  int prop_fail_result;
520  int prop_category;  int prop_category;
521  int prop_chartype;  int prop_chartype;
522  int prop_script;  int prop_script;
523  int *prop_test_variable;  int oclength;
524    uschar occhars[8];
525  #endif  #endif
526    
527  int ctype;  int ctype;
# Line 516  int save_offset1, save_offset2, save_off Line 536  int save_offset1, save_offset2, save_off
536  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
537    
538  eptrblock newptrb;  eptrblock newptrb;
539  #endif  #endif     /* NO_RECURSE */
540    
541  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
542  variables. */  variables. */
# Line 524  variables. */ Line 544  variables. */
544  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
545  prop_value = 0;  prop_value = 0;
546  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_variable = NULL;  
547  #endif  #endif
548    
549    
550  /* This label is used for tail recursion, which is used in a few cases even  /* This label is used for tail recursion, which is used in a few cases even
551  when NO_RECURSE is not defined, in order to reduce the amount of stack that is  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
552  used. Thanks to Ian Taylor for noticing this possibility and sending the  used. Thanks to Ian Taylor for noticing this possibility and sending the
# Line 542  defined). However, RMATCH isn't like a f Line 562  defined). However, RMATCH isn't like a f
562  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
563  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
564    
565    #ifdef SUPPORT_UTF8
566    utf8 = md->utf8;       /* Local copy of the flag */
567    #else
568    utf8 = FALSE;
569    #endif
570    
571  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
572  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
573    
# Line 550  if (rdepth >= md->match_limit_recursion) Line 576  if (rdepth >= md->match_limit_recursion)
576    
577  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
578    
579  #ifdef SUPPORT_UTF8  /* At the start of a group with an unlimited repeat that may match an empty
580  utf8 = md->utf8;       /* Local copy of the flag */  string, the match_cbegroup flag is set. When this is the case, add the current
581  #else  subject pointer to the chain of such remembered pointers, to be checked when we
582  utf8 = FALSE;  hit the closing ket, in order to break infinite loops that match no characters.
583  #endif  When match() is called in other circumstances, don't add to the chain. If this
584    is a tail recursion, use a block from the workspace, as the one on the stack is
585  /* At the start of a bracketed group, add the current subject pointer to the  already used. */
 stack of such pointers, to be re-instated at the end of the group when we hit  
 the closing ket. When match() is called in other circumstances, we don't add to  
 this stack. */  
586    
587  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
588    {    {
589    newptrb.epb_prev = eptrb;    eptrblock *p;
590    newptrb.epb_saved_eptr = eptr;    if ((flags & match_tail_recursed) != 0)
591    eptrb = &newptrb;      {
592        if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
593        p = md->eptrchain + md->eptrn++;
594        }
595      else p = &newptrb;
596      p->epb_saved_eptr = eptr;
597      p->epb_prev = eptrb;
598      eptrb = p;
599    }    }
600    
601  /* Now start processing the operations. */  /* Now start processing the opcodes. */
602    
603  for (;;)  for (;;)
604    {    {
605      minimize = possessive = FALSE;
606    op = *ecode;    op = *ecode;
   minimize = FALSE;  
607    
608    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
609    matching at least one subject character. */    matching at least one subject character. */
# Line 583  for (;;) Line 613  for (;;)
613        eptr > md->start_match)        eptr > md->start_match)
614      md->hitend = TRUE;      md->hitend = TRUE;
615    
616    /* Opening capturing bracket. If there is space in the offset vector, save    switch(op)
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
617      {      {
618      number = op - OP_BRA;      /* Handle a capturing bracket. If there is space in the offset vector, save
619        the current subject position in the working slot at the top of the vector.
620      /* For extended extraction brackets (large number), we have to fish out the      We mustn't change the current values of the data slot, because they may be
621      number from a dummy opcode at the start. */      set from a previous iteration of this group, and be referred to by a
622        reference inside the group.
623      if (number > EXTRACT_BASIC_MAX)  
624        number = GET2(ecode, 2+LINK_SIZE);      If the bracket fails to match, we need to restore this value and also the
625        values of the final offsets, in case they were set by a previous iteration
626        of the same bracket.
627    
628        If there isn't enough space in the offset vector, treat this as if it were
629        a non-capturing bracket. Don't worry about setting the flag for the error
630        case here; that is handled in the code for KET. */
631    
632        case OP_CBRA:
633        case OP_SCBRA:
634        number = GET2(ecode, 1+LINK_SIZE);
635      offset = number << 1;      offset = number << 1;
636    
637  #ifdef DEBUG  #ifdef DEBUG
638      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
639        printf("subject=");
640      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
641      printf("\n");      printf("\n");
642  #endif  #endif
# Line 624  for (;;) Line 651  for (;;)
651        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
652        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
653    
654          flags = (op == OP_SCBRA)? match_cbegroup : 0;
655        do        do
656          {          {
657          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
658            match_isgroup);            ims, eptrb, flags, RM1);
659          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
660          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
661          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
# Line 643  for (;;) Line 671  for (;;)
671        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
672        }        }
673    
674      /* Insufficient room for saving captured contents */      /* Insufficient room for saving captured contents. Treat as a non-capturing
675        bracket. */
676    
677      else op = OP_BRA;      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
     }  
   
   /* Other types of node can be handled by a switch */  
   
   switch(op)  
     {  
     case OP_BRA:     /* Non-capturing bracket: optimized */  
     DPRINTF(("start bracket 0\n"));  
   
     /* Loop for all the alternatives */  
678    
679        /* Non-capturing bracket. Loop for all the alternatives. When we get to the
680        final alternative within the brackets, we would return the result of a
681        recursive call to match() whatever happened. We can reduce stack usage by
682        turning this into a tail recursion. */
683    
684        case OP_BRA:
685        case OP_SBRA:
686        DPRINTF(("start non-capturing bracket\n"));
687        flags = (op >= OP_SBRA)? match_cbegroup : 0;
688      for (;;)      for (;;)
689        {        {
       /* When we get to the final alternative within the brackets, we would  
       return the result of a recursive call to match() whatever happened. We  
       can reduce stack usage by turning this into a tail recursion. */  
   
690        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)
691         {          {
692         ecode += 1 + LINK_SIZE;          ecode += _pcre_OP_lengths[*ecode];
693         flags = match_isgroup;          flags |= match_tail_recursed;
694         DPRINTF(("bracket 0 tail recursion\n"));          DPRINTF(("bracket 0 tail recursion\n"));
695         goto TAIL_RECURSE;          goto TAIL_RECURSE;
696         }          }
697    
698        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
699        otherwise return. */        otherwise return. */
700    
701        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
702          match_isgroup);          eptrb, flags, RM2);
703        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
704        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
705        }        }
# Line 688  for (;;) Line 712  for (;;)
712      obeyed, we can use tail recursion to avoid using another stack frame. */      obeyed, we can use tail recursion to avoid using another stack frame. */
713    
714      case OP_COND:      case OP_COND:
715      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
716        if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
717          {
718          offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
719          condition = md->recursive != NULL &&
720            (offset == RREF_ANY || offset == md->recursive->group_num);
721          ecode += condition? 3 : GET(ecode, 1);
722          }
723    
724        else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
725        {        {
726        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
727        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
728          (md->recursive != NULL) :        ecode += condition? 3 : GET(ecode, 1);
729          (offset < offset_top && md->offset_vector[offset] >= 0);        }
730        ecode += condition? (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1));  
731        flags = match_isgroup;      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
732        goto TAIL_RECURSE;        {
733          condition = FALSE;
734          ecode += GET(ecode, 1);
735        }        }
736    
737      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
738      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
739        assertion. */
740    
741      else      else
742        {        {
743        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
744            match_condassert | match_isgroup);            match_condassert, RM3);
745        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
746          {          {
747          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
748            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
749          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
750          }          }
751        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH)
752          {          {
753          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
754          }          }
755        else ecode += GET(ecode, 1);        else
756            {
757            condition = FALSE;
758            ecode += GET(ecode, 1);
759            }
760          }
761    
762        /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
763        we can use tail recursion to avoid using another stack frame. */      we can use tail recursion to avoid using another stack frame. If the second
764        alternative doesn't exist, we can just plough on. */
765    
766        if (condition || *ecode == OP_ALT)
767          {
768        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
769        flags = match_isgroup;        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
770        goto TAIL_RECURSE;        goto TAIL_RECURSE;
771        }        }
772      /* Control never reaches here */      else
773          {
774      /* Skip over conditional reference or large extraction number data if        ecode += 1 + LINK_SIZE;
775      encountered. */        }
   
     case OP_CREF:  
     case OP_BRANUMBER:  
     ecode += 3;  
776      break;      break;
777    
778      /* End of the pattern. If we are in a recursion, we should restore the  
779      offsets appropriately and continue from after the call. */      /* End of the pattern. If we are in a top-level recursion, we should
780        restore the offsets appropriately and continue from after the call. */
781    
782      case OP_END:      case OP_END:
783      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
# Line 777  for (;;) Line 819  for (;;)
819      case OP_ASSERTBACK:      case OP_ASSERTBACK:
820      do      do
821        {        {
822        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
823          match_isgroup);          RM4);
824        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
825        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
826        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 804  for (;;) Line 846  for (;;)
846      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
847      do      do
848        {        {
849        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
850          match_isgroup);          RM5);
851        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
852        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
853        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 826  for (;;) Line 868  for (;;)
868  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
869      if (utf8)      if (utf8)
870        {        {
871        c = GET(ecode,1);        i = GET(ecode, 1);
872        for (i = 0; i < c; i++)        while (i-- > 0)
873          {          {
874          eptr--;          eptr--;
875          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
# Line 840  for (;;) Line 882  for (;;)
882      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
883    
884        {        {
885        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
886        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
887        }        }
888    
# Line 897  for (;;) Line 939  for (;;)
939      case OP_RECURSE:      case OP_RECURSE:
940        {        {
941        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
942        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
943            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
944    
945        /* Add to "recursing stack" */        /* Add to "recursing stack" */
946    
# Line 936  for (;;) Line 973  for (;;)
973        restore the offset and recursion data. */        restore the offset and recursion data. */
974    
975        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
976          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
977        do        do
978          {          {
979          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
980              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
981          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
982            {            {
983            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
# Line 982  for (;;) Line 1020  for (;;)
1020    
1021      do      do
1022        {        {
1023        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
1024          eptrb, match_isgroup);          eptrb, 0, RM7);
1025        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1026        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1027        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 997  for (;;) Line 1035  for (;;)
1035      /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1036      mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1037    
1038      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1039    
1040      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1041      eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
# Line 1028  for (;;) Line 1066  for (;;)
1066    
1067      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1068        {        {
1069        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,
1070            RM8);
1071        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1072        ecode = prev;        ecode = prev;
1073        flags = match_isgroup;        flags = match_tail_recursed;
1074        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1075        }        }
1076      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1077        {        {
1078        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1079        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1080        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1081        flags = 0;        flags = match_tail_recursed;
1082        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1083        }        }
1084      /* Control never gets here */      /* Control never gets here */
# Line 1060  for (;;) Line 1099  for (;;)
1099      case OP_BRAZERO:      case OP_BRAZERO:
1100        {        {
1101        next = ecode+1;        next = ecode+1;
1102        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1103        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1104        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1105        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1106        }        }
1107      break;      break;
1108    
1109      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1110        {        {
1111        next = ecode+1;        next = ecode+1;
1112        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1113        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1114        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1115        ecode++;        ecode++;
1116        }        }
1117      break;      break;
1118    
1119      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. */
     an assertion "group", stop matching and return MATCH_MATCH, but record the  
     current high water mark for use by positive assertions. Do this also  
     for the "once" (not-backup up) groups. */  
1120    
1121      case OP_KET:      case OP_KET:
1122      case OP_KETRMIN:      case OP_KETRMIN:
1123      case OP_KETRMAX:      case OP_KETRMAX:
1124      prev = ecode - GET(ecode, 1);      prev = ecode - GET(ecode, 1);
     saved_eptr = eptrb->epb_saved_eptr;  
1125    
1126      /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1127        infinite repeats of empty string matches, retrieve the subject start from
1128        the chain. Otherwise, set it NULL. */
1129    
1130      eptrb = eptrb->epb_prev;      if (*prev >= OP_SBRA)
1131          {
1132          saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1133          eptrb = eptrb->epb_prev;              /* Backup to previous group */
1134          }
1135        else saved_eptr = NULL;
1136    
1137        /* If we are at the end of an assertion group, stop matching and return
1138        MATCH_MATCH, but record the current high water mark for use by positive
1139        assertions. Do this also for the "once" (atomic) groups. */
1140    
1141      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1142          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1102  for (;;) Line 1147  for (;;)
1147        RRETURN(MATCH_MATCH);        RRETURN(MATCH_MATCH);
1148        }        }
1149    
1150      /* In all other cases except a conditional group we have to check the      /* For capturing groups we have to check the group number back at the start
1151      group number back at the start and if necessary complete handling an      and if necessary complete handling an extraction by setting the offsets and
1152      extraction by setting the offsets and bumping the high water mark. */      bumping the high water mark. Note that whole-pattern recursion is coded as
1153        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1154        when the OP_END is reached. Other recursion is handled here. */
1155    
1156      if (*prev != OP_COND)      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1157        {        {
1158        number = *prev - OP_BRA;        number = GET2(prev, 1+LINK_SIZE);
   
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);  
1159        offset = number << 1;        offset = number << 1;
1160    
1161  #ifdef DEBUG  #ifdef DEBUG
# Line 1121  for (;;) Line 1163  for (;;)
1163        printf("\n");        printf("\n");
1164  #endif  #endif
1165    
1166        /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1167        of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
       into group 0, so it won't be picked up here. Instead, we catch it when  
       the OP_END is reached. */  
   
       if (number > 0)  
1168          {          {
1169          md->capture_last = number;          md->offset_vector[offset] =
1170          if (offset >= md->offset_max) md->offset_overflow = TRUE; else            md->offset_vector[md->offset_end - number];
1171            {          md->offset_vector[offset+1] = eptr - md->start_subject;
1172            md->offset_vector[offset] =          if (offset_top <= offset) offset_top = offset + 2;
1173              md->offset_vector[md->offset_end - number];          }
1174            md->offset_vector[offset+1] = eptr - md->start_subject;  
1175            if (offset_top <= offset) offset_top = offset + 2;        /* Handle a recursively called group. Restore the offsets
1176            }        appropriately and continue from after the call. */
1177    
1178          /* Handle a recursively called group. Restore the offsets        if (md->recursive != NULL && md->recursive->group_num == number)
1179          appropriately and continue from after the call. */          {
1180            recursion_info *rec = md->recursive;
1181          if (md->recursive != NULL && md->recursive->group_num == number)          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1182            {          md->recursive = rec->prevrec;
1183            recursion_info *rec = md->recursive;          md->start_match = rec->save_start;
1184            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          memcpy(md->offset_vector, rec->offset_save,
1185            md->recursive = rec->prevrec;            rec->saved_max * sizeof(int));
1186            md->start_match = rec->save_start;          ecode = rec->after_call;
1187            memcpy(md->offset_vector, rec->offset_save,          ims = original_ims;
1188              rec->saved_max * sizeof(int));          break;
           ecode = rec->after_call;  
           ims = original_ims;  
           break;  
           }  
1189          }          }
1190        }        }
1191    
1192      /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1193      the group. */      flags, in case they got changed during the group. */
1194    
1195      ims = original_ims;      ims = original_ims;
1196      DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
# Line 1177  for (;;) Line 1211  for (;;)
1211      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1212      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame. */
1213    
1214        flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1215    
1216      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1217        {        {
1218        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,
1219            RM12);
1220        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1221        ecode = prev;        ecode = prev;
1222        flags = match_isgroup;        flags |= match_tail_recursed;
1223        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1224        }        }
1225      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1226        {        {
1227        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1228        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1229        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1230        flags = 0;        flags = match_tail_recursed;
1231        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1232        }        }
1233      /* Control never gets here */      /* Control never gets here */
# Line 1202  for (;;) Line 1239  for (;;)
1239      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1240        {        {
1241        if (eptr != md->start_subject &&        if (eptr != md->start_subject &&
1242            (eptr == md->end_subject ||            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
            eptr < md->start_subject + md->nllen ||  
            !IS_NEWLINE(eptr - md->nllen)))  
1243          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1244        ecode++;        ecode++;
1245        break;        break;
# Line 1244  for (;;) Line 1279  for (;;)
1279        if (!md->endonly)        if (!md->endonly)
1280          {          {
1281          if (eptr != md->end_subject &&          if (eptr != md->end_subject &&
1282              (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1283            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1284          ecode++;          ecode++;
1285          break;          break;
# Line 1263  for (;;) Line 1298  for (;;)
1298    
1299      case OP_EODN:      case OP_EODN:
1300      if (eptr != md->end_subject &&      if (eptr != md->end_subject &&
1301          (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1302        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1303      ecode++;      ecode++;
1304      break;      break;
# Line 1319  for (;;) Line 1354  for (;;)
1354      case OP_ANY:      case OP_ANY:
1355      if ((ims & PCRE_DOTALL) == 0)      if ((ims & PCRE_DOTALL) == 0)
1356        {        {
1357        if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
         RRETURN(MATCH_NOMATCH);  
1358        }        }
1359      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1360      if (utf8)      if (utf8)
# Line 1414  for (;;) Line 1448  for (;;)
1448      ecode++;      ecode++;
1449      break;      break;
1450    
1451        case OP_ANYNL:
1452        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1453        GETCHARINCTEST(c, eptr);
1454        switch(c)
1455          {
1456          default: RRETURN(MATCH_NOMATCH);
1457          case 0x000d:
1458          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1459          break;
1460          case 0x000a:
1461          case 0x000b:
1462          case 0x000c:
1463          case 0x0085:
1464          case 0x2028:
1465          case 0x2029:
1466          break;
1467          }
1468        ecode++;
1469        break;
1470    
1471  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1472      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1473      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1456  for (;;) Line 1510  for (;;)
1510    
1511          default:          default:
1512          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
         break;  
1513          }          }
1514    
1515        ecode += 3;        ecode += 3;
# Line 1570  for (;;) Line 1623  for (;;)
1623          {          {
1624          for (fi = min;; fi++)          for (fi = min;; fi++)
1625            {            {
1626            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1627            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1628            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1629              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 1591  for (;;) Line 1644  for (;;)
1644            }            }
1645          while (eptr >= pp)          while (eptr >= pp)
1646            {            {
1647            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1648            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1649            eptr -= length;            eptr -= length;
1650            }            }
# Line 1696  for (;;) Line 1749  for (;;)
1749            {            {
1750            for (fi = min;; fi++)            for (fi = min;; fi++)
1751              {              {
1752              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1753              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1754              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1755              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 1716  for (;;) Line 1769  for (;;)
1769            {            {
1770            for (fi = min;; fi++)            for (fi = min;; fi++)
1771              {              {
1772              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1773              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1774              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1775              c = *eptr++;              c = *eptr++;
# Line 1753  for (;;) Line 1806  for (;;)
1806              }              }
1807            for (;;)            for (;;)
1808              {              {
1809              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1810              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1811              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
1812              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1772  for (;;) Line 1825  for (;;)
1825              }              }
1826            while (eptr >= pp)            while (eptr >= pp)
1827              {              {
1828              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
1829              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1830              eptr--;              eptr--;
1831              }              }
# Line 1843  for (;;) Line 1896  for (;;)
1896          {          {
1897          for (fi = min;; fi++)          for (fi = min;; fi++)
1898            {            {
1899            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
1900            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1901            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1902            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
# Line 1867  for (;;) Line 1920  for (;;)
1920            }            }
1921          for(;;)          for(;;)
1922            {            {
1923            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
1924            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1925            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
1926            BACKCHAR(eptr)            BACKCHAR(eptr)
# Line 1926  for (;;) Line 1979  for (;;)
1979    
1980        else        else
1981          {          {
1982          int dc;          unsigned int dc;
1983          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
1984          ecode += length;          ecode += length;
1985    
# Line 1953  for (;;) Line 2006  for (;;)
2006        }        }
2007      break;      break;
2008    
2009      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2010    
2011      case OP_EXACT:      case OP_EXACT:
2012      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2013      ecode += 3;      ecode += 3;
2014      goto REPEATCHAR;      goto REPEATCHAR;
2015    
2016        case OP_POSUPTO:
2017        possessive = TRUE;
2018        /* Fall through */
2019    
2020      case OP_UPTO:      case OP_UPTO:
2021      case OP_MINUPTO:      case OP_MINUPTO:
2022      min = 0;      min = 0;
# Line 1968  for (;;) Line 2025  for (;;)
2025      ecode += 3;      ecode += 3;
2026      goto REPEATCHAR;      goto REPEATCHAR;
2027    
2028        case OP_POSSTAR:
2029        possessive = TRUE;
2030        min = 0;
2031        max = INT_MAX;
2032        ecode++;
2033        goto REPEATCHAR;
2034    
2035        case OP_POSPLUS:
2036        possessive = TRUE;
2037        min = 1;
2038        max = INT_MAX;
2039        ecode++;
2040        goto REPEATCHAR;
2041    
2042        case OP_POSQUERY:
2043        possessive = TRUE;
2044        min = 0;
2045        max = 1;
2046        ecode++;
2047        goto REPEATCHAR;
2048    
2049      case OP_STAR:      case OP_STAR:
2050      case OP_MINSTAR:      case OP_MINSTAR:
2051      case OP_PLUS:      case OP_PLUS:
# Line 1999  for (;;) Line 2077  for (;;)
2077    
2078        if (length > 1)        if (length > 1)
2079          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2080  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2081          int othercase;          unsigned int othercase;
2082          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2083              (othercase = _pcre_ucp_othercase(fc)) >= 0 &&              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
              othercase >= 0)  
2084            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2085            else oclength = 0;
2086  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2087    
2088          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2089            {            {
2090            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (memcmp(eptr, charptr, length) == 0) eptr += length;
2091    #ifdef SUPPORT_UCP
2092            /* Need braces because of following else */            /* Need braces because of following else */
2093            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2094            else            else
# Line 2020  for (;;) Line 2096  for (;;)
2096              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2097              eptr += oclength;              eptr += oclength;
2098              }              }
2099    #else   /* without SUPPORT_UCP */
2100              else { RRETURN(MATCH_NOMATCH); }
2101    #endif  /* SUPPORT_UCP */
2102            }            }
2103    
2104          if (min == max) continue;          if (min == max) continue;
# Line 2028  for (;;) Line 2107  for (;;)
2107            {            {
2108            for (fi = min;; fi++)            for (fi = min;; fi++)
2109              {              {
2110              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2111              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2112              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2113              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2114    #ifdef SUPPORT_UCP
2115              /* Need braces because of following else */              /* Need braces because of following else */
2116              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2117              else              else
# Line 2039  for (;;) Line 2119  for (;;)
2119                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2120                eptr += oclength;                eptr += oclength;
2121                }                }
2122    #else   /* without SUPPORT_UCP */
2123                else { RRETURN (MATCH_NOMATCH); }
2124    #endif  /* SUPPORT_UCP */
2125              }              }
2126            /* Control never gets here */            /* Control never gets here */
2127            }            }
2128          else  
2129            else  /* Maximize */
2130            {            {
2131            pp = eptr;            pp = eptr;
2132            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2133              {              {
2134              if (eptr > md->end_subject - length) break;              if (eptr > md->end_subject - length) break;
2135              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2136    #ifdef SUPPORT_UCP
2137              else if (oclength == 0) break;              else if (oclength == 0) break;
2138              else              else
2139                {                {
2140                if (memcmp(eptr, occhars, oclength) != 0) break;                if (memcmp(eptr, occhars, oclength) != 0) break;
2141                eptr += oclength;                eptr += oclength;
2142                }                }
2143    #else   /* without SUPPORT_UCP */
2144                else break;
2145    #endif  /* SUPPORT_UCP */
2146              }              }
2147            while (eptr >= pp)  
2148              if (possessive) continue;
2149              for(;;)
2150             {             {
2151             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2152             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2153               if (eptr == pp) RRETURN(MATCH_NOMATCH);
2154    #ifdef SUPPORT_UCP
2155               eptr--;
2156               BACKCHAR(eptr);
2157    #else   /* without SUPPORT_UCP */
2158             eptr -= length;             eptr -= length;
2159    #endif  /* SUPPORT_UCP */
2160             }             }
           RRETURN(MATCH_NOMATCH);  
2161            }            }
2162          /* Control never gets here */          /* Control never gets here */
2163          }          }
# Line 2102  for (;;) Line 2197  for (;;)
2197          {          {
2198          for (fi = min;; fi++)          for (fi = min;; fi++)
2199            {            {
2200            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2201            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2202            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
2203                fc != md->lcc[*eptr++])                fc != md->lcc[*eptr++])
# Line 2110  for (;;) Line 2205  for (;;)
2205            }            }
2206          /* Control never gets here */          /* Control never gets here */
2207          }          }
2208        else        else  /* Maximize */
2209          {          {
2210          pp = eptr;          pp = eptr;
2211          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2118  for (;;) Line 2213  for (;;)
2213            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2214            eptr++;            eptr++;
2215            }            }
2216            if (possessive) continue;
2217          while (eptr >= pp)          while (eptr >= pp)
2218            {            {
2219            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2220            eptr--;            eptr--;
2221            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2222            }            }
# Line 2139  for (;;) Line 2235  for (;;)
2235          {          {
2236          for (fi = min;; fi++)          for (fi = min;; fi++)
2237            {            {
2238            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2239            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2240            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2241              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2242            }            }
2243          /* Control never gets here */          /* Control never gets here */
2244          }          }
2245        else        else  /* Maximize */
2246          {          {
2247          pp = eptr;          pp = eptr;
2248          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2154  for (;;) Line 2250  for (;;)
2250            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2251            eptr++;            eptr++;
2252            }            }
2253            if (possessive) continue;
2254          while (eptr >= pp)          while (eptr >= pp)
2255            {            {
2256            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2257            eptr--;            eptr--;
2258            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2259            }            }
# Line 2206  for (;;) Line 2303  for (;;)
2303      ecode += 3;      ecode += 3;
2304      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2305    
2306        case OP_NOTPOSSTAR:
2307        possessive = TRUE;
2308        min = 0;
2309        max = INT_MAX;
2310        ecode++;
2311        goto REPEATNOTCHAR;
2312    
2313        case OP_NOTPOSPLUS:
2314        possessive = TRUE;
2315        min = 1;
2316        max = INT_MAX;
2317        ecode++;
2318        goto REPEATNOTCHAR;
2319    
2320        case OP_NOTPOSQUERY:
2321        possessive = TRUE;
2322        min = 0;
2323        max = 1;
2324        ecode++;
2325        goto REPEATNOTCHAR;
2326    
2327        case OP_NOTPOSUPTO:
2328        possessive = TRUE;
2329        min = 0;
2330        max = GET2(ecode, 1);
2331        ecode += 3;
2332        goto REPEATNOTCHAR;
2333    
2334      case OP_NOTSTAR:      case OP_NOTSTAR:
2335      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2336      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2245  for (;;) Line 2370  for (;;)
2370        /* UTF-8 mode */        /* UTF-8 mode */
2371        if (utf8)        if (utf8)
2372          {          {
2373          register int d;          register unsigned int d;
2374          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2375            {            {
2376            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2270  for (;;) Line 2395  for (;;)
2395          /* UTF-8 mode */          /* UTF-8 mode */
2396          if (utf8)          if (utf8)
2397            {            {
2398            register int d;            register unsigned int d;
2399            for (fi = min;; fi++)            for (fi = min;; fi++)
2400              {              {
2401              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2402              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2403              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2404              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
# Line 2287  for (;;) Line 2412  for (;;)
2412            {            {
2413            for (fi = min;; fi++)            for (fi = min;; fi++)
2414              {              {
2415              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2416              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2417              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2418                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2306  for (;;) Line 2431  for (;;)
2431          /* UTF-8 mode */          /* UTF-8 mode */
2432          if (utf8)          if (utf8)
2433            {            {
2434            register int d;            register unsigned int d;
2435            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2436              {              {
2437              int len = 1;              int len = 1;
# Line 2316  for (;;) Line 2441  for (;;)
2441              if (fc == d) break;              if (fc == d) break;
2442              eptr += len;              eptr += len;
2443              }              }
2444            for(;;)          if (possessive) continue;
2445            for(;;)
2446              {              {
2447              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2448              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2449              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2450              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2333  for (;;) Line 2459  for (;;)
2459              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2460              eptr++;              eptr++;
2461              }              }
2462              if (possessive) continue;
2463            while (eptr >= pp)            while (eptr >= pp)
2464              {              {
2465              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2466              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2467              eptr--;              eptr--;
2468              }              }
# Line 2354  for (;;) Line 2481  for (;;)
2481        /* UTF-8 mode */        /* UTF-8 mode */
2482        if (utf8)        if (utf8)
2483          {          {
2484          register int d;          register unsigned int d;
2485          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2486            {            {
2487            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2377  for (;;) Line 2504  for (;;)
2504          /* UTF-8 mode */          /* UTF-8 mode */
2505          if (utf8)          if (utf8)
2506            {            {
2507            register int d;            register unsigned int d;
2508            for (fi = min;; fi++)            for (fi = min;; fi++)
2509              {              {
2510              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2511              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2512              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2513              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fi >= max || eptr >= md->end_subject || fc == d)
# Line 2393  for (;;) Line 2520  for (;;)
2520            {            {
2521            for (fi = min;; fi++)            for (fi = min;; fi++)
2522              {              {
2523              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2524              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2525              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2526                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2412  for (;;) Line 2539  for (;;)
2539          /* UTF-8 mode */          /* UTF-8 mode */
2540          if (utf8)          if (utf8)
2541            {            {
2542            register int d;            register unsigned int d;
2543            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2544              {              {
2545              int len = 1;              int len = 1;
# Line 2421  for (;;) Line 2548  for (;;)
2548              if (fc == d) break;              if (fc == d) break;
2549              eptr += len;              eptr += len;
2550              }              }
2551              if (possessive) continue;
2552            for(;;)            for(;;)
2553              {              {
2554              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2555              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2556              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2557              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2438  for (;;) Line 2566  for (;;)
2566              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2567              eptr++;              eptr++;
2568              }              }
2569              if (possessive) continue;
2570            while (eptr >= pp)            while (eptr >= pp)
2571              {              {
2572              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2573              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2574              eptr--;              eptr--;
2575              }              }
# Line 2469  for (;;) Line 2598  for (;;)
2598      ecode += 3;      ecode += 3;
2599      goto REPEATTYPE;      goto REPEATTYPE;
2600    
2601        case OP_TYPEPOSSTAR:
2602        possessive = TRUE;
2603        min = 0;
2604        max = INT_MAX;
2605        ecode++;
2606        goto REPEATTYPE;
2607    
2608        case OP_TYPEPOSPLUS:
2609        possessive = TRUE;
2610        min = 1;
2611        max = INT_MAX;
2612        ecode++;
2613        goto REPEATTYPE;
2614    
2615        case OP_TYPEPOSQUERY:
2616        possessive = TRUE;
2617        min = 0;
2618        max = 1;
2619        ecode++;
2620        goto REPEATTYPE;
2621    
2622        case OP_TYPEPOSUPTO:
2623        possessive = TRUE;
2624        min = 0;
2625        max = GET2(ecode, 1);
2626        ecode += 3;
2627        goto REPEATTYPE;
2628    
2629      case OP_TYPESTAR:      case OP_TYPESTAR:
2630      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
2631      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2571  for (;;) Line 2728  for (;;)
2728    
2729            default:            default:
2730            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
2731            }            }
2732          }          }
2733    
# Line 2611  for (;;) Line 2767  for (;;)
2767          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2768            {            {
2769            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
2770                 ((ims & PCRE_DOTALL) == 0 &&                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
                  eptr <= md->end_subject - md->nllen &&  
                  IS_NEWLINE(eptr)))  
2771              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2772            eptr++;            eptr++;
2773            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
# Line 2624  for (;;) Line 2778  for (;;)
2778          eptr += min;          eptr += min;
2779          break;          break;
2780    
2781            case OP_ANYNL:
2782            for (i = 1; i <= min; i++)
2783              {
2784              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2785              GETCHARINC(c, eptr);
2786              switch(c)
2787                {
2788                default: RRETURN(MATCH_NOMATCH);
2789                case 0x000d:
2790                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2791                break;
2792                case 0x000a:
2793                case 0x000b:
2794                case 0x000c:
2795                case 0x0085:
2796                case 0x2028:
2797                case 0x2029:
2798                break;
2799                }
2800              }
2801            break;
2802    
2803          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2804          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2805            {            {
# Line 2692  for (;;) Line 2868  for (;;)
2868  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
2869    
2870        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
2871        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
2872          number of bytes present, as this was tested above. */
2873    
2874        switch(ctype)        switch(ctype)
2875          {          {
# Line 2701  for (;;) Line 2878  for (;;)
2878            {            {
2879            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2880              {              {
2881              if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2882              eptr++;              eptr++;
2883              }              }
2884            }            }
# Line 2713  for (;;) Line 2889  for (;;)
2889          eptr += min;          eptr += min;
2890          break;          break;
2891    
2892            /* Because of the CRLF case, we can't assume the minimum number of
2893            bytes are present in this case. */
2894    
2895            case OP_ANYNL:
2896            for (i = 1; i <= min; i++)
2897              {
2898              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2899              switch(*eptr++)
2900                {
2901                default: RRETURN(MATCH_NOMATCH);
2902                case 0x000d:
2903                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2904                break;
2905                case 0x000a:
2906                case 0x000b:
2907                case 0x000c:
2908                case 0x0085:
2909                break;
2910                }
2911              }
2912            break;
2913    
2914          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2915          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2916            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2768  for (;;) Line 2966  for (;;)
2966            case PT_ANY:            case PT_ANY:
2967            for (fi = min;; fi++)            for (fi = min;; fi++)
2968              {              {
2969              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
2970              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2971              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2972              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2973              if (prop_fail_result) RRETURN(MATCH_NOMATCH);              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2974              }              }
2975            break;            /* Control never gets here */
2976    
2977            case PT_LAMP:            case PT_LAMP:
2978            for (fi = min;; fi++)            for (fi = min;; fi++)
2979              {              {
2980              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
2981              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2982              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2983              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2789  for (;;) Line 2987  for (;;)
2987                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
2988                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2989              }              }
2990            break;            /* Control never gets here */
2991    
2992            case PT_GC:            case PT_GC:
2993            for (fi = min;; fi++)            for (fi = min;; fi++)
2994              {              {
2995              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
2996              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2997              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2998              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2802  for (;;) Line 3000  for (;;)
3000              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3001                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3002              }              }
3003            break;            /* Control never gets here */
3004    
3005            case PT_PC:            case PT_PC:
3006            for (fi = min;; fi++)            for (fi = min;; fi++)
3007              {              {
3008              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3009              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3010              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3011              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2815  for (;;) Line 3013  for (;;)
3013              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3014                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3015              }              }
3016            break;            /* Control never gets here */
3017    
3018            case PT_SC:            case PT_SC:
3019            for (fi = min;; fi++)            for (fi = min;; fi++)
3020              {              {
3021              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3022              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3023              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3024              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2828  for (;;) Line 3026  for (;;)
3026              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3027                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3028              }              }
3029            break;            /* Control never gets here */
3030    
3031            default:            default:
3032            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
3033            }            }
3034          }          }
3035    
# Line 2843  for (;;) Line 3040  for (;;)
3040          {          {
3041          for (fi = min;; fi++)          for (fi = min;; fi++)
3042            {            {
3043            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3044            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3045            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3046            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
# Line 2872  for (;;) Line 3069  for (;;)
3069          {          {
3070          for (fi = min;; fi++)          for (fi = min;; fi++)
3071            {            {
3072            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3073            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3074            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3075                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3076                  eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))                  IS_NEWLINE(eptr)))
3077              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3078    
3079            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
# Line 2888  for (;;) Line 3085  for (;;)
3085              case OP_ANYBYTE:              case OP_ANYBYTE:
3086              break;              break;
3087    
3088                case OP_ANYNL:
3089                switch(c)
3090                  {
3091                  default: RRETURN(MATCH_NOMATCH);
3092                  case 0x000d:
3093                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3094                  break;
3095                  case 0x000a:
3096                  case 0x000b:
3097                  case 0x000c:
3098                  case 0x0085:
3099                  case 0x2028:
3100                  case 0x2029:
3101                  break;
3102                  }
3103                break;
3104    
3105              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3106              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3107                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2929  for (;;) Line 3143  for (;;)
3143          {          {
3144          for (fi = min;; fi++)          for (fi = min;; fi++)
3145            {            {
3146            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3147            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3148            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3149                 ((ims & PCRE_DOTALL) == 0 &&                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
                  eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))  
3150              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3151    
3152            c = *eptr++;            c = *eptr++;
# Line 2945  for (;;) Line 3158  for (;;)
3158              case OP_ANYBYTE:              case OP_ANYBYTE:
3159              break;              break;
3160    
3161                case OP_ANYNL:
3162                switch(c)
3163                  {
3164                  default: RRETURN(MATCH_NOMATCH);
3165                  case 0x000d:
3166                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3167                  break;
3168                  case 0x000a:
3169                  case 0x000b:
3170                  case 0x000c:
3171                  case 0x0085:
3172                  break;
3173                  }
3174                break;
3175    
3176              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3177              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3178              break;              break;
# Line 2977  for (;;) Line 3205  for (;;)
3205        /* Control never gets here */        /* Control never gets here */
3206        }        }
3207    
3208      /* If maximizing it is worth using inline code for speed, doing the type      /* If maximizing, it is worth using inline code for speed, doing the type
3209      test once at the start (i.e. keep it out of the loop). Again, keep the      test once at the start (i.e. keep it out of the loop). Again, keep the
3210      UTF-8 and UCP stuff separate. */      UTF-8 and UCP stuff separate. */
3211    
# Line 3058  for (;;) Line 3286  for (;;)
3286    
3287          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3288    
3289            if (possessive) continue;
3290          for(;;)          for(;;)
3291            {            {
3292            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3293            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3294            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3295            BACKCHAR(eptr);            BACKCHAR(eptr);
# Line 3093  for (;;) Line 3322  for (;;)
3322    
3323          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3324    
3325            if (possessive) continue;
3326          for(;;)          for(;;)
3327            {            {
3328            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3329            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3330            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3331            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
# Line 3135  for (;;) Line 3365  for (;;)
3365                {                {
3366                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3367                  {                  {
3368                  if (eptr >= md->end_subject ||                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
                     (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))  
                   break;  
3369                  eptr++;                  eptr++;
3370                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3371                  }                  }
# Line 3161  for (;;) Line 3389  for (;;)
3389                {                {
3390                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3391                  {                  {
3392                  if (eptr >= md->end_subject ||                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
                     (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))  
                   break;  
3393                  eptr++;                  eptr++;
3394                  }                  }
3395                break;                break;
# Line 3171  for (;;) Line 3397  for (;;)
3397              else              else
3398                {                {
3399                c = max - min;                c = max - min;
3400                if (c > md->end_subject - eptr) c = md->end_subject - eptr;                if (c > (unsigned int)(md->end_subject - eptr))
3401                    c = md->end_subject - eptr;
3402                eptr += c;                eptr += c;
3403                }                }
3404              }              }
# Line 3181  for (;;) Line 3408  for (;;)
3408    
3409            case OP_ANYBYTE:            case OP_ANYBYTE:
3410            c = max - min;            c = max - min;
3411            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3412                c = md->end_subject - eptr;
3413            eptr += c;            eptr += c;
3414            break;            break;
3415    
3416              case OP_ANYNL:
3417              for (i = min; i < max; i++)
3418                {
3419                int len = 1;
3420                if (eptr >= md->end_subject) break;
3421                GETCHARLEN(c, eptr, len);
3422                if (c == 0x000d)
3423                  {
3424                  if (++eptr >= md->end_subject) break;
3425                  if (*eptr == 0x000a) eptr++;
3426                  }
3427                else
3428                  {
3429                  if (c != 0x000a && c != 0x000b && c != 0x000c &&
3430                      c != 0x0085 && c != 0x2028 && c != 0x2029)
3431                    break;
3432                  eptr += len;
3433                  }
3434                }
3435              break;
3436    
3437            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3438            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3439              {              {
# Line 3257  for (;;) Line 3506  for (;;)
3506    
3507          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3508    
3509            if (possessive) continue;
3510          for(;;)          for(;;)
3511            {            {
3512            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
3513            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3514            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3515            BACKCHAR(eptr);            BACKCHAR(eptr);
# Line 3277  for (;;) Line 3527  for (;;)
3527              {              {
3528              for (i = min; i < max; i++)              for (i = min; i < max; i++)
3529                {                {
3530                if (eptr >= md->end_subject ||                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
                   (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))  
                 break;  
3531                eptr++;                eptr++;
3532                }                }
3533              break;              break;
# Line 3288  for (;;) Line 3536  for (;;)
3536    
3537            case OP_ANYBYTE:            case OP_ANYBYTE:
3538            c = max - min;            c = max - min;
3539            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3540                c = md->end_subject - eptr;
3541            eptr += c;            eptr += c;
3542            break;            break;
3543    
3544              case OP_ANYNL:
3545              for (i = min; i < max; i++)
3546                {
3547                if (eptr >= md->end_subject) break;
3548                c = *eptr;
3549                if (c == 0x000d)
3550                  {
3551                  if (++eptr >= md->end_subject) break;
3552                  if (*eptr == 0x000a) eptr++;
3553                  }
3554                else
3555                  {
3556                  if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
3557                    break;
3558                  eptr++;
3559                  }
3560                }
3561              break;
3562    
3563            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3564            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3565              {              {
# Line 3352  for (;;) Line 3620  for (;;)
3620    
3621          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3622    
3623            if (possessive) continue;
3624          while (eptr >= pp)          while (eptr >= pp)
3625            {            {
3626            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
3627            eptr--;            eptr--;
3628            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3629            }            }
# Line 3366  for (;;) Line 3635  for (;;)
3635        }        }
3636      /* Control never gets here */      /* Control never gets here */
3637    
3638      /* There's been some horrible disaster. Since all codes > OP_BRA are      /* There's been some horrible disaster. Arrival here can only mean there is
3639      for capturing brackets, and there shouldn't be any gaps between 0 and      something seriously wrong in the code above or the OP_xxx definitions. */
     OP_BRA, arrival here can only mean there is something seriously wrong  
     in the code above or the OP_xxx definitions. */  
3640    
3641      default:      default:
3642      DPRINTF(("Unknown opcode %d\n", *ecode));      DPRINTF(("Unknown opcode %d\n", *ecode));
3643      RRETURN(PCRE_ERROR_UNKNOWN_NODE);      RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
3644      }      }
3645    
3646    /* Do not stick any code in here without much thought; it is assumed    /* Do not stick any code in here without much thought; it is assumed
# Line 3382  for (;;) Line 3649  for (;;)
3649    
3650    }             /* End of main loop */    }             /* End of main loop */
3651  /* Control never reaches here */  /* Control never reaches here */
3652    
3653    
3654    /* When compiling to use the heap rather than the stack for recursive calls to
3655    match(), the RRETURN() macro jumps here. The number that is saved in
3656    frame->Xwhere indicates which label we actually want to return to. */
3657    
3658    #ifdef NO_RECURSE
3659    #define LBL(val) case val: goto L_RM##val;
3660    HEAP_RETURN:
3661    switch (frame->Xwhere)
3662      {
3663      LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
3664      LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
3665      LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
3666      LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
3667      LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
3668      LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
3669      default:
3670      DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
3671      return PCRE_ERROR_INTERNAL;
3672      }
3673    #undef LBL
3674    #endif  /* NO_RECURSE */
3675  }  }
3676    
3677    
# Line 3411  Undefine all the macros that were define Line 3701  Undefine all the macros that were define
3701    
3702  #undef cur_is_word  #undef cur_is_word
3703  #undef condition  #undef condition
 #undef minimize  
3704  #undef prev_is_word  #undef prev_is_word
3705    
3706  #undef original_ims  #undef original_ims
# Line 3467  Returns:          > 0 => success; value Line 3756  Returns:          > 0 => success; value
3756                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3757  */  */
3758    
3759  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
3760  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
3761    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
3762    int offsetcount)    int offsetcount)
# Line 3484  BOOL startline; Line 3773  BOOL startline;
3773  BOOL firstline;  BOOL firstline;
3774  BOOL first_byte_caseless = FALSE;  BOOL first_byte_caseless = FALSE;
3775  BOOL req_byte_caseless = FALSE;  BOOL req_byte_caseless = FALSE;
3776    BOOL utf8;
3777  match_data match_block;  match_data match_block;
3778  match_data *md = &match_block;  match_data *md = &match_block;
3779  const uschar *tables;  const uschar *tables;
# Line 3491  const uschar *start_bits = NULL; Line 3781  const uschar *start_bits = NULL;
3781  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
3782  USPTR end_subject;  USPTR end_subject;
3783  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
3784    eptrblock eptrchain[EPTR_WORK_SIZE];
3785    
3786  pcre_study_data internal_study;  pcre_study_data internal_study;
3787  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3567  md->end_subject = md->start_subject + le Line 3858  md->end_subject = md->start_subject + le
3858  end_subject = md->end_subject;  end_subject = md->end_subject;
3859    
3860  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
3861  md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
3862    
3863  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
3864  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
# Line 3576  md->partial = (options & PCRE_PARTIAL) ! Line 3867  md->partial = (options & PCRE_PARTIAL) !
3867  md->hitend = FALSE;  md->hitend = FALSE;
3868    
3869  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
3870    md->eptrchain = eptrchain;              /* Make workspace generally available */
3871    
3872  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
3873  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
3874    
3875  /* Handle different types of newline. The two bits give four cases. If nothing  /* Handle different types of newline. The three bits give eight cases. If
3876  is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
3877    
3878  switch ((((options & PCRE_NEWLINE_CRLF) == 0)? re->options : options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
3879           PCRE_NEWLINE_CRLF)         PCRE_NEWLINE_BITS)
3880    {    {
3881    default:              newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
3882    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = '\r'; break;
3883    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = '\n'; break;
3884    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
3885         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
3886      case PCRE_NEWLINE_ANY: newline = -1; break;
3887      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
3888      default: return PCRE_ERROR_BADNEWLINE;
3889    }    }
3890    
3891  if (newline > 255)  if (newline == -2)
3892      {
3893      md->nltype = NLTYPE_ANYCRLF;
3894      }
3895    else if (newline < 0)
3896    {    {
3897    md->nllen = 2;    md->nltype = NLTYPE_ANY;
   md->nl[0] = (newline >> 8) & 255;  
   md->nl[1] = newline & 255;  
3898    }    }
3899  else  else
3900    {    {
3901    md->nllen = 1;    md->nltype = NLTYPE_FIXED;
3902    md->nl[0] = newline;    if (newline > 255)
3903        {
3904        md->nllen = 2;
3905        md->nl[0] = (newline >> 8) & 255;
3906        md->nl[1] = newline & 255;
3907        }
3908      else
3909        {
3910        md->nllen = 1;
3911        md->nl[0] = newline;
3912        }
3913    }    }
3914    
3915  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
# Line 3615  if (md->partial && (re->options & PCRE_N Line 3922  if (md->partial && (re->options & PCRE_N
3922  back the character offset. */  back the character offset. */
3923    
3924  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3925  if (md->utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3926    {    {
3927    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
3928      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
# Line 3707  if ((re->options & PCRE_REQCHSET) != 0) Line 4014  if ((re->options & PCRE_REQCHSET) != 0)
4014    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
4015    }    }
4016    
4017    
4018    /* ==========================================================================*/
4019    
4020  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
4021  the loop runs just once. */  the loop runs just once. */
4022    
4023  do  for(;;)
4024    {    {
4025    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
4026    
# Line 3725  do Line 4035  do
4035    
4036    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* Advance to a unique first char if possible. If firstline is TRUE, the
4037    start of the match is constrained to the first line of a multiline string.    start of the match is constrained to the first line of a multiline string.
4038    Implement this by temporarily adjusting end_subject so that we stop scanning    That is, the match must be before or at the first newline. Implement this by
4039    at a newline. If the match fails at the newline, later code breaks this loop.    temporarily adjusting end_subject so that we stop scanning at a newline. If
4040    */    the match fails at the newline, later code breaks this loop. */
4041    
4042    if (firstline)    if (firstline)
4043      {      {
4044      USPTR t = start_match;      USPTR t = start_match;
4045      while (t <= save_end_subject - md->nllen && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4046      end_subject = t;      end_subject = t;
4047      }      }
4048    
# Line 3753  do Line 4063  do
4063    
4064    else if (startline)    else if (startline)
4065      {      {
4066      if (start_match >= md->start_subject + md->nllen +      if (start_match > md->start_subject + start_offset)
           start_offset)  
4067        {        {
4068        while (start_match <= end_subject &&        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4069               !IS_NEWLINE(start_match - md->nllen))          start_match++;
4070    
4071          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4072          and we are now at a LF, advance the match position by one more character.
4073          */
4074    
4075          if (start_match[-1] == '\r' &&
4076               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4077               start_match < end_subject &&
4078               *start_match == '\n')
4079          start_match++;          start_match++;
4080        }        }
4081      }      }
# Line 3793  do Line 4111  do
4111    
4112    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end can
4113    take a long time, and give bad performance on quite ordinary patterns. This    take a long time, and give bad performance on quite ordinary patterns. This
4114    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4115    don't do this when the string is sufficiently long.    string... so we don't do this when the string is sufficiently long.
4116    
4117    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested.
4118    */    */
# Line 3826  do Line 4144  do
4144            }            }
4145          }          }
4146    
4147        /* If we can't find the required character, break the matching loop */        /* If we can't find the required character, break the matching loop,
4148          forcing a match failure. */
4149    
4150        if (p >= end_subject) break;        if (p >= end_subject)
4151            {
4152            rc = MATCH_NOMATCH;
4153            break;
4154            }
4155    
4156        /* If we have found the required character, save the point where we        /* If we have found the required character, save the point where we
4157        found it, so that we don't search again next time round the loop if        found it, so that we don't search again next time round the loop if
# Line 3838  do Line 4161  do
4161        }        }
4162      }      }
4163    
4164    /* When a match occurs, substrings will be set for all internal extractions;    /* OK, we can now run the match. */
   we just need to set up the whole thing as substring 0 before returning. If  
   there were too many extractions, set the return code to zero. In the case  
   where we had to get some local store to hold offsets for backreferences, copy  
   those back references that we can. In this case there need not be overflow  
   if certain parts of the pattern were not used. */  
4165    
4166    md->start_match = start_match;    md->start_match = start_match;
4167    md->match_call_count = 0;    md->match_call_count = 0;
4168      md->eptrn = 0;                          /* Next free eptrchain slot */
4169      rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);
4170    
4171    rc = match(start_match, md->start_code, 2, md, ims, NULL, match_isgroup, 0);    /* Any return other than MATCH_NOMATCH breaks the loop. */
4172    
4173    /* When the result is no match, if the subject's first character was a    if (rc != MATCH_NOMATCH) break;
   newline and the PCRE_FIRSTLINE option is set, break (which will return  
   PCRE_ERROR_NOMATCH). The option requests that a match occur before the first  
   newline in the subject. Otherwise, advance the pointer to the next character  
   and continue - but the continuation will actually happen only when the  
   pattern is not anchored. */  
4174    
4175    if (rc == MATCH_NOMATCH)    /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4176      {    newline in the subject (though it may continue over the newline). Therefore,
4177      if (firstline &&    if we have just failed to match, starting at a newline, do not continue. */
4178          start_match <= md->end_subject - md->nllen &&  
4179          IS_NEWLINE(start_match))    if (firstline && IS_NEWLINE(start_match)) break;
4180        break;  
4181      start_match++;    /* Advance the match position by one character. */
4182    
4183      start_match++;
4184  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4185      if (md->utf8)    if (utf8)
4186        while(start_match < end_subject && (*start_match & 0xc0) == 0x80)      while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4187          start_match++;        start_match++;
4188  #endif  #endif
4189      continue;  
4190      }    /* Break the loop if the pattern is anchored or if we have passed the end of
4191      the subject. */
4192    
4193      if (anchored || start_match > end_subject) break;
4194    
4195      /* If we have just passed a CR and the newline option is CRLF or ANY or
4196      ANYCRLF, and we are now at a LF, advance the match position by one more
4197      character. */
4198    
4199      if (start_match[-1] == '\r' &&
4200           (md->nltype == NLTYPE_ANY ||
4201            md->nltype == NLTYPE_ANYCRLF ||
4202            md->nllen == 2) &&
4203           start_match < end_subject &&
4204           *start_match == '\n')
4205        start_match++;
4206    
4207    if (rc != MATCH_MATCH)    }   /* End of for(;;) "bumpalong" loop */
4208      {  
4209      DPRINTF((">>>> error: returning %d\n", rc));  /* ==========================================================================*/
4210      return rc;  
4211      }  /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4212    conditions is true:
4213    
4214    (1) The pattern is anchored;
4215    
4216    (2) We are past the end of the subject;
4217    
4218    (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4219        this option requests that a match occur at or before the first newline in
4220        the subject.
4221    
4222    /* We have a match! Copy the offset information from temporary store if  When we have a match and the offset vector is big enough to deal with any
4223    necessary */  backreferences, captured substring offsets will already be set up. In the case
4224    where we had to get some local store to hold offsets for backreference
4225    processing, copy those that we can. In this case there need not be overflow if
4226    certain parts of the pattern were not used, even though there are more
4227    capturing parentheses than vector slots. */
4228    
4229    if (rc == MATCH_MATCH)
4230      {
4231    if (using_temporary_offsets)    if (using_temporary_offsets)
4232      {      {
4233      if (offsetcount >= 4)      if (offsetcount >= 4)
# Line 3889  do Line 4236  do
4236          (offsetcount - 2) * sizeof(int));          (offsetcount - 2) * sizeof(int));
4237        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
4238        }        }
4239      if (md->end_offset_top > offsetcount)      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
       md->offset_overflow = TRUE;  
   
4240      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
4241      (pcre_free)(md->offset_vector);      (pcre_free)(md->offset_vector);
4242      }      }
4243    
4244      /* Set the return code to the number of captured strings, or 0 if there are
4245      too many to fit into the vector. */
4246    
4247    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = md->offset_overflow? 0 : md->end_offset_top/2;
4248    
4249      /* If there is space, set up the whole thing as substring 0. */
4250    
4251    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4252      {      {
4253      offsets[0] = start_match - md->start_subject;      offsets[0] = start_match - md->start_subject;
# Line 3908  do Line 4258  do
4258    return rc;    return rc;
4259    }    }
4260    
4261  /* This "while" is the end of the "do" above */  /* Control gets here if there has been an error, or if the overall match
4262    attempt has failed at all permitted starting positions. */
 while (!anchored && start_match <= end_subject);  
4263    
4264  if (using_temporary_offsets)  if (using_temporary_offsets)
4265    {    {
# Line 3918  if (using_temporary_offsets) Line 4267  if (using_temporary_offsets)
4267    (pcre_free)(md->offset_vector);    (pcre_free)(md->offset_vector);
4268    }    }
4269    
4270  if (md->partial && md->hitend)  if (rc != MATCH_NOMATCH)
4271      {
4272      DPRINTF((">>>> error: returning %d\n", rc));
4273      return rc;
4274      }
4275    else if (md->partial && md->hitend)
4276    {    {
4277    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4278    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;

Legend:
Removed from v.91  
changed lines
  Added in v.165

  ViewVC Help
Powered by ViewVC 1.1.5