/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC revision 145 by ph10, Wed Apr 4 14:06:52 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #define NLBLOCK md             /* Block containing newline information */
46    #define PSSTART start_subject  /* Field containing processed string start */
47    #define PSEND   end_subject    /* Field containing processed string end */
48    
49  #include "pcre_internal.h"  #include "pcre_internal.h"
50    
51    /* Undefine some potentially clashing cpp symbols */
52    
53  /* Structure for building a chain of data that actually lives on the  #undef min
54  stack, for holding the values of the subject pointer at the start of each  #undef max
55  subpattern, so as to detect when an empty string has been matched by a  
56  subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
57  are on the heap, not on the stack. */  obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
58    
59  typedef struct eptrblock {  #define EPTR_WORK_SIZE (1000)
   struct eptrblock *epb_prev;  
   const uschar *epb_saved_eptr;  
 } eptrblock;  
60    
61  /* Flag bits for the match() function */  /* Flag bits for the match() function */
62    
63  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
64  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
65    #define match_tail_recursed  0x04  /* Tail recursive call */
66    
67  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
68  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 101  Returns:     nothing Line 103  Returns:     nothing
103  static void  static void
104  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
105  {  {
106  int c;  unsigned int c;
107  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
108  while (length-- > 0)  while (length-- > 0)
109    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 128  Returns:      TRUE if matched Line 130  Returns:      TRUE if matched
130  */  */
131    
132  static BOOL  static BOOL
133  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
134    unsigned long int ims)    unsigned long int ims)
135  {  {
136  const uschar *p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
137    
138  #ifdef DEBUG  #ifdef DEBUG
139  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 169  return TRUE; Line 171  return TRUE;
171  ****************************************************************************  ****************************************************************************
172                     RECURSION IN THE match() FUNCTION                     RECURSION IN THE match() FUNCTION
173    
174  The match() function is highly recursive. Some regular expressions can cause  The match() function is highly recursive, though not every recursive call
175  it to recurse thousands of times. I was writing for Unix, so I just let it  increases the recursive depth. Nevertheless, some regular expressions can cause
176  call itself recursively. This uses the stack for saving everything that has  it to recurse to a great depth. I was writing for Unix, so I just let it call
177  to be saved for a recursive call. On Unix, the stack can be large, and this  itself recursively. This uses the stack for saving everything that has to be
178  works fine.  saved for a recursive call. On Unix, the stack can be large, and this works
179    fine.
180  It turns out that on non-Unix systems there are problems with programs that  
181  use a lot of stack. (This despite the fact that every last chip has oodles  It turns out that on some non-Unix-like systems there are problems with
182  of memory these days, and techniques for extending the stack have been known  programs that use a lot of stack. (This despite the fact that every last chip
183  for decades.) So....  has oodles of memory these days, and techniques for extending the stack have
184    been known for decades.) So....
185    
186  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
187  calls by keeping local variables that need to be preserved in blocks of memory  calls by keeping local variables that need to be preserved in blocks of memory
188  obtained from malloc instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
189  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
190  always used to.  always used to.
191  ****************************************************************************  ****************************************************************************
192  ***************************************************************************/  ***************************************************************************/
193    
194    
195  /* These versions of the macros use the stack, as normal */  /* These versions of the macros use the stack, as normal. There are debugging
196    versions and production versions. */
197    
198  #ifndef NO_RECURSE  #ifndef NO_RECURSE
199  #define REGISTER register  #define REGISTER register
200  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)  #ifdef DEBUG
201    #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
202      { \
203      printf("match() called in line %d\n", __LINE__); \
204      rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \
205      printf("to line %d\n", __LINE__); \
206      }
207    #define RRETURN(ra) \
208      { \
209      printf("match() returned %d from line %d ", ra, __LINE__); \
210      return ra; \
211      }
212    #else
213    #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
214      rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)
215  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
216    #endif
217    
218  #else  #else
219    
220    
# Line 215  match(), which never changes. */ Line 235  match(), which never changes. */
235      newframe->Xims = re;\      newframe->Xims = re;\
236      newframe->Xeptrb = rf;\      newframe->Xeptrb = rf;\
237      newframe->Xflags = rg;\      newframe->Xflags = rg;\
238        newframe->Xrdepth = frame->Xrdepth + 1;\
239      newframe->Xprevframe = frame;\      newframe->Xprevframe = frame;\
240      frame = newframe;\      frame = newframe;\
241      DPRINTF(("restarting from line %d\n", __LINE__));\      DPRINTF(("restarting from line %d\n", __LINE__));\
# Line 256  typedef struct heapframe { Line 277  typedef struct heapframe {
277    long int Xims;    long int Xims;
278    eptrblock *Xeptrb;    eptrblock *Xeptrb;
279    int Xflags;    int Xflags;
280      unsigned int Xrdepth;
281    
282    /* Function local variables */    /* Function local variables */
283    
# Line 271  typedef struct heapframe { Line 293  typedef struct heapframe {
293    
294    BOOL Xcur_is_word;    BOOL Xcur_is_word;
295    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
296    BOOL Xprev_is_word;    BOOL Xprev_is_word;
297    
298    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
299    
300  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
301    int Xprop_type;    int Xprop_type;
302      int Xprop_value;
303    int Xprop_fail_result;    int Xprop_fail_result;
304    int Xprop_category;    int Xprop_category;
305    int Xprop_chartype;    int Xprop_chartype;
306    int Xprop_othercase;    int Xprop_script;
307    int Xprop_test_against;    int Xoclength;
308    int *Xprop_test_variable;    uschar Xocchars[8];
309  #endif  #endif
310    
311    int Xctype;    int Xctype;
312    int Xfc;    unsigned int Xfc;
313    int Xfi;    int Xfi;
314    int Xlength;    int Xlength;
315    int Xmax;    int Xmax;
# Line 320  typedef struct heapframe { Line 342  typedef struct heapframe {
342  *         Match from current position            *  *         Match from current position            *
343  *************************************************/  *************************************************/
344    
345  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
346  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
347  same response.  same response.
348    
# Line 333  performance. Tests using gcc on a SPARC Line 352  performance. Tests using gcc on a SPARC
352  made performance worse.  made performance worse.
353    
354  Arguments:  Arguments:
355     eptr        pointer in subject     eptr        pointer to current character in subject
356     ecode       position in code     ecode       pointer to current position in compiled code
357     offset_top  current top pointer     offset_top  current top pointer
358     md          pointer to "static" info for the match     md          pointer to "static" info for the match
359     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 342  Arguments: Line 361  Arguments:
361                   brackets - for testing for empty matches                   brackets - for testing for empty matches
362     flags       can contain     flags       can contain
363                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
364                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
365                       group that can match an empty string
366                     match_tail_recursed - this is a tail_recursed group
367       rdepth      the recursion depth
368    
369  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
370                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
371                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
372                   (e.g. stopped by recursion limit)                   (e.g. stopped by repeated call or recursion limit)
373  */  */
374    
375  static int  static int
376  match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
377    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
378    int flags)    int flags, unsigned int rdepth)
379  {  {
380  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
381  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
382  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
383    
384  register int  rrc;    /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
385  register int  i;      /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
386  register int  c;      /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
387  register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
388    
389    BOOL minimize, possessive; /* Quantifier options */
390    
391  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
392  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 381  frame->Xoffset_top = offset_top; Line 405  frame->Xoffset_top = offset_top;
405  frame->Xims = ims;  frame->Xims = ims;
406  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
407  frame->Xflags = flags;  frame->Xflags = flags;
408    frame->Xrdepth = rdepth;
409    
410  /* This is where control jumps back to to effect "recursion" */  /* This is where control jumps back to to effect "recursion" */
411    
# Line 394  HEAP_RECURSE: Line 419  HEAP_RECURSE:
419  #define ims                frame->Xims  #define ims                frame->Xims
420  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
421  #define flags              frame->Xflags  #define flags              frame->Xflags
422    #define rdepth             frame->Xrdepth
423    
424  /* Ditto for the local variables */  /* Ditto for the local variables */
425    
# Line 411  HEAP_RECURSE: Line 437  HEAP_RECURSE:
437    
438  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
439  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
440  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
441    
442  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
443    
444  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
445  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
446    #define prop_value         frame->Xprop_value
447  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
448  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
449  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
450  #define prop_othercase     frame->Xprop_othercase  #define prop_script        frame->Xprop_script
451  #define prop_test_against  frame->Xprop_test_against  #define oclength           frame->Xoclength
452  #define prop_test_variable frame->Xprop_test_variable  #define occhars            frame->Xocchars
453  #endif  #endif
454    
455  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 447  HEAP_RECURSE: Line 473  HEAP_RECURSE:
473  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
474  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
475    
476  #else  #else         /* NO_RECURSE not defined */
477  #define fi i  #define fi i
478  #define fc c  #define fc c
479    
480    
481  #ifdef SUPPORT_UTF8                /* Many of these variables are used ony */  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
482  const uschar *charptr;             /* small blocks of the code. My normal  */  const uschar *charptr;             /* in small blocks of the code. My normal */
483  #endif                             /* style of coding would have declared  */  #endif                             /* style of coding would have declared    */
484  const uschar *callpat;             /* them within each of those blocks.    */  const uschar *callpat;             /* them within each of those blocks.      */
485  const uschar *data;                /* However, in order to accommodate the */  const uschar *data;                /* However, in order to accommodate the   */
486  const uschar *next;                /* version of this code that uses an    */  const uschar *next;                /* version of this code that uses an      */
487  const uschar *pp;                  /* external "stack" implemented on the  */  USPTR         pp;                  /* external "stack" implemented on the    */
488  const uschar *prev;                /* heap, it is easier to declare them   */  const uschar *prev;                /* heap, it is easier to declare them all */
489  const uschar *saved_eptr;          /* all here, so the declarations can    */  USPTR         saved_eptr;          /* here, so the declarations can be cut   */
490                                     /* be cut out in a block. The only      */                                     /* out in a block. The only declarations  */
491  recursion_info new_recursive;      /* declarations within blocks below are */  recursion_info new_recursive;      /* within blocks below are for variables  */
492                                     /* for variables that do not have to    */                                     /* that do not have to be preserved over  */
493  BOOL cur_is_word;                  /* be preserved over a recursive call   */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
494  BOOL condition;                    /* to RMATCH().                         */  BOOL condition;
 BOOL minimize;  
495  BOOL prev_is_word;  BOOL prev_is_word;
496    
497  unsigned long int original_ims;  unsigned long int original_ims;
498    
499  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
500  int prop_type;  int prop_type;
501    int prop_value;
502  int prop_fail_result;  int prop_fail_result;
503  int prop_category;  int prop_category;
504  int prop_chartype;  int prop_chartype;
505  int prop_othercase;  int prop_script;
506  int prop_test_against;  int oclength;
507  int *prop_test_variable;  uschar occhars[8];
508  #endif  #endif
509    
510  int ctype;  int ctype;
# Line 493  int save_offset1, save_offset2, save_off Line 519  int save_offset1, save_offset2, save_off
519  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
520    
521  eptrblock newptrb;  eptrblock newptrb;
522  #endif  #endif     /* NO_RECURSE */
523    
524  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
525  variables. */  variables. */
526    
527  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
528    prop_value = 0;
529  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_against = 0;  
 prop_test_variable = NULL;  
530  #endif  #endif
531    
532  /* OK, now we can get on with the real code of the function. Recursion is  
533  specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,  /* This label is used for tail recursion, which is used in a few cases even
534  these just turn into a recursive call to match() and a "return", respectively.  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
535  However, RMATCH isn't like a function call because it's quite a complicated  used. Thanks to Ian Taylor for noticing this possibility and sending the
536  macro. It has to be used in one particular way. This shouldn't, however, impact  original patch. */
537  performance when true recursion is being used. */  
538    TAIL_RECURSE:
539    
540    /* OK, now we can get on with the real code of the function. Recursive calls
541    are specified by the macro RMATCH and RRETURN is used to return. When
542    NO_RECURSE is *not* defined, these just turn into a recursive call to match()
543    and a "return", respectively (possibly with some debugging if DEBUG is
544    defined). However, RMATCH isn't like a function call because it's quite a
545    complicated macro. It has to be used in one particular way. This shouldn't,
546    however, impact performance when true recursion is being used. */
547    
548    /* First check that we haven't called match() too many times, or that we
549    haven't exceeded the recursive call limit. */
550    
551  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
552    if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
553    
554  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
555    
556    #ifdef SUPPORT_UTF8
557  utf8 = md->utf8;       /* Local copy of the flag */  utf8 = md->utf8;       /* Local copy of the flag */
558    #else
559    utf8 = FALSE;
560    #endif
561    
562  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
563  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
564  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
565  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
566    When match() is called in other circumstances, don't add to the chain. If this
567    is a tail recursion, use a block from the workspace, as the one on the stack is
568    already used. */
569    
570  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
571    {    {
572    newptrb.epb_prev = eptrb;    eptrblock *p;
573    newptrb.epb_saved_eptr = eptr;    if ((flags & match_tail_recursed) != 0)
574    eptrb = &newptrb;      {
575        if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
576        p = md->eptrchain + md->eptrn++;
577        }
578      else p = &newptrb;
579      p->epb_saved_eptr = eptr;
580      p->epb_prev = eptrb;
581      eptrb = p;
582    }    }
583    
584  /* Now start processing the operations. */  /* Now start processing the opcodes. */
585    
586  for (;;)  for (;;)
587    {    {
588      minimize = possessive = FALSE;
589    op = *ecode;    op = *ecode;
   minimize = FALSE;  
590    
591    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
592    matching at least one subject character. */    matching at least one subject character. */
# Line 543  for (;;) Line 596  for (;;)
596        eptr > md->start_match)        eptr > md->start_match)
597      md->hitend = TRUE;      md->hitend = TRUE;
598    
599    /* Opening capturing bracket. If there is space in the offset vector, save    switch(op)
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
600      {      {
601      number = op - OP_BRA;      /* Handle a capturing bracket. If there is space in the offset vector, save
602        the current subject position in the working slot at the top of the vector.
603      /* For extended extraction brackets (large number), we have to fish out the      We mustn't change the current values of the data slot, because they may be
604      number from a dummy opcode at the start. */      set from a previous iteration of this group, and be referred to by a
605        reference inside the group.
606      if (number > EXTRACT_BASIC_MAX)  
607        number = GET2(ecode, 2+LINK_SIZE);      If the bracket fails to match, we need to restore this value and also the
608        values of the final offsets, in case they were set by a previous iteration
609        of the same bracket.
610    
611        If there isn't enough space in the offset vector, treat this as if it were
612        a non-capturing bracket. Don't worry about setting the flag for the error
613        case here; that is handled in the code for KET. */
614    
615        case OP_CBRA:
616        case OP_SCBRA:
617        number = GET2(ecode, 1+LINK_SIZE);
618      offset = number << 1;      offset = number << 1;
619    
620  #ifdef DEBUG  #ifdef DEBUG
621      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
622        printf("subject=");
623      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
624      printf("\n");      printf("\n");
625  #endif  #endif
# Line 584  for (;;) Line 634  for (;;)
634        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
635        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
636    
637          flags = (op == OP_SCBRA)? match_cbegroup : 0;
638        do        do
639          {          {
640          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
641            match_isgroup);            ims, eptrb, flags);
642          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
643          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
644          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
# Line 603  for (;;) Line 654  for (;;)
654        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
655        }        }
656    
657      /* Insufficient room for saving captured contents */      /* Insufficient room for saving captured contents. Treat as a non-capturing
658        bracket. */
659    
660      else op = OP_BRA;      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
     }  
661    
662    /* Other types of node can be handled by a switch */      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
663        final alternative within the brackets, we would return the result of a
664        recursive call to match() whatever happened. We can reduce stack usage by
665        turning this into a tail recursion. */
666    
667    switch(op)      case OP_BRA:
668      {      case OP_SBRA:
669      case OP_BRA:     /* Non-capturing bracket: optimized */      DPRINTF(("start non-capturing bracket\n"));
670      DPRINTF(("start bracket 0\n"));      flags = (op >= OP_SBRA)? match_cbegroup : 0;
671      do      for (;;)
672        {        {
673        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)
674          match_isgroup);          {
675            ecode += _pcre_OP_lengths[*ecode];
676            flags |= match_tail_recursed;
677            DPRINTF(("bracket 0 tail recursion\n"));
678            goto TAIL_RECURSE;
679            }
680    
681          /* For non-final alternatives, continue the loop for a NOMATCH result;
682          otherwise return. */
683    
684          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
685            eptrb, flags);
686        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
687        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
688        }        }
689      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
690    
691      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
692      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
693      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
694      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
695        obeyed, we can use tail recursion to avoid using another stack frame. */
696    
697      case OP_COND:      case OP_COND:
698      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
699        if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
700          {
701          offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
702          condition = md->recursive != NULL &&
703            (offset == RREF_ANY || offset == md->recursive->group_num);
704          ecode += condition? 3 : GET(ecode, 1);
705          }
706    
707        else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
708        {        {
709        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
710        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
711          (md->recursive != NULL) :        ecode += condition? 3 : GET(ecode, 1);
712          (offset < offset_top && md->offset_vector[offset] >= 0);        }
713        RMATCH(rrc, eptr, ecode + (condition?  
714          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
715          offset_top, md, ims, eptrb, match_isgroup);        {
716        RRETURN(rrc);        condition = FALSE;
717          ecode += GET(ecode, 1);
718        }        }
719    
720      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
721      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
722        assertion. */
723    
724      else      else
725        {        {
726        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
727            match_condassert | match_isgroup);            match_condassert);
728        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
729          {          {
730          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
731            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
732          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
733          }          }
734        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH)
735          {          {
736          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
737          }          }
738        else ecode += GET(ecode, 1);        else
739        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
740          match_isgroup);          condition = FALSE;
741        RRETURN(rrc);          ecode += GET(ecode, 1);
742            }
743        }        }
     /* Control never reaches here */  
744    
745      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
746      encountered. */      we can use tail recursion to avoid using another stack frame. If the second
747        alternative doesn't exist, we can just plough on. */
748    
749      case OP_CREF:      if (condition || *ecode == OP_ALT)
750      case OP_BRANUMBER:        {
751      ecode += 3;        ecode += 1 + LINK_SIZE;
752          flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
753          goto TAIL_RECURSE;
754          }
755        else
756          {
757          ecode += 1 + LINK_SIZE;
758          }
759      break;      break;
760    
761      /* End of the pattern. If we are in a recursion, we should restore the  
762      offsets appropriately and continue from after the call. */      /* End of the pattern. If we are in a top-level recursion, we should
763        restore the offsets appropriately and continue from after the call. */
764    
765      case OP_END:      case OP_END:
766      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
767        {        {
768        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
769        DPRINTF(("Hit the end in a (?0) recursion\n"));        DPRINTF(("End of pattern in a (?0) recursion\n"));
770        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
771        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
772          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
# Line 717  for (;;) Line 802  for (;;)
802      case OP_ASSERTBACK:      case OP_ASSERTBACK:
803      do      do
804        {        {
805        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
         match_isgroup);  
806        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
807        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
808        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 744  for (;;) Line 828  for (;;)
828      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
829      do      do
830        {        {
831        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
         match_isgroup);  
832        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
833        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
834        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 766  for (;;) Line 849  for (;;)
849  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
850      if (utf8)      if (utf8)
851        {        {
852        c = GET(ecode,1);        i = GET(ecode, 1);
853        for (i = 0; i < c; i++)        while (i-- > 0)
854          {          {
855          eptr--;          eptr--;
856          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
# Line 780  for (;;) Line 863  for (;;)
863      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
864    
865        {        {
866        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
867        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
868        }        }
869    
# Line 800  for (;;) Line 883  for (;;)
883        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 1;   /* Version 1 of the callout block */
884        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
885        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
886        cb.subject          = (const char *)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
887        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
888        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = md->start_match - md->start_subject;
889        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
# Line 837  for (;;) Line 920  for (;;)
920      case OP_RECURSE:      case OP_RECURSE:
921        {        {
922        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
923        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
924            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
925    
926        /* Add to "recursing stack" */        /* Add to "recursing stack" */
927    
# Line 876  for (;;) Line 954  for (;;)
954        restore the offset and recursion data. */        restore the offset and recursion data. */
955    
956        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
957          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
958        do        do
959          {          {
960          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
961              eptrb, match_isgroup);            md, ims, eptrb, flags);
962          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
963            {            {
964              DPRINTF(("Recursion matched\n"));
965            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
966            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
967              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
968            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
969            }            }
970          else if (rrc != MATCH_NOMATCH) RRETURN(rrc);          else if (rrc != MATCH_NOMATCH)
971              {
972              DPRINTF(("Recursion gave error %d\n", rrc));
973              RRETURN(rrc);
974              }
975    
976          md->recursive = &new_recursive;          md->recursive = &new_recursive;
977          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
# Line 912  for (;;) Line 996  for (;;)
996      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer. */
997    
998      case OP_ONCE:      case OP_ONCE:
999        {      prev = ecode;
1000        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1001    
1002        do      do
1003          {        {
1004          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
1005            eptrb, match_isgroup);          eptrb, 0);
1006          if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1007          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1008          ecode += GET(ecode,1);        ecode += GET(ecode,1);
1009          }        }
1010        while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1011    
1012        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1013    
1014        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1015    
1016        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1017        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1018    
1019        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1020    
1021        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1022        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1023    
1024        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1025        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1026        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1027        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1028        course of events. */      course of events. */
1029    
1030        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1031          {        {
1032          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1033          break;        break;
1034          }        }
1035    
1036        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1037        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1038        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1039        opcode. */      any options that changed within the bracket before re-running it, so
1040        check the next opcode. */
1041    
1042        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1043          {        {
1044          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1045          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1046          }        }
1047    
1048        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1049          {        {
1050          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
1051          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1052          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1053          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = match_tail_recursed;
1054          }        goto TAIL_RECURSE;
1055        else  /* OP_KETRMAX */        }
1056          {      else  /* OP_KETRMAX */
1057          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        {
1058          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);
1059          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1060          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += 1 + LINK_SIZE;
1061          }        flags = match_tail_recursed;
1062          goto TAIL_RECURSE;
1063        }        }
1064      RRETURN(MATCH_NOMATCH);      /* Control never gets here */
1065    
1066      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1067      bracketed group and go to there. */      bracketed group and go to there. */
# Line 994  for (;;) Line 1079  for (;;)
1079      case OP_BRAZERO:      case OP_BRAZERO:
1080        {        {
1081        next = ecode+1;        next = ecode+1;
1082        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);
1083        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1084        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1085        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1086        }        }
1087      break;      break;
1088    
1089      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1090        {        {
1091        next = ecode+1;        next = ecode+1;
1092        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1093        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
         match_isgroup);  
1094        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1095        ecode++;        ecode++;
1096        }        }
1097      break;      break;
1098    
1099      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. */
     an assertion "group", stop matching and return MATCH_MATCH, but record the  
     current high water mark for use by positive assertions. Do this also  
     for the "once" (not-backup up) groups. */  
1100    
1101      case OP_KET:      case OP_KET:
1102      case OP_KETRMIN:      case OP_KETRMIN:
1103      case OP_KETRMAX:      case OP_KETRMAX:
1104        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
1105    
1106        /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1107        infinite repeats of empty string matches, retrieve the subject start from
1108        the chain. Otherwise, set it NULL. */
1109    
1110        eptrb = eptrb->epb_prev;      if (*prev >= OP_SBRA)
1111          {
1112        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1113            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1114            *prev == OP_ONCE)        }
1115          {      else saved_eptr = NULL;
         md->end_match_ptr = eptr;      /* For ONCE */  
         md->end_offset_top = offset_top;  
         RRETURN(MATCH_MATCH);  
         }  
1116    
1117        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group, stop matching and return
1118        group number back at the start and if necessary complete handling an      MATCH_MATCH, but record the current high water mark for use by positive
1119        extraction by setting the offsets and bumping the high water mark. */      assertions. Do this also for the "once" (atomic) groups. */
1120    
1121        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1122          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1123          number = *prev - OP_BRA;          *prev == OP_ONCE)
1124          {
1125          md->end_match_ptr = eptr;      /* For ONCE */
1126          md->end_offset_top = offset_top;
1127          RRETURN(MATCH_MATCH);
1128          }
1129    
1130          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1131          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1132        bumping the high water mark. Note that whole-pattern recursion is coded as
1133        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1134        when the OP_END is reached. Other recursion is handled here. */
1135    
1136          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1137          offset = number << 1;        {
1138          number = GET2(prev, 1+LINK_SIZE);
1139          offset = number << 1;
1140    
1141  #ifdef DEBUG  #ifdef DEBUG
1142          printf("end bracket %d", number);        printf("end bracket %d", number);
1143          printf("\n");        printf("\n");
1144  #endif  #endif
1145    
1146          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1147          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1148          into group 0, so it won't be picked up here. Instead, we catch it when          {
1149          the OP_END is reached. */          md->offset_vector[offset] =
1150              md->offset_vector[md->offset_end - number];
1151          if (number > 0)          md->offset_vector[offset+1] = eptr - md->start_subject;
1152            {          if (offset_top <= offset) offset_top = offset + 2;
1153            md->capture_last = number;          }
1154            if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
1155              {        /* Handle a recursively called group. Restore the offsets
1156              md->offset_vector[offset] =        appropriately and continue from after the call. */
1157                md->offset_vector[md->offset_end - number];  
1158              md->offset_vector[offset+1] = eptr - md->start_subject;        if (md->recursive != NULL && md->recursive->group_num == number)
1159              if (offset_top <= offset) offset_top = offset + 2;          {
1160              }          recursion_info *rec = md->recursive;
1161            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1162            /* Handle a recursively called group. Restore the offsets          md->recursive = rec->prevrec;
1163            appropriately and continue from after the call. */          md->start_match = rec->save_start;
1164            memcpy(md->offset_vector, rec->offset_save,
1165            if (md->recursive != NULL && md->recursive->group_num == number)            rec->saved_max * sizeof(int));
1166              {          ecode = rec->after_call;
1167              recursion_info *rec = md->recursive;          ims = original_ims;
1168              DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          break;
             md->recursive = rec->prevrec;  
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
1169          }          }
1170          }
1171    
1172        /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1173        the group. */      flags, in case they got changed during the group. */
1174    
1175        ims = original_ims;      ims = original_ims;
1176        DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
1177    
1178        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1179        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1180        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1181        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1182        course of events. */      course of events. */
1183    
1184        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1185          {        {
1186          ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1187          break;        break;
1188          }        }
1189    
1190        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1191        preceding bracket, in the appropriate order. */      preceding bracket, in the appropriate order. In the second case, we can use
1192        tail recursion to avoid using another stack frame. */
1193    
1194        if (*ecode == OP_KETRMIN)      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
         {  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
       }  
1195    
1196      RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KETRMIN)
1197          {
1198          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1199          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1200          ecode = prev;
1201          flags |= match_tail_recursed;
1202          goto TAIL_RECURSE;
1203          }
1204        else  /* OP_KETRMAX */
1205          {
1206          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);
1207          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1208          ecode += 1 + LINK_SIZE;
1209          flags = match_tail_recursed;
1210          goto TAIL_RECURSE;
1211          }
1212        /* Control never gets here */
1213    
1214      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1215    
# Line 1135  for (;;) Line 1217  for (;;)
1217      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1218      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1219        {        {
1220        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1221              (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1222          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1223        ecode++;        ecode++;
1224        break;        break;
# Line 1163  for (;;) Line 1246  for (;;)
1246      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1247        {        {
1248        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1249          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1250        else        else
1251          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1252        ecode++;        ecode++;
# Line 1174  for (;;) Line 1257  for (;;)
1257        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
1258        if (!md->endonly)        if (!md->endonly)
1259          {          {
1260          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1261             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1262            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1263          ecode++;          ecode++;
1264          break;          break;
1265          }          }
1266        }        }
1267      /* ... else fall through */      /* ... else fall through for endonly */
1268    
1269      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1270    
# Line 1193  for (;;) Line 1276  for (;;)
1276      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1277    
1278      case OP_EODN:      case OP_EODN:
1279      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1280         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1281          RRETURN(MATCH_NOMATCH);
1282      ecode++;      ecode++;
1283      break;      break;
1284    
# Line 1247  for (;;) Line 1331  for (;;)
1331      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1332    
1333      case OP_ANY:      case OP_ANY:
1334      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if ((ims & PCRE_DOTALL) == 0)
1335        RRETURN(MATCH_NOMATCH);        {
1336          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1337          }
1338      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
 #ifdef SUPPORT_UTF8  
1339      if (utf8)      if (utf8)
1340        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 #endif  
1341      ecode++;      ecode++;
1342      break;      break;
1343    
# Line 1343  for (;;) Line 1427  for (;;)
1427      ecode++;      ecode++;
1428      break;      break;
1429    
1430        case OP_ANYNL:
1431        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1432        GETCHARINCTEST(c, eptr);
1433        switch(c)
1434          {
1435          default: RRETURN(MATCH_NOMATCH);
1436          case 0x000d:
1437          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1438          break;
1439          case 0x000a:
1440          case 0x000b:
1441          case 0x000c:
1442          case 0x0085:
1443          case 0x2028:
1444          case 0x2029:
1445          break;
1446          }
1447        ecode++;
1448        break;
1449    
1450  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1451      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1452      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1352  for (;;) Line 1456  for (;;)
1456      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1457      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1458        {        {
1459        int chartype, rqdtype;        int chartype, script;
1460        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = _pcre_ucp_findchar(c, &chartype, &othercase);  
   
       rqdtype = *(++ecode);  
       ecode++;  
1461    
1462        if (rqdtype >= 128)        switch(ecode[1])
1463          {          {
1464          if ((rqdtype - 128 != category) == (op == OP_PROP))          case PT_ANY:
1465            if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1466            break;
1467    
1468            case PT_LAMP:
1469            if ((chartype == ucp_Lu ||
1470                 chartype == ucp_Ll ||
1471                 chartype == ucp_Lt) == (op == OP_NOTPROP))
1472            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1473          }           break;
1474        else  
1475          {          case PT_GC:
1476          if ((rqdtype != chartype) == (op == OP_PROP))          if ((ecode[2] != category) == (op == OP_PROP))
1477              RRETURN(MATCH_NOMATCH);
1478            break;
1479    
1480            case PT_PC:
1481            if ((ecode[2] != chartype) == (op == OP_PROP))
1482            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1483            break;
1484    
1485            case PT_SC:
1486            if ((ecode[2] != script) == (op == OP_PROP))
1487              RRETURN(MATCH_NOMATCH);
1488            break;
1489    
1490            default:
1491            RRETURN(PCRE_ERROR_INTERNAL);
1492          }          }
1493    
1494          ecode += 3;
1495        }        }
1496      break;      break;
1497    
# Line 1379  for (;;) Line 1502  for (;;)
1502      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1503      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1504        {        {
1505        int chartype;        int chartype, script;
1506        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = _pcre_ucp_findchar(c, &chartype, &othercase);  
1507        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1508        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1509          {          {
# Line 1390  for (;;) Line 1512  for (;;)
1512            {            {
1513            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1514            }            }
1515          category = _pcre_ucp_findchar(c, &chartype, &othercase);          category = _pcre_ucp_findprop(c, &chartype, &script);
1516          if (category != ucp_M) break;          if (category != ucp_M) break;
1517          eptr += len;          eptr += len;
1518          }          }
# Line 1683  for (;;) Line 1805  for (;;)
1805            while (eptr >= pp)            while (eptr >= pp)
1806              {              {
1807              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
             eptr--;  
1808              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1809                eptr--;
1810              }              }
1811            }            }
1812    
# Line 1836  for (;;) Line 1958  for (;;)
1958    
1959        else        else
1960          {          {
1961          int dc;          unsigned int dc;
1962          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
1963          ecode += length;          ecode += length;
1964    
1965          /* If we have Unicode property support, we can use it to test the other          /* If we have Unicode property support, we can use it to test the other
1966          case of the character, if there is one. The result of _pcre_ucp_findchar() is          case of the character, if there is one. */
         < 0 if the char isn't found, and othercase is returned as zero if there  
         isn't one. */  
1967    
1968          if (fc != dc)          if (fc != dc)
1969            {            {
1970  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1971            int chartype;            if (dc != _pcre_ucp_othercase(fc))
           int othercase;  
           if (_pcre_ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)  
1972  #endif  #endif
1973              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
1974            }            }
# Line 1867  for (;;) Line 1985  for (;;)
1985        }        }
1986      break;      break;
1987    
1988      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
1989    
1990      case OP_EXACT:      case OP_EXACT:
1991      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
1992      ecode += 3;      ecode += 3;
1993      goto REPEATCHAR;      goto REPEATCHAR;
1994    
1995        case OP_POSUPTO:
1996        possessive = TRUE;
1997        /* Fall through */
1998    
1999      case OP_UPTO:      case OP_UPTO:
2000      case OP_MINUPTO:      case OP_MINUPTO:
2001      min = 0;      min = 0;
# Line 1882  for (;;) Line 2004  for (;;)
2004      ecode += 3;      ecode += 3;
2005      goto REPEATCHAR;      goto REPEATCHAR;
2006    
2007        case OP_POSSTAR:
2008        possessive = TRUE;
2009        min = 0;
2010        max = INT_MAX;
2011        ecode++;
2012        goto REPEATCHAR;
2013    
2014        case OP_POSPLUS:
2015        possessive = TRUE;
2016        min = 1;
2017        max = INT_MAX;
2018        ecode++;
2019        goto REPEATCHAR;
2020    
2021        case OP_POSQUERY:
2022        possessive = TRUE;
2023        min = 0;
2024        max = 1;
2025        ecode++;
2026        goto REPEATCHAR;
2027    
2028      case OP_STAR:      case OP_STAR:
2029      case OP_MINSTAR:      case OP_MINSTAR:
2030      case OP_PLUS:      case OP_PLUS:
# Line 1913  for (;;) Line 2056  for (;;)
2056    
2057        if (length > 1)        if (length > 1)
2058          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2059  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2060          int othercase;          unsigned int othercase;
         int chartype;  
2061          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2062               _pcre_ucp_findchar(fc, &chartype, &othercase) >= 0 &&              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
              othercase > 0)  
2063            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2064            else oclength = 0;
2065  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2066    
2067          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2068            {            {
2069            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (memcmp(eptr, charptr, length) == 0) eptr += length;
2070    #ifdef SUPPORT_UCP
2071            /* Need braces because of following else */            /* Need braces because of following else */
2072            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2073            else            else
# Line 1935  for (;;) Line 2075  for (;;)
2075              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2076              eptr += oclength;              eptr += oclength;
2077              }              }
2078    #else   /* without SUPPORT_UCP */
2079              else { RRETURN(MATCH_NOMATCH); }
2080    #endif  /* SUPPORT_UCP */
2081            }            }
2082    
2083          if (min == max) continue;          if (min == max) continue;
# Line 1947  for (;;) Line 2090  for (;;)
2090              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2091              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2092              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2093    #ifdef SUPPORT_UCP
2094              /* Need braces because of following else */              /* Need braces because of following else */
2095              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2096              else              else
# Line 1954  for (;;) Line 2098  for (;;)
2098                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2099                eptr += oclength;                eptr += oclength;
2100                }                }
2101    #else   /* without SUPPORT_UCP */
2102                else { RRETURN (MATCH_NOMATCH); }
2103    #endif  /* SUPPORT_UCP */
2104              }              }
2105            /* Control never gets here */            /* Control never gets here */
2106            }            }
2107          else  
2108            else  /* Maximize */
2109            {            {
2110            pp = eptr;            pp = eptr;
2111            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2112              {              {
2113              if (eptr > md->end_subject - length) break;              if (eptr > md->end_subject - length) break;
2114              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2115    #ifdef SUPPORT_UCP
2116              else if (oclength == 0) break;              else if (oclength == 0) break;
2117              else              else
2118                {                {
2119                if (memcmp(eptr, occhars, oclength) != 0) break;                if (memcmp(eptr, occhars, oclength) != 0) break;
2120                eptr += oclength;                eptr += oclength;
2121                }                }
2122    #else   /* without SUPPORT_UCP */
2123                else break;
2124    #endif  /* SUPPORT_UCP */
2125              }              }
2126            while (eptr >= pp)  
2127              if (possessive) continue;
2128              for(;;)
2129             {             {
2130             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2131             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2132               if (eptr == pp) RRETURN(MATCH_NOMATCH);
2133    #ifdef SUPPORT_UCP
2134               eptr--;
2135               BACKCHAR(eptr);
2136    #else   /* without SUPPORT_UCP */
2137             eptr -= length;             eptr -= length;
2138    #endif  /* SUPPORT_UCP */
2139             }             }
           RRETURN(MATCH_NOMATCH);  
2140            }            }
2141          /* Control never gets here */          /* Control never gets here */
2142          }          }
# Line 2025  for (;;) Line 2184  for (;;)
2184            }            }
2185          /* Control never gets here */          /* Control never gets here */
2186          }          }
2187        else        else  /* Maximize */
2188          {          {
2189          pp = eptr;          pp = eptr;
2190          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2033  for (;;) Line 2192  for (;;)
2192            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2193            eptr++;            eptr++;
2194            }            }
2195            if (possessive) continue;
2196          while (eptr >= pp)          while (eptr >= pp)
2197            {            {
2198            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2061  for (;;) Line 2221  for (;;)
2221            }            }
2222          /* Control never gets here */          /* Control never gets here */
2223          }          }
2224        else        else  /* Maximize */
2225          {          {
2226          pp = eptr;          pp = eptr;
2227          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2069  for (;;) Line 2229  for (;;)
2229            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2230            eptr++;            eptr++;
2231            }            }
2232            if (possessive) continue;
2233          while (eptr >= pp)          while (eptr >= pp)
2234            {            {
2235            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2121  for (;;) Line 2282  for (;;)
2282      ecode += 3;      ecode += 3;
2283      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2284    
2285        case OP_NOTPOSSTAR:
2286        possessive = TRUE;
2287        min = 0;
2288        max = INT_MAX;
2289        ecode++;
2290        goto REPEATNOTCHAR;
2291    
2292        case OP_NOTPOSPLUS:
2293        possessive = TRUE;
2294        min = 1;
2295        max = INT_MAX;
2296        ecode++;
2297        goto REPEATNOTCHAR;
2298    
2299        case OP_NOTPOSQUERY:
2300        possessive = TRUE;
2301        min = 0;
2302        max = 1;
2303        ecode++;
2304        goto REPEATNOTCHAR;
2305    
2306        case OP_NOTPOSUPTO:
2307        possessive = TRUE;
2308        min = 0;
2309        max = GET2(ecode, 1);
2310        ecode += 3;
2311        goto REPEATNOTCHAR;
2312    
2313      case OP_NOTSTAR:      case OP_NOTSTAR:
2314      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2315      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2160  for (;;) Line 2349  for (;;)
2349        /* UTF-8 mode */        /* UTF-8 mode */
2350        if (utf8)        if (utf8)
2351          {          {
2352          register int d;          register unsigned int d;
2353          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2354            {            {
2355            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2185  for (;;) Line 2374  for (;;)
2374          /* UTF-8 mode */          /* UTF-8 mode */
2375          if (utf8)          if (utf8)
2376            {            {
2377            register int d;            register unsigned int d;
2378            for (fi = min;; fi++)            for (fi = min;; fi++)
2379              {              {
2380              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2221  for (;;) Line 2410  for (;;)
2410          /* UTF-8 mode */          /* UTF-8 mode */
2411          if (utf8)          if (utf8)
2412            {            {
2413            register int d;            register unsigned int d;
2414            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2415              {              {
2416              int len = 1;              int len = 1;
# Line 2231  for (;;) Line 2420  for (;;)
2420              if (fc == d) break;              if (fc == d) break;
2421              eptr += len;              eptr += len;
2422              }              }
2423            for(;;)          if (possessive) continue;
2424            for(;;)
2425              {              {
2426              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2427              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 2248  for (;;) Line 2438  for (;;)
2438              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2439              eptr++;              eptr++;
2440              }              }
2441              if (possessive) continue;
2442            while (eptr >= pp)            while (eptr >= pp)
2443              {              {
2444              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2269  for (;;) Line 2460  for (;;)
2460        /* UTF-8 mode */        /* UTF-8 mode */
2461        if (utf8)        if (utf8)
2462          {          {
2463          register int d;          register unsigned int d;
2464          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2465            {            {
2466            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2292  for (;;) Line 2483  for (;;)
2483          /* UTF-8 mode */          /* UTF-8 mode */
2484          if (utf8)          if (utf8)
2485            {            {
2486            register int d;            register unsigned int d;
2487            for (fi = min;; fi++)            for (fi = min;; fi++)
2488              {              {
2489              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2327  for (;;) Line 2518  for (;;)
2518          /* UTF-8 mode */          /* UTF-8 mode */
2519          if (utf8)          if (utf8)
2520            {            {
2521            register int d;            register unsigned int d;
2522            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2523              {              {
2524              int len = 1;              int len = 1;
# Line 2336  for (;;) Line 2527  for (;;)
2527              if (fc == d) break;              if (fc == d) break;
2528              eptr += len;              eptr += len;
2529              }              }
2530              if (possessive) continue;
2531            for(;;)            for(;;)
2532              {              {
2533              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2353  for (;;) Line 2545  for (;;)
2545              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2546              eptr++;              eptr++;
2547              }              }
2548              if (possessive) continue;
2549            while (eptr >= pp)            while (eptr >= pp)
2550              {              {
2551              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2384  for (;;) Line 2577  for (;;)
2577      ecode += 3;      ecode += 3;
2578      goto REPEATTYPE;      goto REPEATTYPE;
2579    
2580        case OP_TYPEPOSSTAR:
2581        possessive = TRUE;
2582        min = 0;
2583        max = INT_MAX;
2584        ecode++;
2585        goto REPEATTYPE;
2586    
2587        case OP_TYPEPOSPLUS:
2588        possessive = TRUE;
2589        min = 1;
2590        max = INT_MAX;
2591        ecode++;
2592        goto REPEATTYPE;
2593    
2594        case OP_TYPEPOSQUERY:
2595        possessive = TRUE;
2596        min = 0;
2597        max = 1;
2598        ecode++;
2599        goto REPEATTYPE;
2600    
2601        case OP_TYPEPOSUPTO:
2602        possessive = TRUE;
2603        min = 0;
2604        max = GET2(ecode, 1);
2605        ecode += 3;
2606        goto REPEATTYPE;
2607    
2608      case OP_TYPESTAR:      case OP_TYPESTAR:
2609      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
2610      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2408  for (;;) Line 2629  for (;;)
2629        {        {
2630        prop_fail_result = ctype == OP_NOTPROP;        prop_fail_result = ctype == OP_NOTPROP;
2631        prop_type = *ecode++;        prop_type = *ecode++;
2632        if (prop_type >= 128)        prop_value = *ecode++;
         {  
         prop_test_against = prop_type - 128;  
         prop_test_variable = &prop_category;  
         }  
       else  
         {  
         prop_test_against = prop_type;  
         prop_test_variable = &prop_chartype;  
         }  
2633        }        }
2634      else prop_type = -1;      else prop_type = -1;
2635  #endif  #endif
# Line 2434  for (;;) Line 2646  for (;;)
2646      if (min > 0)      if (min > 0)
2647        {        {
2648  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2649        if (prop_type > 0)        if (prop_type >= 0)
2650          {          {
2651          for (i = 1; i <= min; i++)          switch(prop_type)
2652            {            {
2653            GETCHARINC(c, eptr);            case PT_ANY:
2654            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2655            if ((*prop_test_variable == prop_test_against) == prop_fail_result)            for (i = 1; i <= min; i++)
2656              RRETURN(MATCH_NOMATCH);              {
2657                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2658                GETCHARINC(c, eptr);
2659                }
2660              break;
2661    
2662              case PT_LAMP:
2663              for (i = 1; i <= min; i++)
2664                {
2665                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2666                GETCHARINC(c, eptr);
2667                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2668                if ((prop_chartype == ucp_Lu ||
2669                     prop_chartype == ucp_Ll ||
2670                     prop_chartype == ucp_Lt) == prop_fail_result)
2671                  RRETURN(MATCH_NOMATCH);
2672                }
2673              break;
2674    
2675              case PT_GC:
2676              for (i = 1; i <= min; i++)
2677                {
2678                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2679                GETCHARINC(c, eptr);
2680                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2681                if ((prop_category == prop_value) == prop_fail_result)
2682                  RRETURN(MATCH_NOMATCH);
2683                }
2684              break;
2685    
2686              case PT_PC:
2687              for (i = 1; i <= min; i++)
2688                {
2689                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2690                GETCHARINC(c, eptr);
2691                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2692                if ((prop_chartype == prop_value) == prop_fail_result)
2693                  RRETURN(MATCH_NOMATCH);
2694                }
2695              break;
2696    
2697              case PT_SC:
2698              for (i = 1; i <= min; i++)
2699                {
2700                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2701                GETCHARINC(c, eptr);
2702                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2703                if ((prop_script == prop_value) == prop_fail_result)
2704                  RRETURN(MATCH_NOMATCH);
2705                }
2706              break;
2707    
2708              default:
2709              RRETURN(PCRE_ERROR_INTERNAL);
2710            }            }
2711          }          }
2712    
# Line 2453  for (;;) Line 2718  for (;;)
2718          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2719            {            {
2720            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2721            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2722            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2723            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2724              {              {
# Line 2462  for (;;) Line 2727  for (;;)
2727                {                {
2728                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2729                }                }
2730              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2731              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2732              eptr += len;              eptr += len;
2733              }              }
# Line 2481  for (;;) Line 2746  for (;;)
2746          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2747            {            {
2748            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
2749               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2750              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2751              eptr++;
2752            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2753            }            }
2754          break;          break;
# Line 2491  for (;;) Line 2757  for (;;)
2757          eptr += min;          eptr += min;
2758          break;          break;
2759    
2760            case OP_ANYNL:
2761            for (i = 1; i <= min; i++)
2762              {
2763              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2764              GETCHARINC(c, eptr);
2765              switch(c)
2766                {
2767                default: RRETURN(MATCH_NOMATCH);
2768                case 0x000d:
2769                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2770                break;
2771                case 0x000a:
2772                case 0x000b:
2773                case 0x000c:
2774                case 0x0085:
2775                case 0x2028:
2776                case 0x2029:
2777                break;
2778                }
2779              }
2780            break;
2781    
2782          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2783          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2784            {            {
# Line 2559  for (;;) Line 2847  for (;;)
2847  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
2848    
2849        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
2850        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
2851          number of bytes present, as this was tested above. */
2852    
2853        switch(ctype)        switch(ctype)
2854          {          {
# Line 2567  for (;;) Line 2856  for (;;)
2856          if ((ims & PCRE_DOTALL) == 0)          if ((ims & PCRE_DOTALL) == 0)
2857            {            {
2858            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2859              if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);              {
2860                if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2861                eptr++;
2862                }
2863            }            }
2864          else eptr += min;          else eptr += min;
2865          break;          break;
# Line 2576  for (;;) Line 2868  for (;;)
2868          eptr += min;          eptr += min;
2869          break;          break;
2870    
2871            /* Because of the CRLF case, we can't assume the minimum number of
2872            bytes are present in this case. */
2873    
2874            case OP_ANYNL:
2875            for (i = 1; i <= min; i++)
2876              {
2877              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2878              switch(*eptr++)
2879                {
2880                default: RRETURN(MATCH_NOMATCH);
2881                case 0x000d:
2882                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2883                break;
2884                case 0x000a:
2885                case 0x000b:
2886                case 0x000c:
2887                case 0x0085:
2888                break;
2889                }
2890              }
2891            break;
2892    
2893          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2894          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2895            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2624  for (;;) Line 2938  for (;;)
2938      if (minimize)      if (minimize)
2939        {        {
2940  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2941        if (prop_type > 0)        if (prop_type >= 0)
2942          {          {
2943          for (fi = min;; fi++)          switch(prop_type)
2944            {            {
2945            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            case PT_ANY:
2946            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for (fi = min;; fi++)
2947            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              {
2948            GETCHARINC(c, eptr);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2949            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2950            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2951              RRETURN(MATCH_NOMATCH);              GETCHARINC(c, eptr);
2952                if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2953                }
2954              /* Control never gets here */
2955    
2956              case PT_LAMP:
2957              for (fi = min;; fi++)
2958                {
2959                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2960                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2961                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2962                GETCHARINC(c, eptr);
2963                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2964                if ((prop_chartype == ucp_Lu ||
2965                     prop_chartype == ucp_Ll ||
2966                     prop_chartype == ucp_Lt) == prop_fail_result)
2967                  RRETURN(MATCH_NOMATCH);
2968                }
2969              /* Control never gets here */
2970    
2971              case PT_GC:
2972              for (fi = min;; fi++)
2973                {
2974                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2975                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2976                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2977                GETCHARINC(c, eptr);
2978                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2979                if ((prop_category == prop_value) == prop_fail_result)
2980                  RRETURN(MATCH_NOMATCH);
2981                }
2982              /* Control never gets here */
2983    
2984              case PT_PC:
2985              for (fi = min;; fi++)
2986                {
2987                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2988                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2989                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2990                GETCHARINC(c, eptr);
2991                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2992                if ((prop_chartype == prop_value) == prop_fail_result)
2993                  RRETURN(MATCH_NOMATCH);
2994                }
2995              /* Control never gets here */
2996    
2997              case PT_SC:
2998              for (fi = min;; fi++)
2999                {
3000                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3001                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3002                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3003                GETCHARINC(c, eptr);
3004                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3005                if ((prop_script == prop_value) == prop_fail_result)
3006                  RRETURN(MATCH_NOMATCH);
3007                }
3008              /* Control never gets here */
3009    
3010              default:
3011              RRETURN(PCRE_ERROR_INTERNAL);
3012            }            }
3013          }          }
3014    
# Line 2649  for (;;) Line 3023  for (;;)
3023            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3024            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3025            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3026            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3027            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3028            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3029              {              {
# Line 2658  for (;;) Line 3032  for (;;)
3032                {                {
3033                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3034                }                }
3035              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3036              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3037              eptr += len;              eptr += len;
3038              }              }
# Line 2676  for (;;) Line 3050  for (;;)
3050            {            {
3051            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3052            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3053            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3054                   (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3055                    IS_NEWLINE(eptr)))
3056                RRETURN(MATCH_NOMATCH);
3057    
3058            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3059            switch(ctype)            switch(ctype)
3060              {              {
3061              case OP_ANY:              case OP_ANY:        /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3062              break;              break;
3063    
3064              case OP_ANYBYTE:              case OP_ANYBYTE:
3065              break;              break;
3066    
3067                case OP_ANYNL:
3068                switch(c)
3069                  {
3070                  default: RRETURN(MATCH_NOMATCH);
3071                  case 0x000d:
3072                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3073                  break;
3074                  case 0x000a:
3075                  case 0x000b:
3076                  case 0x000c:
3077                  case 0x0085:
3078                  case 0x2028:
3079                  case 0x2029:
3080                  break;
3081                  }
3082                break;
3083    
3084              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3085              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3086                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2731  for (;;) Line 3124  for (;;)
3124            {            {
3125            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3126            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3127            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3128                   ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3129                RRETURN(MATCH_NOMATCH);
3130    
3131            c = *eptr++;            c = *eptr++;
3132            switch(ctype)            switch(ctype)
3133              {              {
3134              case OP_ANY:              case OP_ANY:   /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3135              break;              break;
3136    
3137              case OP_ANYBYTE:              case OP_ANYBYTE:
3138              break;              break;
3139    
3140                case OP_ANYNL:
3141                switch(c)
3142                  {
3143                  default: RRETURN(MATCH_NOMATCH);
3144                  case 0x000d:
3145                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3146                  break;
3147                  case 0x000a:
3148                  case 0x000b:
3149                  case 0x000c:
3150                  case 0x0085:
3151                  break;
3152                  }
3153                break;
3154    
3155              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3156              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3157              break;              break;
# Line 2774  for (;;) Line 3184  for (;;)
3184        /* Control never gets here */        /* Control never gets here */
3185        }        }
3186    
3187      /* If maximizing it is worth using inline code for speed, doing the type      /* If maximizing, it is worth using inline code for speed, doing the type
3188      test once at the start (i.e. keep it out of the loop). Again, keep the      test once at the start (i.e. keep it out of the loop). Again, keep the
3189      UTF-8 and UCP stuff separate. */      UTF-8 and UCP stuff separate. */
3190    
# Line 2783  for (;;) Line 3193  for (;;)
3193        pp = eptr;  /* Remember where we started */        pp = eptr;  /* Remember where we started */
3194    
3195  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3196        if (prop_type > 0)        if (prop_type >= 0)
3197          {          {
3198          for (i = min; i < max; i++)          switch(prop_type)
3199            {            {
3200            int len = 1;            case PT_ANY:
3201            if (eptr >= md->end_subject) break;            for (i = min; i < max; i++)
3202            GETCHARLEN(c, eptr, len);              {
3203            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              int len = 1;
3204            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (eptr >= md->end_subject) break;
3205              break;              GETCHARLEN(c, eptr, len);
3206            eptr+= len;              if (prop_fail_result) break;
3207                eptr+= len;
3208                }
3209              break;
3210    
3211              case PT_LAMP:
3212              for (i = min; i < max; i++)
3213                {
3214                int len = 1;
3215                if (eptr >= md->end_subject) break;
3216                GETCHARLEN(c, eptr, len);
3217                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3218                if ((prop_chartype == ucp_Lu ||
3219                     prop_chartype == ucp_Ll ||
3220                     prop_chartype == ucp_Lt) == prop_fail_result)
3221                  break;
3222                eptr+= len;
3223                }
3224              break;
3225    
3226              case PT_GC:
3227              for (i = min; i < max; i++)
3228                {
3229                int len = 1;
3230                if (eptr >= md->end_subject) break;
3231                GETCHARLEN(c, eptr, len);
3232                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3233                if ((prop_category == prop_value) == prop_fail_result)
3234                  break;
3235                eptr+= len;
3236                }
3237              break;
3238    
3239              case PT_PC:
3240              for (i = min; i < max; i++)
3241                {
3242                int len = 1;
3243                if (eptr >= md->end_subject) break;
3244                GETCHARLEN(c, eptr, len);
3245                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3246                if ((prop_chartype == prop_value) == prop_fail_result)
3247                  break;
3248                eptr+= len;
3249                }
3250              break;
3251    
3252              case PT_SC:
3253              for (i = min; i < max; i++)
3254                {
3255                int len = 1;
3256                if (eptr >= md->end_subject) break;
3257                GETCHARLEN(c, eptr, len);
3258                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3259                if ((prop_script == prop_value) == prop_fail_result)
3260                  break;
3261                eptr+= len;
3262                }
3263              break;
3264            }            }
3265    
3266          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3267    
3268            if (possessive) continue;
3269          for(;;)          for(;;)
3270            {            {
3271            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2816  for (;;) Line 3284  for (;;)
3284            {            {
3285            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3286            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3287            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3288            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3289            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3290              {              {
# Line 2825  for (;;) Line 3293  for (;;)
3293                {                {
3294                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3295                }                }
3296              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3297              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3298              eptr += len;              eptr += len;
3299              }              }
# Line 2833  for (;;) Line 3301  for (;;)
3301    
3302          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3303    
3304            if (possessive) continue;
3305          for(;;)          for(;;)
3306            {            {
3307            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2846  for (;;) Line 3315  for (;;)
3315                {                {
3316                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3317                }                }
3318              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3319              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3320              eptr--;              eptr--;
3321              }              }
# Line 2865  for (;;) Line 3334  for (;;)
3334            {            {
3335            case OP_ANY:            case OP_ANY:
3336    
3337            /* Special code is required for UTF8, but when the maximum is unlimited            /* Special code is required for UTF8, but when the maximum is
3338            we don't need it, so we repeat the non-UTF8 code. This is probably            unlimited we don't need it, so we repeat the non-UTF8 code. This is
3339            worth it, because .* is quite a common idiom. */            probably worth it, because .* is quite a common idiom. */
3340    
3341            if (max < INT_MAX)            if (max < INT_MAX)
3342              {              {
# Line 2875  for (;;) Line 3344  for (;;)
3344                {                {
3345                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3346                  {                  {
3347                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3348                  eptr++;                  eptr++;
3349                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3350                  }                  }
# Line 2884  for (;;) Line 3353  for (;;)
3353                {                {
3354                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3355                  {                  {
3356                    if (eptr >= md->end_subject) break;
3357                  eptr++;                  eptr++;
3358                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3359                  }                  }
# Line 2898  for (;;) Line 3368  for (;;)
3368                {                {
3369                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3370                  {                  {
3371                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3372                  eptr++;                  eptr++;
3373                  }                  }
3374                break;                break;
# Line 2906  for (;;) Line 3376  for (;;)
3376              else              else
3377                {                {
3378                c = max - min;                c = max - min;
3379                if (c > md->end_subject - eptr) c = md->end_subject - eptr;                if (c > (unsigned int)(md->end_subject - eptr))
3380                    c = md->end_subject - eptr;
3381                eptr += c;                eptr += c;
3382                }                }
3383              }              }
# Line 2916  for (;;) Line 3387  for (;;)
3387    
3388            case OP_ANYBYTE:            case OP_ANYBYTE:
3389            c = max - min;            c = max - min;
3390            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3391                c = md->end_subject - eptr;
3392            eptr += c;            eptr += c;
3393            break;            break;
3394    
3395              case OP_ANYNL:
3396              for (i = min; i < max; i++)
3397                {
3398                int len = 1;
3399                if (eptr >= md->end_subject) break;
3400                GETCHARLEN(c, eptr, len);
3401                if (c == 0x000d)
3402                  {
3403                  if (++eptr >= md->end_subject) break;
3404                  if (*eptr == 0x000a) eptr++;
3405                  }
3406                else
3407                  {
3408                  if (c != 0x000a && c != 0x000b && c != 0x000c &&
3409                      c != 0x0085 && c != 0x2028 && c != 0x2029)
3410                    break;
3411                  eptr += len;
3412                  }
3413                }
3414              break;
3415    
3416            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3417            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3418              {              {
# Line 2992  for (;;) Line 3485  for (;;)
3485    
3486          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3487    
3488            if (possessive) continue;
3489          for(;;)          for(;;)
3490            {            {
3491            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 3012  for (;;) Line 3506  for (;;)
3506              {              {
3507              for (i = min; i < max; i++)              for (i = min; i < max; i++)
3508                {                {
3509                if (eptr >= md->end_subject || *eptr == NEWLINE) break;                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3510                eptr++;                eptr++;
3511                }                }
3512              break;              break;
# Line 3021  for (;;) Line 3515  for (;;)
3515    
3516            case OP_ANYBYTE:            case OP_ANYBYTE:
3517            c = max - min;            c = max - min;
3518            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3519                c = md->end_subject - eptr;
3520            eptr += c;            eptr += c;
3521            break;            break;
3522    
3523              case OP_ANYNL:
3524              for (i = min; i < max; i++)
3525                {
3526                if (eptr >= md->end_subject) break;
3527                c = *eptr;
3528                if (c == 0x000d)
3529                  {
3530                  if (++eptr >= md->end_subject) break;
3531                  if (*eptr == 0x000a) eptr++;
3532                  }
3533                else
3534                  {
3535                  if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
3536                    break;
3537                  eptr++;
3538                  }
3539                }
3540              break;
3541    
3542            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3543            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3544              {              {
# Line 3085  for (;;) Line 3599  for (;;)
3599    
3600          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3601    
3602            if (possessive) continue;
3603          while (eptr >= pp)          while (eptr >= pp)
3604            {            {
3605            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 3099  for (;;) Line 3614  for (;;)
3614        }        }
3615      /* Control never gets here */      /* Control never gets here */
3616    
3617      /* There's been some horrible disaster. Since all codes > OP_BRA are      /* There's been some horrible disaster. Arrival here can only mean there is
3618      for capturing brackets, and there shouldn't be any gaps between 0 and      something seriously wrong in the code above or the OP_xxx definitions. */
     OP_BRA, arrival here can only mean there is something seriously wrong  
     in the code above or the OP_xxx definitions. */  
3619    
3620      default:      default:
3621      DPRINTF(("Unknown opcode %d\n", *ecode));      DPRINTF(("Unknown opcode %d\n", *ecode));
3622      RRETURN(PCRE_ERROR_UNKNOWN_NODE);      RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
3623      }      }
3624    
3625    /* Do not stick any code in here without much thought; it is assumed    /* Do not stick any code in here without much thought; it is assumed
# Line 3144  Undefine all the macros that were define Line 3657  Undefine all the macros that were define
3657    
3658  #undef cur_is_word  #undef cur_is_word
3659  #undef condition  #undef condition
 #undef minimize  
3660  #undef prev_is_word  #undef prev_is_word
3661    
3662  #undef original_ims  #undef original_ims
# Line 3200  Returns:          > 0 => success; value Line 3712  Returns:          > 0 => success; value
3712                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3713  */  */
3714    
3715  PCRE_EXPORT int  PCRE_EXP_DEFN int
3716  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
3717    const char *subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
3718    int offsetcount)    int offsetcount)
3719  {  {
3720  int rc, resetcount, ocount;  int rc, resetcount, ocount;
3721  int first_byte = -1;  int first_byte = -1;
3722  int req_byte = -1;  int req_byte = -1;
3723  int req_byte2 = -1;  int req_byte2 = -1;
3724  unsigned long int ims = 0;  int newline;
3725    unsigned long int ims;
3726  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
3727  BOOL anchored;  BOOL anchored;
3728  BOOL startline;  BOOL startline;
3729  BOOL firstline;  BOOL firstline;
3730  BOOL first_byte_caseless = FALSE;  BOOL first_byte_caseless = FALSE;
3731  BOOL req_byte_caseless = FALSE;  BOOL req_byte_caseless = FALSE;
3732    BOOL utf8;
3733  match_data match_block;  match_data match_block;
3734    match_data *md = &match_block;
3735  const uschar *tables;  const uschar *tables;
3736  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3737  const uschar *start_match = (const uschar *)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
3738  const uschar *end_subject;  USPTR end_subject;
3739  const uschar *req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
3740    eptrblock eptrchain[EPTR_WORK_SIZE];
3741    
3742  pcre_study_data internal_study;  pcre_study_data internal_study;
3743  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3241  if (offsetcount < 0) return PCRE_ERROR_B Line 3757  if (offsetcount < 0) return PCRE_ERROR_B
3757  the default values. */  the default values. */
3758    
3759  study = NULL;  study = NULL;
3760  match_block.match_limit = MATCH_LIMIT;  md->match_limit = MATCH_LIMIT;
3761  match_block.callout_data = NULL;  md->match_limit_recursion = MATCH_LIMIT_RECURSION;
3762    md->callout_data = NULL;
3763    
3764  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
3765    
# Line 3254  if (extra_data != NULL) Line 3771  if (extra_data != NULL)
3771    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3772      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
3773    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
3774      match_block.match_limit = extra_data->match_limit;      md->match_limit = extra_data->match_limit;
3775      if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3776        md->match_limit_recursion = extra_data->match_limit_recursion;
3777    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3778      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
3779    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
3780    }    }
3781    
# Line 3286  firstline = (re->options & PCRE_FIRSTLIN Line 3805  firstline = (re->options & PCRE_FIRSTLIN
3805    
3806  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
3807    
3808  match_block.start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const uschar *)external_re + re->name_table_offset +
3809    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
3810    
3811  match_block.start_subject = (const uschar *)subject;  md->start_subject = (USPTR)subject;
3812  match_block.start_offset = start_offset;  md->start_offset = start_offset;
3813  match_block.end_subject = match_block.start_subject + length;  md->end_subject = md->start_subject + length;
3814  end_subject = match_block.end_subject;  end_subject = md->end_subject;
3815    
3816  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
3817  match_block.utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
3818    
3819  match_block.notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
3820  match_block.noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
3821  match_block.notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
3822  match_block.partial = (options & PCRE_PARTIAL) != 0;  md->partial = (options & PCRE_PARTIAL) != 0;
3823  match_block.hitend = FALSE;  md->hitend = FALSE;
3824    
3825    md->recursive = NULL;                   /* No recursion at top level */
3826    md->eptrchain = eptrchain;              /* Make workspace generally available */
3827    
3828    md->lcc = tables + lcc_offset;
3829    md->ctypes = tables + ctypes_offset;
3830    
3831  match_block.recursive = NULL;                   /* No recursion at top level */  /* Handle different types of newline. The three bits give eight cases. If
3832    nothing is set at run time, whatever was used at compile time applies. */
3833    
3834    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
3835           PCRE_NEWLINE_BITS)
3836      {
3837      case 0: newline = NEWLINE; break;   /* Compile-time default */
3838      case PCRE_NEWLINE_CR: newline = '\r'; break;
3839      case PCRE_NEWLINE_LF: newline = '\n'; break;
3840      case PCRE_NEWLINE_CR+
3841           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
3842      case PCRE_NEWLINE_ANY: newline = -1; break;
3843      default: return PCRE_ERROR_BADNEWLINE;
3844      }
3845    
3846  match_block.lcc = tables + lcc_offset;  if (newline < 0)
3847  match_block.ctypes = tables + ctypes_offset;    {
3848      md->nltype = NLTYPE_ANY;
3849      }
3850    else
3851      {
3852      md->nltype = NLTYPE_FIXED;
3853      if (newline > 255)
3854        {
3855        md->nllen = 2;
3856        md->nl[0] = (newline >> 8) & 255;
3857        md->nl[1] = newline & 255;
3858        }
3859      else
3860        {
3861        md->nllen = 1;
3862        md->nl[0] = newline;
3863        }
3864      }
3865    
3866  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
3867  moment. */  moment. */
3868    
3869  if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
3870    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
3871    
3872  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3873  back the character offset. */  back the character offset. */
3874    
3875  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3876  if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3877    {    {
3878    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
3879      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
# Line 3350  ocount = offsetcount - (offsetcount % 3) Line 3905  ocount = offsetcount - (offsetcount % 3)
3905  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
3906    {    {
3907    ocount = re->top_backref * 3 + 3;    ocount = re->top_backref * 3 + 3;
3908    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3909    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3910    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
3911    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
3912    }    }
3913  else match_block.offset_vector = offsets;  else md->offset_vector = offsets;
3914    
3915  match_block.offset_end = ocount;  md->offset_end = ocount;
3916  match_block.offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
3917  match_block.offset_overflow = FALSE;  md->offset_overflow = FALSE;
3918  match_block.capture_last = -1;  md->capture_last = -1;
3919    
3920  /* Compute the minimum number of offsets that we need to reset each time. Doing  /* Compute the minimum number of offsets that we need to reset each time. Doing
3921  this makes a huge difference to execution time when there aren't many brackets  this makes a huge difference to execution time when there aren't many brackets
# Line 3373  if (resetcount > offsetcount) resetcount Line 3928  if (resetcount > offsetcount) resetcount
3928  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
3929  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. */
3930    
3931  if (match_block.offset_vector != NULL)  if (md->offset_vector != NULL)
3932    {    {
3933    register int *iptr = match_block.offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
3934    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - resetcount/2 + 1;
3935    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
3936    }    }
# Line 3392  if (!anchored) Line 3947  if (!anchored)
3947      {      {
3948      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
3949      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
3950        first_byte = match_block.lcc[first_byte];        first_byte = md->lcc[first_byte];
3951      }      }
3952    else    else
3953      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 3410  if ((re->options & PCRE_REQCHSET) != 0) Line 3965  if ((re->options & PCRE_REQCHSET) != 0)
3965    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
3966    }    }
3967    
3968    
3969    /* ==========================================================================*/
3970    
3971  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
3972  the loop runs just once. */  the loop runs just once. */
3973    
3974  do  for(;;)
3975    {    {
3976    const uschar *save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
3977    
3978    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
3979    
3980    if (match_block.offset_vector != NULL)    if (md->offset_vector != NULL)
3981      {      {
3982      register int *iptr = match_block.offset_vector;      register int *iptr = md->offset_vector;
3983      register int *iend = iptr + resetcount;      register int *iend = iptr + resetcount;
3984      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
3985      }      }
3986    
3987    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* Advance to a unique first char if possible. If firstline is TRUE, the
3988    start of the match is constrained to the first line of a multiline string.    start of the match is constrained to the first line of a multiline string.
3989    Implement this by temporarily adjusting end_subject so that we stop scanning    That is, the match must be before or at the first newline. Implement this by
3990    at a newline. If the match fails at the newline, later code breaks this loop.    temporarily adjusting end_subject so that we stop scanning at a newline. If
3991    */    the match fails at the newline, later code breaks this loop. */
3992    
3993    if (firstline)    if (firstline)
3994      {      {
3995      const uschar *t = start_match;      USPTR t = start_match;
3996      while (t < save_end_subject && *t != '\n') t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3997      end_subject = t;      end_subject = t;
3998      }      }
3999    
# Line 3445  do Line 4003  do
4003      {      {
4004      if (first_byte_caseless)      if (first_byte_caseless)
4005        while (start_match < end_subject &&        while (start_match < end_subject &&
4006               match_block.lcc[*start_match] != first_byte)               md->lcc[*start_match] != first_byte)
4007          start_match++;          start_match++;
4008      else      else
4009        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4010          start_match++;          start_match++;
4011      }      }
4012    
4013    /* Or to just after \n for a multiline match if possible */    /* Or to just after a linebreak for a multiline match if possible */
4014    
4015    else if (startline)    else if (startline)
4016      {      {
4017      if (start_match > match_block.start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4018        {        {
4019        while (start_match < end_subject && start_match[-1] != NEWLINE)        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4020            start_match++;
4021    
4022          /* If we have just passed a CR and the newline option is ANY, and we are
4023          now at a LF, advance the match position by one more character. */
4024    
4025          if (start_match[-1] == '\r' &&
4026               md->nltype == NLTYPE_ANY &&
4027               start_match < end_subject &&
4028               *start_match == '\n')
4029          start_match++;          start_match++;
4030        }        }
4031      }      }
# Line 3480  do Line 4047  do
4047    
4048  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4049    printf(">>>> Match against: ");    printf(">>>> Match against: ");
4050    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, md);
4051    printf("\n");    printf("\n");
4052  #endif  #endif
4053    
# Line 3494  do Line 4061  do
4061    
4062    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end can
4063    take a long time, and give bad performance on quite ordinary patterns. This    take a long time, and give bad performance on quite ordinary patterns. This
4064    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4065    don't do this when the string is sufficiently long.    string... so we don't do this when the string is sufficiently long.
4066    
4067    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested.
4068    */    */
4069    
4070    if (req_byte >= 0 &&    if (req_byte >= 0 &&
4071        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
4072        !match_block.partial)        !md->partial)
4073      {      {
4074      register const uschar *p = start_match + ((first_byte >= 0)? 1 : 0);      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4075    
4076      /* We don't need to repeat the search if we haven't yet reached the      /* We don't need to repeat the search if we haven't yet reached the
4077      place we found it at last time. */      place we found it at last time. */
# Line 3527  do Line 4094  do
4094            }            }
4095          }          }
4096    
4097        /* If we can't find the required character, break the matching loop */        /* If we can't find the required character, break the matching loop,
4098          forcing a match failure. */
4099    
4100        if (p >= end_subject) break;        if (p >= end_subject)
4101            {
4102            rc = MATCH_NOMATCH;
4103            break;
4104            }
4105    
4106        /* If we have found the required character, save the point where we        /* If we have found the required character, save the point where we
4107        found it, so that we don't search again next time round the loop if        found it, so that we don't search again next time round the loop if
# Line 3539  do Line 4111  do
4111        }        }
4112      }      }
4113    
4114    /* When a match occurs, substrings will be set for all internal extractions;    /* OK, we can now run the match. */
   we just need to set up the whole thing as substring 0 before returning. If  
   there were too many extractions, set the return code to zero. In the case  
   where we had to get some local store to hold offsets for backreferences, copy  
   those back references that we can. In this case there need not be overflow  
   if certain parts of the pattern were not used. */  
   
   match_block.start_match = start_match;  
   match_block.match_call_count = 0;  
   
   rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,  
     match_isgroup);  
   
   /* When the result is no match, if the subject's first character was a  
   newline and the PCRE_FIRSTLINE option is set, break (which will return  
   PCRE_ERROR_NOMATCH). The option requests that a match occur before the first  
   newline in the subject. Otherwise, advance the pointer to the next character  
   and continue - but the continuation will actually happen only when the  
   pattern is not anchored. */  
4115    
4116    if (rc == MATCH_NOMATCH)    md->start_match = start_match;
4117      {    md->match_call_count = 0;
4118      if (firstline && *start_match == NEWLINE) break;    md->eptrn = 0;                          /* Next free eptrchain slot */
4119      start_match++;    rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);
4120    
4121      /* Any return other than MATCH_NOMATCH breaks the loop. */
4122    
4123      if (rc != MATCH_NOMATCH) break;
4124    
4125      /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4126      newline in the subject (though it may continue over the newline). Therefore,
4127      if we have just failed to match, starting at a newline, do not continue. */
4128    
4129      if (firstline && IS_NEWLINE(start_match)) break;
4130    
4131      /* Advance the match position by one character. */
4132    
4133      start_match++;
4134  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4135      if (match_block.utf8)    if (utf8)
4136        while(start_match < end_subject && (*start_match & 0xc0) == 0x80)      while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4137          start_match++;        start_match++;
4138  #endif  #endif
     continue;  
     }  
4139    
4140    if (rc != MATCH_MATCH)    /* Break the loop if the pattern is anchored or if we have passed the end of
4141      {    the subject. */
4142      DPRINTF((">>>> error: returning %d\n", rc));  
4143      return rc;    if (anchored || start_match > end_subject) break;
     }  
4144    
4145    /* We have a match! Copy the offset information from temporary store if    /* If we have just passed a CR and the newline option is CRLF or ANY, and we
4146    necessary */    are now at a LF, advance the match position by one more character. */
4147    
4148      if (start_match[-1] == '\r' &&
4149           (md->nltype == NLTYPE_ANY || md->nllen == 2) &&
4150           start_match < end_subject &&
4151           *start_match == '\n')
4152        start_match++;
4153    
4154      }   /* End of for(;;) "bumpalong" loop */
4155    
4156    /* ==========================================================================*/
4157    
4158    /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4159    conditions is true:
4160    
4161    (1) The pattern is anchored;
4162    
4163    (2) We are past the end of the subject;
4164    
4165    (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4166        this option requests that a match occur at or before the first newline in
4167        the subject.
4168    
4169    When we have a match and the offset vector is big enough to deal with any
4170    backreferences, captured substring offsets will already be set up. In the case
4171    where we had to get some local store to hold offsets for backreference
4172    processing, copy those that we can. In this case there need not be overflow if
4173    certain parts of the pattern were not used, even though there are more
4174    capturing parentheses than vector slots. */
4175    
4176    if (rc == MATCH_MATCH)
4177      {
4178    if (using_temporary_offsets)    if (using_temporary_offsets)
4179      {      {
4180      if (offsetcount >= 4)      if (offsetcount >= 4)
4181        {        {
4182        memcpy(offsets + 2, match_block.offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
4183          (offsetcount - 2) * sizeof(int));          (offsetcount - 2) * sizeof(int));
4184        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
4185        }        }
4186      if (match_block.end_offset_top > offsetcount)      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
       match_block.offset_overflow = TRUE;  
   
4187      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
4188      (pcre_free)(match_block.offset_vector);      (pcre_free)(md->offset_vector);
4189      }      }
4190    
4191    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;    /* Set the return code to the number of captured strings, or 0 if there are
4192      too many to fit into the vector. */
4193    
4194      rc = md->offset_overflow? 0 : md->end_offset_top/2;
4195    
4196      /* If there is space, set up the whole thing as substring 0. */
4197    
4198    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4199      {      {
4200      offsets[0] = start_match - match_block.start_subject;      offsets[0] = start_match - md->start_subject;
4201      offsets[1] = match_block.end_match_ptr - match_block.start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
4202      }      }
4203    
4204    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
4205    return rc;    return rc;
4206    }    }
4207    
4208  /* This "while" is the end of the "do" above */  /* Control gets here if there has been an error, or if the overall match
4209    attempt has failed at all permitted starting positions. */
 while (!anchored && start_match <= end_subject);  
4210    
4211  if (using_temporary_offsets)  if (using_temporary_offsets)
4212    {    {
4213    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
4214    (pcre_free)(match_block.offset_vector);    (pcre_free)(md->offset_vector);
4215    }    }
4216    
4217  if (match_block.partial && match_block.hitend)  if (rc != MATCH_NOMATCH)
4218      {
4219      DPRINTF((">>>> error: returning %d\n", rc));
4220      return rc;
4221      }
4222    else if (md->partial && md->hitend)
4223    {    {
4224    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4225    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;

Legend:
Removed from v.85  
changed lines
  Added in v.145

  ViewVC Help
Powered by ViewVC 1.1.5