/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 97 by ph10, Mon Mar 5 12:36:47 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2006 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #define NLBLOCK md             /* Block containing newline information */
46    #define PSSTART start_subject  /* Field containing processed string start */
47    #define PSEND   end_subject    /* Field containing processed string end */
48    
49  #include "pcre_internal.h"  #include "pcre_internal.h"
50    
51    /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
52    obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
53    
54  /* Structure for building a chain of data that actually lives on the  #define EPTR_WORK_SIZE (1000)
 stack, for holding the values of the subject pointer at the start of each  
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   const uschar *epb_saved_eptr;  
 } eptrblock;  
55    
56  /* Flag bits for the match() function */  /* Flag bits for the match() function */
57    
58  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
59  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
60    #define match_tail_recursed  0x04  /* Tail recursive call */
61    
62  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
63  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 101  Returns:     nothing Line 98  Returns:     nothing
98  static void  static void
99  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
100  {  {
101  int c;  unsigned int c;
102  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
103  while (length-- > 0)  while (length-- > 0)
104    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 128  Returns:      TRUE if matched Line 125  Returns:      TRUE if matched
125  */  */
126    
127  static BOOL  static BOOL
128  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
129    unsigned long int ims)    unsigned long int ims)
130  {  {
131  const uschar *p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
132    
133  #ifdef DEBUG  #ifdef DEBUG
134  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 169  return TRUE; Line 166  return TRUE;
166  ****************************************************************************  ****************************************************************************
167                     RECURSION IN THE match() FUNCTION                     RECURSION IN THE match() FUNCTION
168    
169  The match() function is highly recursive. Some regular expressions can cause  The match() function is highly recursive, though not every recursive call
170  it to recurse thousands of times. I was writing for Unix, so I just let it  increases the recursive depth. Nevertheless, some regular expressions can cause
171  call itself recursively. This uses the stack for saving everything that has  it to recurse to a great depth. I was writing for Unix, so I just let it call
172  to be saved for a recursive call. On Unix, the stack can be large, and this  itself recursively. This uses the stack for saving everything that has to be
173  works fine.  saved for a recursive call. On Unix, the stack can be large, and this works
174    fine.
175  It turns out that on non-Unix systems there are problems with programs that  
176  use a lot of stack. (This despite the fact that every last chip has oodles  It turns out that on some non-Unix-like systems there are problems with
177  of memory these days, and techniques for extending the stack have been known  programs that use a lot of stack. (This despite the fact that every last chip
178  for decades.) So....  has oodles of memory these days, and techniques for extending the stack have
179    been known for decades.) So....
180    
181  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
182  calls by keeping local variables that need to be preserved in blocks of memory  calls by keeping local variables that need to be preserved in blocks of memory
183  obtained from malloc instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
184  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
185  always used to.  always used to.
186  ****************************************************************************  ****************************************************************************
187  ***************************************************************************/  ***************************************************************************/
188    
189    
190  /* These versions of the macros use the stack, as normal */  /* These versions of the macros use the stack, as normal. There are debugging
191    versions and production versions. */
192    
193  #ifndef NO_RECURSE  #ifndef NO_RECURSE
194  #define REGISTER register  #define REGISTER register
195  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)  #ifdef DEBUG
196    #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
197      { \
198      printf("match() called in line %d\n", __LINE__); \
199      rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \
200      printf("to line %d\n", __LINE__); \
201      }
202    #define RRETURN(ra) \
203      { \
204      printf("match() returned %d from line %d ", ra, __LINE__); \
205      return ra; \
206      }
207    #else
208    #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
209      rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)
210  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
211    #endif
212    
213  #else  #else
214    
215    
# Line 215  match(), which never changes. */ Line 230  match(), which never changes. */
230      newframe->Xims = re;\      newframe->Xims = re;\
231      newframe->Xeptrb = rf;\      newframe->Xeptrb = rf;\
232      newframe->Xflags = rg;\      newframe->Xflags = rg;\
233        newframe->Xrdepth = frame->Xrdepth + 1;\
234      newframe->Xprevframe = frame;\      newframe->Xprevframe = frame;\
235      frame = newframe;\      frame = newframe;\
236      DPRINTF(("restarting from line %d\n", __LINE__));\      DPRINTF(("restarting from line %d\n", __LINE__));\
# Line 256  typedef struct heapframe { Line 272  typedef struct heapframe {
272    long int Xims;    long int Xims;
273    eptrblock *Xeptrb;    eptrblock *Xeptrb;
274    int Xflags;    int Xflags;
275      unsigned int Xrdepth;
276    
277    /* Function local variables */    /* Function local variables */
278    
# Line 271  typedef struct heapframe { Line 288  typedef struct heapframe {
288    
289    BOOL Xcur_is_word;    BOOL Xcur_is_word;
290    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
291    BOOL Xprev_is_word;    BOOL Xprev_is_word;
292    
293    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
294    
295  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
296    int Xprop_type;    int Xprop_type;
297      int Xprop_value;
298    int Xprop_fail_result;    int Xprop_fail_result;
299    int Xprop_category;    int Xprop_category;
300    int Xprop_chartype;    int Xprop_chartype;
301    int Xprop_othercase;    int Xprop_script;
   int Xprop_test_against;  
   int *Xprop_test_variable;  
302  #endif  #endif
303    
304    int Xctype;    int Xctype;
305    int Xfc;    unsigned int Xfc;
306    int Xfi;    int Xfi;
307    int Xlength;    int Xlength;
308    int Xmax;    int Xmax;
# Line 320  typedef struct heapframe { Line 335  typedef struct heapframe {
335  *         Match from current position            *  *         Match from current position            *
336  *************************************************/  *************************************************/
337    
338  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
339  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
340  same response.  same response.
341    
# Line 333  performance. Tests using gcc on a SPARC Line 345  performance. Tests using gcc on a SPARC
345  made performance worse.  made performance worse.
346    
347  Arguments:  Arguments:
348     eptr        pointer in subject     eptr        pointer to current character in subject
349     ecode       position in code     ecode       pointer to current position in compiled code
350     offset_top  current top pointer     offset_top  current top pointer
351     md          pointer to "static" info for the match     md          pointer to "static" info for the match
352     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 342  Arguments: Line 354  Arguments:
354                   brackets - for testing for empty matches                   brackets - for testing for empty matches
355     flags       can contain     flags       can contain
356                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
357                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
358                       group that can match an empty string
359                     match_tail_recursed - this is a tail_recursed group
360       rdepth      the recursion depth
361    
362  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
363                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
364                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
365                   (e.g. stopped by recursion limit)                   (e.g. stopped by repeated call or recursion limit)
366  */  */
367    
368  static int  static int
369  match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
370    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
371    int flags)    int flags, unsigned int rdepth)
372  {  {
373  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
374  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
375  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
376    
377  register int  rrc;    /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
378  register int  i;      /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
379  register int  c;      /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
380  register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
381    
382    BOOL minimize, possessive; /* Quantifier options */
383    
384  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
385  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 381  frame->Xoffset_top = offset_top; Line 398  frame->Xoffset_top = offset_top;
398  frame->Xims = ims;  frame->Xims = ims;
399  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
400  frame->Xflags = flags;  frame->Xflags = flags;
401    frame->Xrdepth = rdepth;
402    
403  /* This is where control jumps back to to effect "recursion" */  /* This is where control jumps back to to effect "recursion" */
404    
# Line 394  HEAP_RECURSE: Line 412  HEAP_RECURSE:
412  #define ims                frame->Xims  #define ims                frame->Xims
413  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
414  #define flags              frame->Xflags  #define flags              frame->Xflags
415    #define rdepth             frame->Xrdepth
416    
417  /* Ditto for the local variables */  /* Ditto for the local variables */
418    
# Line 411  HEAP_RECURSE: Line 430  HEAP_RECURSE:
430    
431  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
432  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
433  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
434    
435  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
436    
437  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
438  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
439    #define prop_value         frame->Xprop_value
440  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
441  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
442  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
443  #define prop_othercase     frame->Xprop_othercase  #define prop_script        frame->Xprop_script
 #define prop_test_against  frame->Xprop_test_against  
 #define prop_test_variable frame->Xprop_test_variable  
444  #endif  #endif
445    
446  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 447  HEAP_RECURSE: Line 464  HEAP_RECURSE:
464  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
465  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
466    
467  #else  #else         /* NO_RECURSE not defined */
468  #define fi i  #define fi i
469  #define fc c  #define fc c
470    
471    
472  #ifdef SUPPORT_UTF8                /* Many of these variables are used ony */  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
473  const uschar *charptr;             /* small blocks of the code. My normal  */  const uschar *charptr;             /* in small blocks of the code. My normal */
474  #endif                             /* style of coding would have declared  */  #endif                             /* style of coding would have declared    */
475  const uschar *callpat;             /* them within each of those blocks.    */  const uschar *callpat;             /* them within each of those blocks.      */
476  const uschar *data;                /* However, in order to accommodate the */  const uschar *data;                /* However, in order to accommodate the   */
477  const uschar *next;                /* version of this code that uses an    */  const uschar *next;                /* version of this code that uses an      */
478  const uschar *pp;                  /* external "stack" implemented on the  */  USPTR         pp;                  /* external "stack" implemented on the    */
479  const uschar *prev;                /* heap, it is easier to declare them   */  const uschar *prev;                /* heap, it is easier to declare them all */
480  const uschar *saved_eptr;          /* all here, so the declarations can    */  USPTR         saved_eptr;          /* here, so the declarations can be cut   */
481                                     /* be cut out in a block. The only      */                                     /* out in a block. The only declarations  */
482  recursion_info new_recursive;      /* declarations within blocks below are */  recursion_info new_recursive;      /* within blocks below are for variables  */
483                                     /* for variables that do not have to    */                                     /* that do not have to be preserved over  */
484  BOOL cur_is_word;                  /* be preserved over a recursive call   */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
485  BOOL condition;                    /* to RMATCH().                         */  BOOL condition;
 BOOL minimize;  
486  BOOL prev_is_word;  BOOL prev_is_word;
487    
488  unsigned long int original_ims;  unsigned long int original_ims;
489    
490  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
491  int prop_type;  int prop_type;
492    int prop_value;
493  int prop_fail_result;  int prop_fail_result;
494  int prop_category;  int prop_category;
495  int prop_chartype;  int prop_chartype;
496  int prop_othercase;  int prop_script;
 int prop_test_against;  
 int *prop_test_variable;  
497  #endif  #endif
498    
499  int ctype;  int ctype;
# Line 493  int save_offset1, save_offset2, save_off Line 508  int save_offset1, save_offset2, save_off
508  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
509    
510  eptrblock newptrb;  eptrblock newptrb;
511  #endif  #endif     /* NO_RECURSE */
512    
513  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
514  variables. */  variables. */
515    
516  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
517    prop_value = 0;
518  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_against = 0;  
 prop_test_variable = NULL;  
519  #endif  #endif
520    
521  /* OK, now we can get on with the real code of the function. Recursion is  
522  specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,  /* This label is used for tail recursion, which is used in a few cases even
523  these just turn into a recursive call to match() and a "return", respectively.  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
524  However, RMATCH isn't like a function call because it's quite a complicated  used. Thanks to Ian Taylor for noticing this possibility and sending the
525  macro. It has to be used in one particular way. This shouldn't, however, impact  original patch. */
526  performance when true recursion is being used. */  
527    TAIL_RECURSE:
528    
529    /* OK, now we can get on with the real code of the function. Recursive calls
530    are specified by the macro RMATCH and RRETURN is used to return. When
531    NO_RECURSE is *not* defined, these just turn into a recursive call to match()
532    and a "return", respectively (possibly with some debugging if DEBUG is
533    defined). However, RMATCH isn't like a function call because it's quite a
534    complicated macro. It has to be used in one particular way. This shouldn't,
535    however, impact performance when true recursion is being used. */
536    
537    /* First check that we haven't called match() too many times, or that we
538    haven't exceeded the recursive call limit. */
539    
540  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
541    if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
542    
543  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
544    
545    #ifdef SUPPORT_UTF8
546  utf8 = md->utf8;       /* Local copy of the flag */  utf8 = md->utf8;       /* Local copy of the flag */
547    #else
548    utf8 = FALSE;
549    #endif
550    
551  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
552  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
553  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
554  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
555    When match() is called in other circumstances, don't add to the chain. If this
556    is a tail recursion, use a block from the workspace, as the one on the stack is
557    already used. */
558    
559  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
560    {    {
561    newptrb.epb_prev = eptrb;    eptrblock *p;
562    newptrb.epb_saved_eptr = eptr;    if ((flags & match_tail_recursed) != 0)
563    eptrb = &newptrb;      {
564        if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
565        p = md->eptrchain + md->eptrn++;
566        }
567      else p = &newptrb;
568      p->epb_saved_eptr = eptr;
569      p->epb_prev = eptrb;
570      eptrb = p;
571    }    }
572    
573  /* Now start processing the operations. */  /* Now start processing the opcodes. */
574    
575  for (;;)  for (;;)
576    {    {
577      minimize = possessive = FALSE;
578    op = *ecode;    op = *ecode;
   minimize = FALSE;  
579    
580    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
581    matching at least one subject character. */    matching at least one subject character. */
# Line 543  for (;;) Line 585  for (;;)
585        eptr > md->start_match)        eptr > md->start_match)
586      md->hitend = TRUE;      md->hitend = TRUE;
587    
588    /* Opening capturing bracket. If there is space in the offset vector, save    switch(op)
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
589      {      {
590      number = op - OP_BRA;      /* Handle a capturing bracket. If there is space in the offset vector, save
591        the current subject position in the working slot at the top of the vector.
592      /* For extended extraction brackets (large number), we have to fish out the      We mustn't change the current values of the data slot, because they may be
593      number from a dummy opcode at the start. */      set from a previous iteration of this group, and be referred to by a
594        reference inside the group.
595      if (number > EXTRACT_BASIC_MAX)  
596        number = GET2(ecode, 2+LINK_SIZE);      If the bracket fails to match, we need to restore this value and also the
597        values of the final offsets, in case they were set by a previous iteration
598        of the same bracket.
599    
600        If there isn't enough space in the offset vector, treat this as if it were
601        a non-capturing bracket. Don't worry about setting the flag for the error
602        case here; that is handled in the code for KET. */
603    
604        case OP_CBRA:
605        case OP_SCBRA:
606        number = GET2(ecode, 1+LINK_SIZE);
607      offset = number << 1;      offset = number << 1;
608    
609  #ifdef DEBUG  #ifdef DEBUG
610      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
611        printf("subject=");
612      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
613      printf("\n");      printf("\n");
614  #endif  #endif
# Line 584  for (;;) Line 623  for (;;)
623        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
624        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
625    
626          flags = (op == OP_SCBRA)? match_cbegroup : 0;
627        do        do
628          {          {
629          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
630            match_isgroup);            ims, eptrb, flags);
631          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
632          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
633          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
# Line 603  for (;;) Line 643  for (;;)
643        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
644        }        }
645    
646      /* Insufficient room for saving captured contents */      /* Insufficient room for saving captured contents. Treat as a non-capturing
647        bracket. */
648    
649      else op = OP_BRA;      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
     }  
650    
651    /* Other types of node can be handled by a switch */      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
652        final alternative within the brackets, we would return the result of a
653        recursive call to match() whatever happened. We can reduce stack usage by
654        turning this into a tail recursion. */
655    
656    switch(op)      case OP_BRA:
657      {      case OP_SBRA:
658      case OP_BRA:     /* Non-capturing bracket: optimized */      DPRINTF(("start non-capturing bracket\n"));
659      DPRINTF(("start bracket 0\n"));      flags = (op >= OP_SBRA)? match_cbegroup : 0;
660      do      for (;;)
661        {        {
662        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)
663          match_isgroup);          {
664            ecode += _pcre_OP_lengths[*ecode];
665            flags |= match_tail_recursed;
666            DPRINTF(("bracket 0 tail recursion\n"));
667            goto TAIL_RECURSE;
668            }
669    
670          /* For non-final alternatives, continue the loop for a NOMATCH result;
671          otherwise return. */
672    
673          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
674            eptrb, flags);
675        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
676        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
677        }        }
678      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
679    
680      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
681      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
682      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
683      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
684        obeyed, we can use tail recursion to avoid using another stack frame. */
685    
686      case OP_COND:      case OP_COND:
687      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
688        if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
689          {
690          offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
691          condition = md->recursive != NULL &&
692            (offset == RREF_ANY || offset == md->recursive->group_num);
693          ecode += condition? 3 : GET(ecode, 1);
694          }
695    
696        else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
697        {        {
698        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
699        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
700          (md->recursive != NULL) :        ecode += condition? 3 : GET(ecode, 1);
701          (offset < offset_top && md->offset_vector[offset] >= 0);        }
702        RMATCH(rrc, eptr, ecode + (condition?  
703          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
704          offset_top, md, ims, eptrb, match_isgroup);        {
705        RRETURN(rrc);        condition = FALSE;
706          ecode += GET(ecode, 1);
707        }        }
708    
709      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
710      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
711        assertion. */
712    
713      else      else
714        {        {
715        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
716            match_condassert | match_isgroup);            match_condassert);
717        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
718          {          {
719          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
720            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
721          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
722          }          }
723        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH)
724          {          {
725          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
726          }          }
727        else ecode += GET(ecode, 1);        else
728        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
729          match_isgroup);          condition = FALSE;
730        RRETURN(rrc);          ecode += GET(ecode, 1);
731            }
732        }        }
     /* Control never reaches here */  
733    
734      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
735      encountered. */      we can use tail recursion to avoid using another stack frame. If the second
736        alternative doesn't exist, we can just plough on. */
737    
738      case OP_CREF:      if (condition || *ecode == OP_ALT)
739      case OP_BRANUMBER:        {
740      ecode += 3;        ecode += 1 + LINK_SIZE;
741          flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
742          goto TAIL_RECURSE;
743          }
744        else
745          {
746          ecode += 1 + LINK_SIZE;
747          }
748      break;      break;
749    
750      /* End of the pattern. If we are in a recursion, we should restore the  
751      offsets appropriately and continue from after the call. */      /* End of the pattern. If we are in a top-level recursion, we should
752        restore the offsets appropriately and continue from after the call. */
753    
754      case OP_END:      case OP_END:
755      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
756        {        {
757        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
758        DPRINTF(("Hit the end in a (?0) recursion\n"));        DPRINTF(("End of pattern in a (?0) recursion\n"));
759        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
760        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
761          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
# Line 717  for (;;) Line 791  for (;;)
791      case OP_ASSERTBACK:      case OP_ASSERTBACK:
792      do      do
793        {        {
794        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
         match_isgroup);  
795        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
796        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
797        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 744  for (;;) Line 817  for (;;)
817      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
818      do      do
819        {        {
820        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
         match_isgroup);  
821        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
822        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
823        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 766  for (;;) Line 838  for (;;)
838  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
839      if (utf8)      if (utf8)
840        {        {
841        c = GET(ecode,1);        i = GET(ecode, 1);
842        for (i = 0; i < c; i++)        while (i-- > 0)
843          {          {
844          eptr--;          eptr--;
845          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
# Line 780  for (;;) Line 852  for (;;)
852      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
853    
854        {        {
855        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
856        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
857        }        }
858    
# Line 800  for (;;) Line 872  for (;;)
872        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 1;   /* Version 1 of the callout block */
873        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
874        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
875        cb.subject          = (const char *)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
876        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
877        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = md->start_match - md->start_subject;
878        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
# Line 837  for (;;) Line 909  for (;;)
909      case OP_RECURSE:      case OP_RECURSE:
910        {        {
911        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
912        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
913            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
914    
915        /* Add to "recursing stack" */        /* Add to "recursing stack" */
916    
# Line 876  for (;;) Line 943  for (;;)
943        restore the offset and recursion data. */        restore the offset and recursion data. */
944    
945        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
946          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
947        do        do
948          {          {
949          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
950              eptrb, match_isgroup);            md, ims, eptrb, flags);
951          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
952            {            {
953              DPRINTF(("Recursion matched\n"));
954            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
955            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
956              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
957            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
958            }            }
959          else if (rrc != MATCH_NOMATCH) RRETURN(rrc);          else if (rrc != MATCH_NOMATCH)
960              {
961              DPRINTF(("Recursion gave error %d\n", rrc));
962              RRETURN(rrc);
963              }
964    
965          md->recursive = &new_recursive;          md->recursive = &new_recursive;
966          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
# Line 912  for (;;) Line 985  for (;;)
985      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer. */
986    
987      case OP_ONCE:      case OP_ONCE:
988        {      prev = ecode;
989        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
990    
991        do      do
992          {        {
993          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
994            eptrb, match_isgroup);          eptrb, 0);
995          if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
996          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
997          ecode += GET(ecode,1);        ecode += GET(ecode,1);
998          }        }
999        while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1000    
1001        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1002    
1003        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1004    
1005        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1006        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1007    
1008        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1009    
1010        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1011        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1012    
1013        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1014        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1015        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1016        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1017        course of events. */      course of events. */
1018    
1019        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1020          {        {
1021          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1022          break;        break;
1023          }        }
1024    
1025        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1026        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1027        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1028        opcode. */      any options that changed within the bracket before re-running it, so
1029        check the next opcode. */
1030    
1031        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1032          {        {
1033          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1034          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1035          }        }
1036    
1037        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1038          {        {
1039          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
1040          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1041          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1042          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = match_tail_recursed;
1043          }        goto TAIL_RECURSE;
1044        else  /* OP_KETRMAX */        }
1045          {      else  /* OP_KETRMAX */
1046          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        {
1047          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);
1048          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1049          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += 1 + LINK_SIZE;
1050          }        flags = match_tail_recursed;
1051          goto TAIL_RECURSE;
1052        }        }
1053      RRETURN(MATCH_NOMATCH);      /* Control never gets here */
1054    
1055      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1056      bracketed group and go to there. */      bracketed group and go to there. */
# Line 994  for (;;) Line 1068  for (;;)
1068      case OP_BRAZERO:      case OP_BRAZERO:
1069        {        {
1070        next = ecode+1;        next = ecode+1;
1071        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);
1072        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1073        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1074        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1075        }        }
1076      break;      break;
1077    
1078      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1079        {        {
1080        next = ecode+1;        next = ecode+1;
1081        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1082        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
         match_isgroup);  
1083        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1084        ecode++;        ecode++;
1085        }        }
1086      break;      break;
1087    
1088      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. */
     an assertion "group", stop matching and return MATCH_MATCH, but record the  
     current high water mark for use by positive assertions. Do this also  
     for the "once" (not-backup up) groups. */  
1089    
1090      case OP_KET:      case OP_KET:
1091      case OP_KETRMIN:      case OP_KETRMIN:
1092      case OP_KETRMAX:      case OP_KETRMAX:
1093        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
1094    
1095        /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1096        infinite repeats of empty string matches, retrieve the subject start from
1097        the chain. Otherwise, set it NULL. */
1098    
1099        eptrb = eptrb->epb_prev;      if (*prev >= OP_SBRA)
1100          {
1101        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1102            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1103            *prev == OP_ONCE)        }
1104          {      else saved_eptr = NULL;
         md->end_match_ptr = eptr;      /* For ONCE */  
         md->end_offset_top = offset_top;  
         RRETURN(MATCH_MATCH);  
         }  
1105    
1106        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group, stop matching and return
1107        group number back at the start and if necessary complete handling an      MATCH_MATCH, but record the current high water mark for use by positive
1108        extraction by setting the offsets and bumping the high water mark. */      assertions. Do this also for the "once" (atomic) groups. */
1109    
1110        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1111          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1112          number = *prev - OP_BRA;          *prev == OP_ONCE)
1113          {
1114          md->end_match_ptr = eptr;      /* For ONCE */
1115          md->end_offset_top = offset_top;
1116          RRETURN(MATCH_MATCH);
1117          }
1118    
1119          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1120          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1121        bumping the high water mark. Note that whole-pattern recursion is coded as
1122        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1123        when the OP_END is reached. Other recursion is handled here. */
1124    
1125          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1126          offset = number << 1;        {
1127          number = GET2(prev, 1+LINK_SIZE);
1128          offset = number << 1;
1129    
1130  #ifdef DEBUG  #ifdef DEBUG
1131          printf("end bracket %d", number);        printf("end bracket %d", number);
1132          printf("\n");        printf("\n");
1133  #endif  #endif
1134    
1135          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1136          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1137          into group 0, so it won't be picked up here. Instead, we catch it when          {
1138          the OP_END is reached. */          md->offset_vector[offset] =
1139              md->offset_vector[md->offset_end - number];
1140          if (number > 0)          md->offset_vector[offset+1] = eptr - md->start_subject;
1141            {          if (offset_top <= offset) offset_top = offset + 2;
1142            md->capture_last = number;          }
1143            if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
1144              {        /* Handle a recursively called group. Restore the offsets
1145              md->offset_vector[offset] =        appropriately and continue from after the call. */
1146                md->offset_vector[md->offset_end - number];  
1147              md->offset_vector[offset+1] = eptr - md->start_subject;        if (md->recursive != NULL && md->recursive->group_num == number)
1148              if (offset_top <= offset) offset_top = offset + 2;          {
1149              }          recursion_info *rec = md->recursive;
1150            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1151            /* Handle a recursively called group. Restore the offsets          md->recursive = rec->prevrec;
1152            appropriately and continue from after the call. */          md->start_match = rec->save_start;
1153            memcpy(md->offset_vector, rec->offset_save,
1154            if (md->recursive != NULL && md->recursive->group_num == number)            rec->saved_max * sizeof(int));
1155              {          ecode = rec->after_call;
1156              recursion_info *rec = md->recursive;          ims = original_ims;
1157              DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          break;
             md->recursive = rec->prevrec;  
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
1158          }          }
1159          }
1160    
1161        /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1162        the group. */      flags, in case they got changed during the group. */
1163    
1164        ims = original_ims;      ims = original_ims;
1165        DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
1166    
1167        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1168        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1169        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1170        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1171        course of events. */      course of events. */
1172    
1173        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1174          {        {
1175          ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1176          break;        break;
1177          }        }
1178    
1179        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1180        preceding bracket, in the appropriate order. */      preceding bracket, in the appropriate order. In the second case, we can use
1181        tail recursion to avoid using another stack frame. */
1182    
1183        if (*ecode == OP_KETRMIN)      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
         {  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
       }  
1184    
1185      RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KETRMIN)
1186          {
1187          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1188          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1189          ecode = prev;
1190          flags |= match_tail_recursed;
1191          goto TAIL_RECURSE;
1192          }
1193        else  /* OP_KETRMAX */
1194          {
1195          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);
1196          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1197          ecode += 1 + LINK_SIZE;
1198          flags = match_tail_recursed;
1199          goto TAIL_RECURSE;
1200          }
1201        /* Control never gets here */
1202    
1203      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1204    
# Line 1135  for (;;) Line 1206  for (;;)
1206      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1207      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1208        {        {
1209        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1210              (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1211          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1212        ecode++;        ecode++;
1213        break;        break;
# Line 1163  for (;;) Line 1235  for (;;)
1235      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1236        {        {
1237        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1238          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1239        else        else
1240          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1241        ecode++;        ecode++;
# Line 1174  for (;;) Line 1246  for (;;)
1246        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
1247        if (!md->endonly)        if (!md->endonly)
1248          {          {
1249          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1250             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1251            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1252          ecode++;          ecode++;
1253          break;          break;
1254          }          }
1255        }        }
1256      /* ... else fall through */      /* ... else fall through for endonly */
1257    
1258      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1259    
# Line 1193  for (;;) Line 1265  for (;;)
1265      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1266    
1267      case OP_EODN:      case OP_EODN:
1268      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1269         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1270          RRETURN(MATCH_NOMATCH);
1271      ecode++;      ecode++;
1272      break;      break;
1273    
# Line 1247  for (;;) Line 1320  for (;;)
1320      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1321    
1322      case OP_ANY:      case OP_ANY:
1323      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if ((ims & PCRE_DOTALL) == 0)
1324        RRETURN(MATCH_NOMATCH);        {
1325          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1326          }
1327      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
 #ifdef SUPPORT_UTF8  
1328      if (utf8)      if (utf8)
1329        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 #endif  
1330      ecode++;      ecode++;
1331      break;      break;
1332    
# Line 1343  for (;;) Line 1416  for (;;)
1416      ecode++;      ecode++;
1417      break;      break;
1418    
1419        case OP_ANYNL:
1420        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1421        GETCHARINCTEST(c, eptr);
1422        switch(c)
1423          {
1424          default: RRETURN(MATCH_NOMATCH);
1425          case 0x000d:
1426          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1427          break;
1428          case 0x000a:
1429          case 0x000b:
1430          case 0x000c:
1431          case 0x0085:
1432          case 0x2028:
1433          case 0x2029:
1434          break;
1435          }
1436        ecode++;
1437        break;
1438    
1439  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1440      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1441      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1352  for (;;) Line 1445  for (;;)
1445      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1446      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1447        {        {
1448        int chartype, rqdtype;        int chartype, script;
1449        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = ucp_findchar(c, &chartype, &othercase);  
   
       rqdtype = *(++ecode);  
       ecode++;  
1450    
1451        if (rqdtype >= 128)        switch(ecode[1])
1452          {          {
1453          if ((rqdtype - 128 != category) == (op == OP_PROP))          case PT_ANY:
1454            if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1455            break;
1456    
1457            case PT_LAMP:
1458            if ((chartype == ucp_Lu ||
1459                 chartype == ucp_Ll ||
1460                 chartype == ucp_Lt) == (op == OP_NOTPROP))
1461            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1462          }           break;
1463        else  
1464          {          case PT_GC:
1465          if ((rqdtype != chartype) == (op == OP_PROP))          if ((ecode[2] != category) == (op == OP_PROP))
1466              RRETURN(MATCH_NOMATCH);
1467            break;
1468    
1469            case PT_PC:
1470            if ((ecode[2] != chartype) == (op == OP_PROP))
1471            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1472            break;
1473    
1474            case PT_SC:
1475            if ((ecode[2] != script) == (op == OP_PROP))
1476              RRETURN(MATCH_NOMATCH);
1477            break;
1478    
1479            default:
1480            RRETURN(PCRE_ERROR_INTERNAL);
1481          }          }
1482    
1483          ecode += 3;
1484        }        }
1485      break;      break;
1486    
# Line 1379  for (;;) Line 1491  for (;;)
1491      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1492      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1493        {        {
1494        int chartype;        int chartype, script;
1495        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = ucp_findchar(c, &chartype, &othercase);  
1496        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1497        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1498          {          {
# Line 1390  for (;;) Line 1501  for (;;)
1501            {            {
1502            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1503            }            }
1504          category = ucp_findchar(c, &chartype, &othercase);          category = _pcre_ucp_findprop(c, &chartype, &script);
1505          if (category != ucp_M) break;          if (category != ucp_M) break;
1506          eptr += len;          eptr += len;
1507          }          }
# Line 1683  for (;;) Line 1794  for (;;)
1794            while (eptr >= pp)            while (eptr >= pp)
1795              {              {
1796              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
             eptr--;  
1797              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1798                eptr--;
1799              }              }
1800            }            }
1801    
# Line 1836  for (;;) Line 1947  for (;;)
1947    
1948        else        else
1949          {          {
1950          int dc;          unsigned int dc;
1951          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
1952          ecode += length;          ecode += length;
1953    
1954          /* If we have Unicode property support, we can use it to test the other          /* If we have Unicode property support, we can use it to test the other
1955          case of the character, if there is one. The result of ucp_findchar() is          case of the character, if there is one. */
         < 0 if the char isn't found, and othercase is returned as zero if there  
         isn't one. */  
1956    
1957          if (fc != dc)          if (fc != dc)
1958            {            {
1959  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1960            int chartype;            if (dc != _pcre_ucp_othercase(fc))
           int othercase;  
           if (ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)  
1961  #endif  #endif
1962              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
1963            }            }
# Line 1867  for (;;) Line 1974  for (;;)
1974        }        }
1975      break;      break;
1976    
1977      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
1978    
1979      case OP_EXACT:      case OP_EXACT:
1980      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
1981      ecode += 3;      ecode += 3;
1982      goto REPEATCHAR;      goto REPEATCHAR;
1983    
1984        case OP_POSUPTO:
1985        possessive = TRUE;
1986        /* Fall through */
1987    
1988      case OP_UPTO:      case OP_UPTO:
1989      case OP_MINUPTO:      case OP_MINUPTO:
1990      min = 0;      min = 0;
# Line 1882  for (;;) Line 1993  for (;;)
1993      ecode += 3;      ecode += 3;
1994      goto REPEATCHAR;      goto REPEATCHAR;
1995    
1996        case OP_POSSTAR:
1997        possessive = TRUE;
1998        min = 0;
1999        max = INT_MAX;
2000        ecode++;
2001        goto REPEATCHAR;
2002    
2003        case OP_POSPLUS:
2004        possessive = TRUE;
2005        min = 1;
2006        max = INT_MAX;
2007        ecode++;
2008        goto REPEATCHAR;
2009    
2010        case OP_POSQUERY:
2011        possessive = TRUE;
2012        min = 0;
2013        max = 1;
2014        ecode++;
2015        goto REPEATCHAR;
2016    
2017      case OP_STAR:      case OP_STAR:
2018      case OP_MINSTAR:      case OP_MINSTAR:
2019      case OP_PLUS:      case OP_PLUS:
# Line 1917  for (;;) Line 2049  for (;;)
2049          uschar occhars[8];          uschar occhars[8];
2050    
2051  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2052          int othercase;          unsigned int othercase;
         int chartype;  
2053          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2054               ucp_findchar(fc, &chartype, &othercase) >= 0 &&              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
              othercase > 0)  
2055            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2056  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2057    
# Line 1957  for (;;) Line 2087  for (;;)
2087              }              }
2088            /* Control never gets here */            /* Control never gets here */
2089            }            }
2090          else  
2091            else  /* Maximize */
2092            {            {
2093            pp = eptr;            pp = eptr;
2094            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 1971  for (;;) Line 2102  for (;;)
2102                eptr += oclength;                eptr += oclength;
2103                }                }
2104              }              }
2105    
2106              if (possessive) continue;
2107            while (eptr >= pp)            while (eptr >= pp)
2108             {             {
2109             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2025  for (;;) Line 2158  for (;;)
2158            }            }
2159          /* Control never gets here */          /* Control never gets here */
2160          }          }
2161        else        else  /* Maximize */
2162          {          {
2163          pp = eptr;          pp = eptr;
2164          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2033  for (;;) Line 2166  for (;;)
2166            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2167            eptr++;            eptr++;
2168            }            }
2169            if (possessive) continue;
2170          while (eptr >= pp)          while (eptr >= pp)
2171            {            {
2172            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2061  for (;;) Line 2195  for (;;)
2195            }            }
2196          /* Control never gets here */          /* Control never gets here */
2197          }          }
2198        else        else  /* Maximize */
2199          {          {
2200          pp = eptr;          pp = eptr;
2201          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2069  for (;;) Line 2203  for (;;)
2203            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2204            eptr++;            eptr++;
2205            }            }
2206            if (possessive) continue;
2207          while (eptr >= pp)          while (eptr >= pp)
2208            {            {
2209            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2121  for (;;) Line 2256  for (;;)
2256      ecode += 3;      ecode += 3;
2257      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2258    
2259        case OP_NOTPOSSTAR:
2260        possessive = TRUE;
2261        min = 0;
2262        max = INT_MAX;
2263        ecode++;
2264        goto REPEATNOTCHAR;
2265    
2266        case OP_NOTPOSPLUS:
2267        possessive = TRUE;
2268        min = 1;
2269        max = INT_MAX;
2270        ecode++;
2271        goto REPEATNOTCHAR;
2272    
2273        case OP_NOTPOSQUERY:
2274        possessive = TRUE;
2275        min = 0;
2276        max = 1;
2277        ecode++;
2278        goto REPEATNOTCHAR;
2279    
2280        case OP_NOTPOSUPTO:
2281        possessive = TRUE;
2282        min = 0;
2283        max = GET2(ecode, 1);
2284        ecode += 3;
2285        goto REPEATNOTCHAR;
2286    
2287      case OP_NOTSTAR:      case OP_NOTSTAR:
2288      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2289      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2160  for (;;) Line 2323  for (;;)
2323        /* UTF-8 mode */        /* UTF-8 mode */
2324        if (utf8)        if (utf8)
2325          {          {
2326          register int d;          register unsigned int d;
2327          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2328            {            {
2329            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2185  for (;;) Line 2348  for (;;)
2348          /* UTF-8 mode */          /* UTF-8 mode */
2349          if (utf8)          if (utf8)
2350            {            {
2351            register int d;            register unsigned int d;
2352            for (fi = min;; fi++)            for (fi = min;; fi++)
2353              {              {
2354              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2221  for (;;) Line 2384  for (;;)
2384          /* UTF-8 mode */          /* UTF-8 mode */
2385          if (utf8)          if (utf8)
2386            {            {
2387            register int d;            register unsigned int d;
2388            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2389              {              {
2390              int len = 1;              int len = 1;
# Line 2231  for (;;) Line 2394  for (;;)
2394              if (fc == d) break;              if (fc == d) break;
2395              eptr += len;              eptr += len;
2396              }              }
2397            for(;;)          if (possessive) continue;
2398            for(;;)
2399              {              {
2400              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2401              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 2248  for (;;) Line 2412  for (;;)
2412              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2413              eptr++;              eptr++;
2414              }              }
2415              if (possessive) continue;
2416            while (eptr >= pp)            while (eptr >= pp)
2417              {              {
2418              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2269  for (;;) Line 2434  for (;;)
2434        /* UTF-8 mode */        /* UTF-8 mode */
2435        if (utf8)        if (utf8)
2436          {          {
2437          register int d;          register unsigned int d;
2438          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2439            {            {
2440            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2292  for (;;) Line 2457  for (;;)
2457          /* UTF-8 mode */          /* UTF-8 mode */
2458          if (utf8)          if (utf8)
2459            {            {
2460            register int d;            register unsigned int d;
2461            for (fi = min;; fi++)            for (fi = min;; fi++)
2462              {              {
2463              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2327  for (;;) Line 2492  for (;;)
2492          /* UTF-8 mode */          /* UTF-8 mode */
2493          if (utf8)          if (utf8)
2494            {            {
2495            register int d;            register unsigned int d;
2496            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2497              {              {
2498              int len = 1;              int len = 1;
# Line 2336  for (;;) Line 2501  for (;;)
2501              if (fc == d) break;              if (fc == d) break;
2502              eptr += len;              eptr += len;
2503              }              }
2504              if (possessive) continue;
2505            for(;;)            for(;;)
2506              {              {
2507              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2353  for (;;) Line 2519  for (;;)
2519              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2520              eptr++;              eptr++;
2521              }              }
2522              if (possessive) continue;
2523            while (eptr >= pp)            while (eptr >= pp)
2524              {              {
2525              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2384  for (;;) Line 2551  for (;;)
2551      ecode += 3;      ecode += 3;
2552      goto REPEATTYPE;      goto REPEATTYPE;
2553    
2554        case OP_TYPEPOSSTAR:
2555        possessive = TRUE;
2556        min = 0;
2557        max = INT_MAX;
2558        ecode++;
2559        goto REPEATTYPE;
2560    
2561        case OP_TYPEPOSPLUS:
2562        possessive = TRUE;
2563        min = 1;
2564        max = INT_MAX;
2565        ecode++;
2566        goto REPEATTYPE;
2567    
2568        case OP_TYPEPOSQUERY:
2569        possessive = TRUE;
2570        min = 0;
2571        max = 1;
2572        ecode++;
2573        goto REPEATTYPE;
2574    
2575        case OP_TYPEPOSUPTO:
2576        possessive = TRUE;
2577        min = 0;
2578        max = GET2(ecode, 1);
2579        ecode += 3;
2580        goto REPEATTYPE;
2581    
2582      case OP_TYPESTAR:      case OP_TYPESTAR:
2583      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
2584      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2408  for (;;) Line 2603  for (;;)
2603        {        {
2604        prop_fail_result = ctype == OP_NOTPROP;        prop_fail_result = ctype == OP_NOTPROP;
2605        prop_type = *ecode++;        prop_type = *ecode++;
2606        if (prop_type >= 128)        prop_value = *ecode++;
         {  
         prop_test_against = prop_type - 128;  
         prop_test_variable = &prop_category;  
         }  
       else  
         {  
         prop_test_against = prop_type;  
         prop_test_variable = &prop_chartype;  
         }  
2607        }        }
2608      else prop_type = -1;      else prop_type = -1;
2609  #endif  #endif
# Line 2434  for (;;) Line 2620  for (;;)
2620      if (min > 0)      if (min > 0)
2621        {        {
2622  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2623        if (prop_type > 0)        if (prop_type >= 0)
2624          {          {
2625          for (i = 1; i <= min; i++)          switch(prop_type)
2626            {            {
2627            GETCHARINC(c, eptr);            case PT_ANY:
2628            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2629            if ((*prop_test_variable == prop_test_against) == prop_fail_result)            for (i = 1; i <= min; i++)
2630              RRETURN(MATCH_NOMATCH);              {
2631                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2632                GETCHARINC(c, eptr);
2633                }
2634              break;
2635    
2636              case PT_LAMP:
2637              for (i = 1; i <= min; i++)
2638                {
2639                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2640                GETCHARINC(c, eptr);
2641                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2642                if ((prop_chartype == ucp_Lu ||
2643                     prop_chartype == ucp_Ll ||
2644                     prop_chartype == ucp_Lt) == prop_fail_result)
2645                  RRETURN(MATCH_NOMATCH);
2646                }
2647              break;
2648    
2649              case PT_GC:
2650              for (i = 1; i <= min; i++)
2651                {
2652                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2653                GETCHARINC(c, eptr);
2654                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2655                if ((prop_category == prop_value) == prop_fail_result)
2656                  RRETURN(MATCH_NOMATCH);
2657                }
2658              break;
2659    
2660              case PT_PC:
2661              for (i = 1; i <= min; i++)
2662                {
2663                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2664                GETCHARINC(c, eptr);
2665                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2666                if ((prop_chartype == prop_value) == prop_fail_result)
2667                  RRETURN(MATCH_NOMATCH);
2668                }
2669              break;
2670    
2671              case PT_SC:
2672              for (i = 1; i <= min; i++)
2673                {
2674                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2675                GETCHARINC(c, eptr);
2676                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2677                if ((prop_script == prop_value) == prop_fail_result)
2678                  RRETURN(MATCH_NOMATCH);
2679                }
2680              break;
2681    
2682              default:
2683              RRETURN(PCRE_ERROR_INTERNAL);
2684            }            }
2685          }          }
2686    
# Line 2453  for (;;) Line 2692  for (;;)
2692          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2693            {            {
2694            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2695            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2696            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2697            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2698              {              {
# Line 2462  for (;;) Line 2701  for (;;)
2701                {                {
2702                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2703                }                }
2704              prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2705              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2706              eptr += len;              eptr += len;
2707              }              }
# Line 2481  for (;;) Line 2720  for (;;)
2720          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2721            {            {
2722            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
2723               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2724              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2725              eptr++;
2726            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2727            }            }
2728          break;          break;
# Line 2491  for (;;) Line 2731  for (;;)
2731          eptr += min;          eptr += min;
2732          break;          break;
2733    
2734            case OP_ANYNL:
2735            for (i = 1; i <= min; i++)
2736              {
2737              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2738              GETCHARINC(c, eptr);
2739              switch(c)
2740                {
2741                default: RRETURN(MATCH_NOMATCH);
2742                case 0x000d:
2743                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2744                break;
2745                case 0x000a:
2746                case 0x000b:
2747                case 0x000c:
2748                case 0x0085:
2749                case 0x2028:
2750                case 0x2029:
2751                break;
2752                }
2753              }
2754            break;
2755    
2756          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2757          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2758            {            {
# Line 2559  for (;;) Line 2821  for (;;)
2821  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
2822    
2823        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
2824        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
2825          number of bytes present, as this was tested above. */
2826    
2827        switch(ctype)        switch(ctype)
2828          {          {
# Line 2567  for (;;) Line 2830  for (;;)
2830          if ((ims & PCRE_DOTALL) == 0)          if ((ims & PCRE_DOTALL) == 0)
2831            {            {
2832            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2833              if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);              {
2834                if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2835                eptr++;
2836                }
2837            }            }
2838          else eptr += min;          else eptr += min;
2839          break;          break;
# Line 2576  for (;;) Line 2842  for (;;)
2842          eptr += min;          eptr += min;
2843          break;          break;
2844    
2845            /* Because of the CRLF case, we can't assume the minimum number of
2846            bytes are present in this case. */
2847    
2848            case OP_ANYNL:
2849            for (i = 1; i <= min; i++)
2850              {
2851              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2852              switch(*eptr++)
2853                {
2854                default: RRETURN(MATCH_NOMATCH);
2855                case 0x000d:
2856                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2857                break;
2858                case 0x000a:
2859                case 0x000b:
2860                case 0x000c:
2861                case 0x0085:
2862                break;
2863                }
2864              }
2865            break;
2866    
2867          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2868          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2869            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2624  for (;;) Line 2912  for (;;)
2912      if (minimize)      if (minimize)
2913        {        {
2914  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2915        if (prop_type > 0)        if (prop_type >= 0)
2916          {          {
2917          for (fi = min;; fi++)          switch(prop_type)
2918            {            {
2919            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            case PT_ANY:
2920            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for (fi = min;; fi++)
2921            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              {
2922            GETCHARINC(c, eptr);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2923            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2924            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2925              RRETURN(MATCH_NOMATCH);              GETCHARINC(c, eptr);
2926                if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2927                }
2928              /* Control never gets here */
2929    
2930              case PT_LAMP:
2931              for (fi = min;; fi++)
2932                {
2933                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2934                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2935                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2936                GETCHARINC(c, eptr);
2937                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2938                if ((prop_chartype == ucp_Lu ||
2939                     prop_chartype == ucp_Ll ||
2940                     prop_chartype == ucp_Lt) == prop_fail_result)
2941                  RRETURN(MATCH_NOMATCH);
2942                }
2943              /* Control never gets here */
2944    
2945              case PT_GC:
2946              for (fi = min;; fi++)
2947                {
2948                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2949                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2950                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2951                GETCHARINC(c, eptr);
2952                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2953                if ((prop_category == prop_value) == prop_fail_result)
2954                  RRETURN(MATCH_NOMATCH);
2955                }
2956              /* Control never gets here */
2957    
2958              case PT_PC:
2959              for (fi = min;; fi++)
2960                {
2961                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2962                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2963                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2964                GETCHARINC(c, eptr);
2965                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2966                if ((prop_chartype == prop_value) == prop_fail_result)
2967                  RRETURN(MATCH_NOMATCH);
2968                }
2969              /* Control never gets here */
2970    
2971              case PT_SC:
2972              for (fi = min;; fi++)
2973                {
2974                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2975                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2976                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2977                GETCHARINC(c, eptr);
2978                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2979                if ((prop_script == prop_value) == prop_fail_result)
2980                  RRETURN(MATCH_NOMATCH);
2981                }
2982              /* Control never gets here */
2983    
2984              default:
2985              RRETURN(PCRE_ERROR_INTERNAL);
2986            }            }
2987          }          }
2988    
# Line 2649  for (;;) Line 2997  for (;;)
2997            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2998            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2999            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3000            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3001            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3002            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3003              {              {
# Line 2658  for (;;) Line 3006  for (;;)
3006                {                {
3007                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3008                }                }
3009              prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3010              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3011              eptr += len;              eptr += len;
3012              }              }
# Line 2676  for (;;) Line 3024  for (;;)
3024            {            {
3025            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3026            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3027            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3028                   (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3029                    IS_NEWLINE(eptr)))
3030                RRETURN(MATCH_NOMATCH);
3031    
3032            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3033            switch(ctype)            switch(ctype)
3034              {              {
3035              case OP_ANY:              case OP_ANY:        /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3036              break;              break;
3037    
3038              case OP_ANYBYTE:              case OP_ANYBYTE:
3039              break;              break;
3040    
3041                case OP_ANYNL:
3042                switch(c)
3043                  {
3044                  default: RRETURN(MATCH_NOMATCH);
3045                  case 0x000d:
3046                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3047                  break;
3048                  case 0x000a:
3049                  case 0x000b:
3050                  case 0x000c:
3051                  case 0x0085:
3052                  case 0x2028:
3053                  case 0x2029:
3054                  break;
3055                  }
3056                break;
3057    
3058              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3059              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3060                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2731  for (;;) Line 3098  for (;;)
3098            {            {
3099            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3100            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3101            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3102                   ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3103                RRETURN(MATCH_NOMATCH);
3104    
3105            c = *eptr++;            c = *eptr++;
3106            switch(ctype)            switch(ctype)
3107              {              {
3108              case OP_ANY:              case OP_ANY:   /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3109              break;              break;
3110    
3111              case OP_ANYBYTE:              case OP_ANYBYTE:
3112              break;              break;
3113    
3114                case OP_ANYNL:
3115                switch(c)
3116                  {
3117                  default: RRETURN(MATCH_NOMATCH);
3118                  case 0x000d:
3119                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3120                  break;
3121                  case 0x000a:
3122                  case 0x000b:
3123                  case 0x000c:
3124                  case 0x0085:
3125                  break;
3126                  }
3127                break;
3128    
3129              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3130              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3131              break;              break;
# Line 2774  for (;;) Line 3158  for (;;)
3158        /* Control never gets here */        /* Control never gets here */
3159        }        }
3160    
3161      /* If maximizing it is worth using inline code for speed, doing the type      /* If maximizing, it is worth using inline code for speed, doing the type
3162      test once at the start (i.e. keep it out of the loop). Again, keep the      test once at the start (i.e. keep it out of the loop). Again, keep the
3163      UTF-8 and UCP stuff separate. */      UTF-8 and UCP stuff separate. */
3164    
# Line 2783  for (;;) Line 3167  for (;;)
3167        pp = eptr;  /* Remember where we started */        pp = eptr;  /* Remember where we started */
3168    
3169  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3170        if (prop_type > 0)        if (prop_type >= 0)
3171          {          {
3172          for (i = min; i < max; i++)          switch(prop_type)
3173            {            {
3174            int len = 1;            case PT_ANY:
3175            if (eptr >= md->end_subject) break;            for (i = min; i < max; i++)
3176            GETCHARLEN(c, eptr, len);              {
3177            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);              int len = 1;
3178            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (eptr >= md->end_subject) break;
3179              break;              GETCHARLEN(c, eptr, len);
3180            eptr+= len;              if (prop_fail_result) break;
3181                eptr+= len;
3182                }
3183              break;
3184    
3185              case PT_LAMP:
3186              for (i = min; i < max; i++)
3187                {
3188                int len = 1;
3189                if (eptr >= md->end_subject) break;
3190                GETCHARLEN(c, eptr, len);
3191                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3192                if ((prop_chartype == ucp_Lu ||
3193                     prop_chartype == ucp_Ll ||
3194                     prop_chartype == ucp_Lt) == prop_fail_result)
3195                  break;
3196                eptr+= len;
3197                }
3198              break;
3199    
3200              case PT_GC:
3201              for (i = min; i < max; i++)
3202                {
3203                int len = 1;
3204                if (eptr >= md->end_subject) break;
3205                GETCHARLEN(c, eptr, len);
3206                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3207                if ((prop_category == prop_value) == prop_fail_result)
3208                  break;
3209                eptr+= len;
3210                }
3211              break;
3212    
3213              case PT_PC:
3214              for (i = min; i < max; i++)
3215                {
3216                int len = 1;
3217                if (eptr >= md->end_subject) break;
3218                GETCHARLEN(c, eptr, len);
3219                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3220                if ((prop_chartype == prop_value) == prop_fail_result)
3221                  break;
3222                eptr+= len;
3223                }
3224              break;
3225    
3226              case PT_SC:
3227              for (i = min; i < max; i++)
3228                {
3229                int len = 1;
3230                if (eptr >= md->end_subject) break;
3231                GETCHARLEN(c, eptr, len);
3232                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3233                if ((prop_script == prop_value) == prop_fail_result)
3234                  break;
3235                eptr+= len;
3236                }
3237              break;
3238            }            }
3239    
3240          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3241    
3242            if (possessive) continue;
3243          for(;;)          for(;;)
3244            {            {
3245            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2816  for (;;) Line 3258  for (;;)
3258            {            {
3259            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3260            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3261            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3262            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3263            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3264              {              {
# Line 2825  for (;;) Line 3267  for (;;)
3267                {                {
3268                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3269                }                }
3270              prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3271              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3272              eptr += len;              eptr += len;
3273              }              }
# Line 2833  for (;;) Line 3275  for (;;)
3275    
3276          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3277    
3278            if (possessive) continue;
3279          for(;;)          for(;;)
3280            {            {
3281            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2846  for (;;) Line 3289  for (;;)
3289                {                {
3290                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3291                }                }
3292              prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3293              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3294              eptr--;              eptr--;
3295              }              }
# Line 2865  for (;;) Line 3308  for (;;)
3308            {            {
3309            case OP_ANY:            case OP_ANY:
3310    
3311            /* Special code is required for UTF8, but when the maximum is unlimited            /* Special code is required for UTF8, but when the maximum is
3312            we don't need it, so we repeat the non-UTF8 code. This is probably            unlimited we don't need it, so we repeat the non-UTF8 code. This is
3313            worth it, because .* is quite a common idiom. */            probably worth it, because .* is quite a common idiom. */
3314    
3315            if (max < INT_MAX)            if (max < INT_MAX)
3316              {              {
# Line 2875  for (;;) Line 3318  for (;;)
3318                {                {
3319                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3320                  {                  {
3321                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3322                  eptr++;                  eptr++;
3323                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3324                  }                  }
# Line 2884  for (;;) Line 3327  for (;;)
3327                {                {
3328                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3329                  {                  {
3330                    if (eptr >= md->end_subject) break;
3331                  eptr++;                  eptr++;
3332                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3333                  }                  }
# Line 2898  for (;;) Line 3342  for (;;)
3342                {                {
3343                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3344                  {                  {
3345                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3346                  eptr++;                  eptr++;
3347                  }                  }
3348                break;                break;
# Line 2906  for (;;) Line 3350  for (;;)
3350              else              else
3351                {                {
3352                c = max - min;                c = max - min;
3353                if (c > md->end_subject - eptr) c = md->end_subject - eptr;                if (c > (unsigned int)(md->end_subject - eptr))
3354                    c = md->end_subject - eptr;
3355                eptr += c;                eptr += c;
3356                }                }
3357              }              }
# Line 2916  for (;;) Line 3361  for (;;)
3361    
3362            case OP_ANYBYTE:            case OP_ANYBYTE:
3363            c = max - min;            c = max - min;
3364            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3365                c = md->end_subject - eptr;
3366            eptr += c;            eptr += c;
3367            break;            break;
3368    
3369              case OP_ANYNL:
3370              for (i = min; i < max; i++)
3371                {
3372                int len = 1;
3373                if (eptr >= md->end_subject) break;
3374                GETCHARLEN(c, eptr, len);
3375                if (c == 0x000d)
3376                  {
3377                  if (++eptr >= md->end_subject) break;
3378                  if (*eptr == 0x000a) eptr++;
3379                  }
3380                else
3381                  {
3382                  if (c != 0x000a && c != 0x000b && c != 0x000c &&
3383                      c != 0x0085 && c != 0x2028 && c != 0x2029)
3384                    break;
3385                  eptr += len;
3386                  }
3387                }
3388              break;
3389    
3390            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3391            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3392              {              {
# Line 2992  for (;;) Line 3459  for (;;)
3459    
3460          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3461    
3462            if (possessive) continue;
3463          for(;;)          for(;;)
3464            {            {
3465            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 3012  for (;;) Line 3480  for (;;)
3480              {              {
3481              for (i = min; i < max; i++)              for (i = min; i < max; i++)
3482                {                {
3483                if (eptr >= md->end_subject || *eptr == NEWLINE) break;                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3484                eptr++;                eptr++;
3485                }                }
3486              break;              break;
# Line 3021  for (;;) Line 3489  for (;;)
3489    
3490            case OP_ANYBYTE:            case OP_ANYBYTE:
3491            c = max - min;            c = max - min;
3492            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3493                c = md->end_subject - eptr;
3494            eptr += c;            eptr += c;
3495            break;            break;
3496    
3497              case OP_ANYNL:
3498              for (i = min; i < max; i++)
3499                {
3500                if (eptr >= md->end_subject) break;
3501                c = *eptr;
3502                if (c == 0x000d)
3503                  {
3504                  if (++eptr >= md->end_subject) break;
3505                  if (*eptr == 0x000a) eptr++;
3506                  }
3507                else
3508                  {
3509                  if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
3510                    break;
3511                  eptr++;
3512                  }
3513                }
3514              break;
3515    
3516            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3517            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3518              {              {
# Line 3085  for (;;) Line 3573  for (;;)
3573    
3574          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3575    
3576            if (possessive) continue;
3577          while (eptr >= pp)          while (eptr >= pp)
3578            {            {
3579            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 3099  for (;;) Line 3588  for (;;)
3588        }        }
3589      /* Control never gets here */      /* Control never gets here */
3590    
3591      /* There's been some horrible disaster. Since all codes > OP_BRA are      /* There's been some horrible disaster. Arrival here can only mean there is
3592      for capturing brackets, and there shouldn't be any gaps between 0 and      something seriously wrong in the code above or the OP_xxx definitions. */
     OP_BRA, arrival here can only mean there is something seriously wrong  
     in the code above or the OP_xxx definitions. */  
3593    
3594      default:      default:
3595      DPRINTF(("Unknown opcode %d\n", *ecode));      DPRINTF(("Unknown opcode %d\n", *ecode));
3596      RRETURN(PCRE_ERROR_UNKNOWN_NODE);      RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
3597      }      }
3598    
3599    /* Do not stick any code in here without much thought; it is assumed    /* Do not stick any code in here without much thought; it is assumed
# Line 3144  Undefine all the macros that were define Line 3631  Undefine all the macros that were define
3631    
3632  #undef cur_is_word  #undef cur_is_word
3633  #undef condition  #undef condition
 #undef minimize  
3634  #undef prev_is_word  #undef prev_is_word
3635    
3636  #undef original_ims  #undef original_ims
# Line 3200  Returns:          > 0 => success; value Line 3686  Returns:          > 0 => success; value
3686                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3687  */  */
3688    
3689  EXPORT int  PCRE_DATA_SCOPE int
3690  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
3691    const char *subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
3692    int offsetcount)    int offsetcount)
3693  {  {
3694  int rc, resetcount, ocount;  int rc, resetcount, ocount;
3695  int first_byte = -1;  int first_byte = -1;
3696  int req_byte = -1;  int req_byte = -1;
3697  int req_byte2 = -1;  int req_byte2 = -1;
3698  unsigned long int ims = 0;  int newline;
3699    unsigned long int ims;
3700  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
3701  BOOL anchored;  BOOL anchored;
3702  BOOL startline;  BOOL startline;
3703  BOOL firstline;  BOOL firstline;
3704  BOOL first_byte_caseless = FALSE;  BOOL first_byte_caseless = FALSE;
3705  BOOL req_byte_caseless = FALSE;  BOOL req_byte_caseless = FALSE;
3706    BOOL utf8;
3707  match_data match_block;  match_data match_block;
3708    match_data *md = &match_block;
3709  const uschar *tables;  const uschar *tables;
3710  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3711  const uschar *start_match = (const uschar *)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
3712  const uschar *end_subject;  USPTR end_subject;
3713  const uschar *req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
3714    eptrblock eptrchain[EPTR_WORK_SIZE];
3715    
3716  pcre_study_data internal_study;  pcre_study_data internal_study;
3717  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3241  if (offsetcount < 0) return PCRE_ERROR_B Line 3731  if (offsetcount < 0) return PCRE_ERROR_B
3731  the default values. */  the default values. */
3732    
3733  study = NULL;  study = NULL;
3734  match_block.match_limit = MATCH_LIMIT;  md->match_limit = MATCH_LIMIT;
3735  match_block.callout_data = NULL;  md->match_limit_recursion = MATCH_LIMIT_RECURSION;
3736    md->callout_data = NULL;
3737    
3738  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
3739    
# Line 3254  if (extra_data != NULL) Line 3745  if (extra_data != NULL)
3745    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3746      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
3747    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
3748      match_block.match_limit = extra_data->match_limit;      md->match_limit = extra_data->match_limit;
3749      if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3750        md->match_limit_recursion = extra_data->match_limit_recursion;
3751    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3752      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
3753    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
3754    }    }
3755    
# Line 3286  firstline = (re->options & PCRE_FIRSTLIN Line 3779  firstline = (re->options & PCRE_FIRSTLIN
3779    
3780  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
3781    
3782  match_block.start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const uschar *)external_re + re->name_table_offset +
3783    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
3784    
3785  match_block.start_subject = (const uschar *)subject;  md->start_subject = (USPTR)subject;
3786  match_block.start_offset = start_offset;  md->start_offset = start_offset;
3787  match_block.end_subject = match_block.start_subject + length;  md->end_subject = md->start_subject + length;
3788  end_subject = match_block.end_subject;  end_subject = md->end_subject;
3789    
3790  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
3791  match_block.utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
3792    
3793  match_block.notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
3794  match_block.noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
3795  match_block.notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
3796  match_block.partial = (options & PCRE_PARTIAL) != 0;  md->partial = (options & PCRE_PARTIAL) != 0;
3797  match_block.hitend = FALSE;  md->hitend = FALSE;
3798    
3799    md->recursive = NULL;                   /* No recursion at top level */
3800    md->eptrchain = eptrchain;              /* Make workspace generally available */
3801    
3802  match_block.recursive = NULL;                   /* No recursion at top level */  md->lcc = tables + lcc_offset;
3803    md->ctypes = tables + ctypes_offset;
3804    
3805    /* Handle different types of newline. The three bits give eight cases. If
3806    nothing is set at run time, whatever was used at compile time applies. */
3807    
3808    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
3809           PCRE_NEWLINE_BITS)
3810      {
3811      case 0: newline = NEWLINE; break;   /* Compile-time default */
3812      case PCRE_NEWLINE_CR: newline = '\r'; break;
3813      case PCRE_NEWLINE_LF: newline = '\n'; break;
3814      case PCRE_NEWLINE_CR+
3815           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
3816      case PCRE_NEWLINE_ANY: newline = -1; break;
3817      default: return PCRE_ERROR_BADNEWLINE;
3818      }
3819    
3820  match_block.lcc = tables + lcc_offset;  if (newline < 0)
3821  match_block.ctypes = tables + ctypes_offset;    {
3822      md->nltype = NLTYPE_ANY;
3823      }
3824    else
3825      {
3826      md->nltype = NLTYPE_FIXED;
3827      if (newline > 255)
3828        {
3829        md->nllen = 2;
3830        md->nl[0] = (newline >> 8) & 255;
3831        md->nl[1] = newline & 255;
3832        }
3833      else
3834        {
3835        md->nllen = 1;
3836        md->nl[0] = newline;
3837        }
3838      }
3839    
3840  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
3841  moment. */  moment. */
3842    
3843  if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
3844    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
3845    
3846  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3847  back the character offset. */  back the character offset. */
3848    
3849  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3850  if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3851    {    {
3852    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
3853      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
# Line 3350  ocount = offsetcount - (offsetcount % 3) Line 3879  ocount = offsetcount - (offsetcount % 3)
3879  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
3880    {    {
3881    ocount = re->top_backref * 3 + 3;    ocount = re->top_backref * 3 + 3;
3882    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3883    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3884    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
3885    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
3886    }    }
3887  else match_block.offset_vector = offsets;  else md->offset_vector = offsets;
3888    
3889  match_block.offset_end = ocount;  md->offset_end = ocount;
3890  match_block.offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
3891  match_block.offset_overflow = FALSE;  md->offset_overflow = FALSE;
3892  match_block.capture_last = -1;  md->capture_last = -1;
3893    
3894  /* Compute the minimum number of offsets that we need to reset each time. Doing  /* Compute the minimum number of offsets that we need to reset each time. Doing
3895  this makes a huge difference to execution time when there aren't many brackets  this makes a huge difference to execution time when there aren't many brackets
# Line 3373  if (resetcount > offsetcount) resetcount Line 3902  if (resetcount > offsetcount) resetcount
3902  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
3903  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. */
3904    
3905  if (match_block.offset_vector != NULL)  if (md->offset_vector != NULL)
3906    {    {
3907    register int *iptr = match_block.offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
3908    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - resetcount/2 + 1;
3909    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
3910    }    }
# Line 3392  if (!anchored) Line 3921  if (!anchored)
3921      {      {
3922      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
3923      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
3924        first_byte = match_block.lcc[first_byte];        first_byte = md->lcc[first_byte];
3925      }      }
3926    else    else
3927      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 3410  if ((re->options & PCRE_REQCHSET) != 0) Line 3939  if ((re->options & PCRE_REQCHSET) != 0)
3939    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
3940    }    }
3941    
3942    
3943    /* ==========================================================================*/
3944    
3945  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
3946  the loop runs just once. */  the loop runs just once. */
3947    
3948  do  for(;;)
3949    {    {
3950    const uschar *save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
3951    
3952    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
3953    
3954    if (match_block.offset_vector != NULL)    if (md->offset_vector != NULL)
3955      {      {
3956      register int *iptr = match_block.offset_vector;      register int *iptr = md->offset_vector;
3957      register int *iend = iptr + resetcount;      register int *iend = iptr + resetcount;
3958      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
3959      }      }
3960    
3961    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* Advance to a unique first char if possible. If firstline is TRUE, the
3962    start of the match is constrained to the first line of a multiline string.    start of the match is constrained to the first line of a multiline string.
3963    Implement this by temporarily adjusting end_subject so that we stop scanning    That is, the match must be before or at the first newline. Implement this by
3964    at a newline. If the match fails at the newline, later code breaks this loop.    temporarily adjusting end_subject so that we stop scanning at a newline. If
3965    */    the match fails at the newline, later code breaks this loop. */
3966    
3967    if (firstline)    if (firstline)
3968      {      {
3969      const uschar *t = start_match;      USPTR t = start_match;
3970      while (t < save_end_subject && *t != '\n') t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3971      end_subject = t;      end_subject = t;
3972      }      }
3973    
# Line 3445  do Line 3977  do
3977      {      {
3978      if (first_byte_caseless)      if (first_byte_caseless)
3979        while (start_match < end_subject &&        while (start_match < end_subject &&
3980               match_block.lcc[*start_match] != first_byte)               md->lcc[*start_match] != first_byte)
3981          start_match++;          start_match++;
3982      else      else
3983        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
3984          start_match++;          start_match++;
3985      }      }
3986    
3987    /* Or to just after \n for a multiline match if possible */    /* Or to just after a linebreak for a multiline match if possible */
3988    
3989    else if (startline)    else if (startline)
3990      {      {
3991      if (start_match > match_block.start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
3992        {        {
3993        while (start_match < end_subject && start_match[-1] != NEWLINE)        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
3994          start_match++;          start_match++;
3995        }        }
3996      }      }
# Line 3480  do Line 4012  do
4012    
4013  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4014    printf(">>>> Match against: ");    printf(">>>> Match against: ");
4015    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, md);
4016    printf("\n");    printf("\n");
4017  #endif  #endif
4018    
# Line 3494  do Line 4026  do
4026    
4027    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end can
4028    take a long time, and give bad performance on quite ordinary patterns. This    take a long time, and give bad performance on quite ordinary patterns. This
4029    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4030    don't do this when the string is sufficiently long.    string... so we don't do this when the string is sufficiently long.
4031    
4032    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested.
4033    */    */
4034    
4035    if (req_byte >= 0 &&    if (req_byte >= 0 &&
4036        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
4037        !match_block.partial)        !md->partial)
4038      {      {
4039      register const uschar *p = start_match + ((first_byte >= 0)? 1 : 0);      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4040    
4041      /* We don't need to repeat the search if we haven't yet reached the      /* We don't need to repeat the search if we haven't yet reached the
4042      place we found it at last time. */      place we found it at last time. */
# Line 3527  do Line 4059  do
4059            }            }
4060          }          }
4061    
4062        /* If we can't find the required character, break the matching loop */        /* If we can't find the required character, break the matching loop,
4063          forcing a match failure. */
4064    
4065        if (p >= end_subject) break;        if (p >= end_subject)
4066            {
4067            rc = MATCH_NOMATCH;
4068            break;
4069            }
4070    
4071        /* If we have found the required character, save the point where we        /* If we have found the required character, save the point where we
4072        found it, so that we don't search again next time round the loop if        found it, so that we don't search again next time round the loop if
# Line 3539  do Line 4076  do
4076        }        }
4077      }      }
4078    
4079    /* When a match occurs, substrings will be set for all internal extractions;    /* OK, we can now run the match. */
   we just need to set up the whole thing as substring 0 before returning. If  
   there were too many extractions, set the return code to zero. In the case  
   where we had to get some local store to hold offsets for backreferences, copy  
   those back references that we can. In this case there need not be overflow  
   if certain parts of the pattern were not used. */  
   
   match_block.start_match = start_match;  
   match_block.match_call_count = 0;  
   
   rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,  
     match_isgroup);  
   
   /* When the result is no match, if the subject's first character was a  
   newline and the PCRE_FIRSTLINE option is set, break (which will return  
   PCRE_ERROR_NOMATCH). The option requests that a match occur before the first  
   newline in the subject. Otherwise, advance the pointer to the next character  
   and continue - but the continuation will actually happen only when the  
   pattern is not anchored. */  
4080    
4081    if (rc == MATCH_NOMATCH)    md->start_match = start_match;
4082      {    md->match_call_count = 0;
4083      if (firstline && *start_match == NEWLINE) break;    md->eptrn = 0;                          /* Next free eptrchain slot */
4084      start_match++;    rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);
4085    
4086      /* Any return other than MATCH_NOMATCH breaks the loop. */
4087    
4088      if (rc != MATCH_NOMATCH) break;
4089    
4090      /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4091      newline in the subject (though it may continue over the newline). Therefore,
4092      if we have just failed to match, starting at a newline, do not continue. */
4093    
4094      if (firstline && IS_NEWLINE(start_match)) break;
4095    
4096      /* Advance the match position by one character. */
4097    
4098      start_match++;
4099  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4100      if (match_block.utf8)    if (utf8)
4101        while(start_match < end_subject && (*start_match & 0xc0) == 0x80)      while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4102          start_match++;        start_match++;
4103  #endif  #endif
     continue;  
     }  
4104    
4105    if (rc != MATCH_MATCH)    /* Break the loop if the pattern is anchored or if we have passed the end of
4106      {    the subject. */
4107      DPRINTF((">>>> error: returning %d\n", rc));  
4108      return rc;    if (anchored || start_match > end_subject) break;
     }  
4109    
4110    /* We have a match! Copy the offset information from temporary store if    /* If we have just passed a CR and the newline option is CRLF or ANY, and we
4111    necessary */    are now at a LF, advance the match position by one more character. */
4112    
4113      if (start_match[-1] == '\r' &&
4114           (md->nltype == NLTYPE_ANY || md->nllen == 2) &&
4115           start_match < end_subject &&
4116           *start_match == '\n')
4117        start_match++;
4118    
4119      }   /* End of for(;;) "bumpalong" loop */
4120    
4121    /* ==========================================================================*/
4122    
4123    /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4124    conditions is true:
4125    
4126    (1) The pattern is anchored;
4127    
4128    (2) We are past the end of the subject;
4129    
4130    (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4131        this option requests that a match occur at or before the first newline in
4132        the subject.
4133    
4134    When we have a match and the offset vector is big enough to deal with any
4135    backreferences, captured substring offsets will already be set up. In the case
4136    where we had to get some local store to hold offsets for backreference
4137    processing, copy those that we can. In this case there need not be overflow if
4138    certain parts of the pattern were not used, even though there are more
4139    capturing parentheses than vector slots. */
4140    
4141    if (rc == MATCH_MATCH)
4142      {
4143    if (using_temporary_offsets)    if (using_temporary_offsets)
4144      {      {
4145      if (offsetcount >= 4)      if (offsetcount >= 4)
4146        {        {
4147        memcpy(offsets + 2, match_block.offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
4148          (offsetcount - 2) * sizeof(int));          (offsetcount - 2) * sizeof(int));
4149        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
4150        }        }
4151      if (match_block.end_offset_top > offsetcount)      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
       match_block.offset_overflow = TRUE;  
   
4152      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
4153      (pcre_free)(match_block.offset_vector);      (pcre_free)(md->offset_vector);
4154      }      }
4155    
4156    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;    /* Set the return code to the number of captured strings, or 0 if there are
4157      too many to fit into the vector. */
4158    
4159      rc = md->offset_overflow? 0 : md->end_offset_top/2;
4160    
4161      /* If there is space, set up the whole thing as substring 0. */
4162    
4163    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4164      {      {
4165      offsets[0] = start_match - match_block.start_subject;      offsets[0] = start_match - md->start_subject;
4166      offsets[1] = match_block.end_match_ptr - match_block.start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
4167      }      }
4168    
4169    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
4170    return rc;    return rc;
4171    }    }
4172    
4173  /* This "while" is the end of the "do" above */  /* Control gets here if there has been an error, or if the overall match
4174    attempt has failed at all permitted starting positions. */
 while (!anchored && start_match <= end_subject);  
4175    
4176  if (using_temporary_offsets)  if (using_temporary_offsets)
4177    {    {
4178    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
4179    (pcre_free)(match_block.offset_vector);    (pcre_free)(md->offset_vector);
4180    }    }
4181    
4182  if (match_block.partial && match_block.hitend)  if (rc != MATCH_NOMATCH)
4183      {
4184      DPRINTF((">>>> error: returning %d\n", rc));
4185      return rc;
4186      }
4187    else if (md->partial && md->hitend)
4188    {    {
4189    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4190    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;

Legend:
Removed from v.77  
changed lines
  Added in v.97

  ViewVC Help
Powered by ViewVC 1.1.5