/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 197 by ph10, Tue Jul 31 10:50:18 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #define NLBLOCK md             /* Block containing newline information */
46    #define PSSTART start_subject  /* Field containing processed string start */
47    #define PSEND   end_subject    /* Field containing processed string end */
48    
49  #include "pcre_internal.h"  #include "pcre_internal.h"
50    
51    /* Undefine some potentially clashing cpp symbols */
52    
53  /* Structure for building a chain of data that actually lives on the  #undef min
54  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   USPTR epb_saved_eptr;  
 } eptrblock;  
55    
56  /* Flag bits for the match() function */  /* Flag bits for the match() function */
57    
58  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
59  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
60    
61  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
62  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 101  Returns:     nothing Line 97  Returns:     nothing
97  static void  static void
98  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
99  {  {
100  int c;  unsigned int c;
101  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
102  while (length-- > 0)  while (length-- > 0)
103    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 186  calls by keeping local variables that ne Line 182  calls by keeping local variables that ne
182  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
183  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
184  always used to.  always used to.
185    
186    The original heap-recursive code used longjmp(). However, it seems that this
187    can be very slow on some operating systems. Following a suggestion from Stan
188    Switzer, the use of longjmp() has been abolished, at the cost of having to
189    provide a unique number for each call to RMATCH. There is no way of generating
190    a sequence of numbers at compile time in C. I have given them names, to make
191    them stand out more clearly.
192    
193    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
194    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
195    tests. Furthermore, not using longjmp() means that local dynamic variables
196    don't have indeterminate values; this has meant that the frame size can be
197    reduced because the result can be "passed back" by straight setting of the
198    variable instead of being passed in the frame.
199  ****************************************************************************  ****************************************************************************
200  ***************************************************************************/  ***************************************************************************/
201    
202    
203    /* Numbers for RMATCH calls */
204    
205    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
206           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
207           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
208           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
209           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50 };
210    
211    
212  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
213  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
214    actually used in this definition. */
215    
216  #ifndef NO_RECURSE  #ifndef NO_RECURSE
217  #define REGISTER register  #define REGISTER register
218    
219  #ifdef DEBUG  #ifdef DEBUG
220  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
221    { \    { \
222    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
223    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
224    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
225    }    }
226  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 208  versions and production versions. */ Line 229  versions and production versions. */
229    return ra; \    return ra; \
230    }    }
231  #else  #else
232  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
233    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
234  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
235  #endif  #endif
236    
237  #else  #else
238    
239    
240  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
241  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
242  match(), which never changes. */  argument of match(), which never changes. */
243    
244  #define REGISTER  #define REGISTER
245    
246  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
247    {\    {\
248    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
249    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
250      {\    newframe->Xeptr = ra;\
251      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
252      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
253      newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
254      newframe->Xims = re;\    newframe->Xims = re;\
255      newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
256      newframe->Xflags = rg;\    newframe->Xflags = rg;\
257      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
258      newframe->Xprevframe = frame;\    newframe->Xprevframe = frame;\
259      frame = newframe;\    frame = newframe;\
260      DPRINTF(("restarting from line %d\n", __LINE__));\    DPRINTF(("restarting from line %d\n", __LINE__));\
261      goto HEAP_RECURSE;\    goto HEAP_RECURSE;\
262      }\    L_##rw:\
263    else\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     {\  
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
264    }    }
265    
266  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 254  match(), which never changes. */ Line 270  match(), which never changes. */
270    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
271    if (frame != NULL)\    if (frame != NULL)\
272      {\      {\
273      frame->Xresult = ra;\      rrc = ra;\
274      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
275      }\      }\
276    return ra;\    return ra;\
277    }    }
# Line 271  typedef struct heapframe { Line 286  typedef struct heapframe {
286    
287    const uschar *Xeptr;    const uschar *Xeptr;
288    const uschar *Xecode;    const uschar *Xecode;
289      const uschar *Xmstart;
290    int Xoffset_top;    int Xoffset_top;
291    long int Xims;    long int Xims;
292    eptrblock *Xeptrb;    eptrblock *Xeptrb;
293    int Xflags;    int Xflags;
294    int Xrdepth;    unsigned int Xrdepth;
295    
296    /* Function local variables */    /* Function local variables */
297    
# Line 291  typedef struct heapframe { Line 307  typedef struct heapframe {
307    
308    BOOL Xcur_is_word;    BOOL Xcur_is_word;
309    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
310    BOOL Xprev_is_word;    BOOL Xprev_is_word;
311    
312    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
# Line 303  typedef struct heapframe { Line 318  typedef struct heapframe {
318    int Xprop_category;    int Xprop_category;
319    int Xprop_chartype;    int Xprop_chartype;
320    int Xprop_script;    int Xprop_script;
321    int *Xprop_test_variable;    int Xoclength;
322      uschar Xocchars[8];
323  #endif  #endif
324    
325    int Xctype;    int Xctype;
326    int Xfc;    unsigned int Xfc;
327    int Xfi;    int Xfi;
328    int Xlength;    int Xlength;
329    int Xmax;    int Xmax;
# Line 321  typedef struct heapframe { Line 337  typedef struct heapframe {
337    
338    eptrblock Xnewptrb;    eptrblock Xnewptrb;
339    
340    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
341    
342    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
343    
344  } heapframe;  } heapframe;
345    
# Line 340  typedef struct heapframe { Line 355  typedef struct heapframe {
355  *         Match from current position            *  *         Match from current position            *
356  *************************************************/  *************************************************/
357    
358  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
359  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
360  same response.  same response.
361    
# Line 353  performance. Tests using gcc on a SPARC Line 365  performance. Tests using gcc on a SPARC
365  made performance worse.  made performance worse.
366    
367  Arguments:  Arguments:
368     eptr        pointer in subject     eptr        pointer to current character in subject
369     ecode       position in code     ecode       pointer to current position in compiled code
370       mstart      pointer to the current match start position (can be modified
371                     by encountering \K)
372     offset_top  current top pointer     offset_top  current top pointer
373     md          pointer to "static" info for the match     md          pointer to "static" info for the match
374     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 362  Arguments: Line 376  Arguments:
376                   brackets - for testing for empty matches                   brackets - for testing for empty matches
377     flags       can contain     flags       can contain
378                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
379                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
380                       group that can match an empty string
381     rdepth      the recursion depth     rdepth      the recursion depth
382    
383  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 372  Returns:       MATCH_MATCH if matched Line 387  Returns:       MATCH_MATCH if matched
387  */  */
388    
389  static int  static int
390  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
391    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
392    int flags, int rdepth)    int flags, unsigned int rdepth)
393  {  {
394  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
395  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
396  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
397    
398    register int  rrc;         /* Returns from recursive calls */
399    register int  i;           /* Used for loops not involving calls to RMATCH() */
400    register unsigned int c;   /* Character values not kept over RMATCH() calls */
401    register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
402    
403  register int  rrc;    /* Returns from recursive calls */  BOOL minimize, possessive; /* Quantifier options */
 register int  i;      /* Used for loops not involving calls to RMATCH() */  
 register int  c;      /* Character values not kept over RMATCH() calls */  
 register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  
404    
405  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
406  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 398  frame->Xprevframe = NULL;            /* Line 415  frame->Xprevframe = NULL;            /*
415    
416  frame->Xeptr = eptr;  frame->Xeptr = eptr;
417  frame->Xecode = ecode;  frame->Xecode = ecode;
418    frame->Xmstart = mstart;
419  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
420  frame->Xims = ims;  frame->Xims = ims;
421  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 412  HEAP_RECURSE: Line 430  HEAP_RECURSE:
430    
431  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
432  #define ecode              frame->Xecode  #define ecode              frame->Xecode
433    #define mstart             frame->Xmstart
434  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
435  #define ims                frame->Xims  #define ims                frame->Xims
436  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 434  HEAP_RECURSE: Line 453  HEAP_RECURSE:
453    
454  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
455  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
456  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
457    
458  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
# Line 446  HEAP_RECURSE: Line 464  HEAP_RECURSE:
464  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
465  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
466  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
467  #define prop_test_variable frame->Xprop_test_variable  #define oclength           frame->Xoclength
468    #define occhars            frame->Xocchars
469  #endif  #endif
470    
471  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 470  HEAP_RECURSE: Line 489  HEAP_RECURSE:
489  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
490  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
491    
492  #else  #else         /* NO_RECURSE not defined */
493  #define fi i  #define fi i
494  #define fc c  #define fc c
495    
# Line 489  recursion_info new_recursive;      /* wi Line 508  recursion_info new_recursive;      /* wi
508                                     /* that do not have to be preserved over  */                                     /* that do not have to be preserved over  */
509  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
510  BOOL condition;  BOOL condition;
 BOOL minimize;  
511  BOOL prev_is_word;  BOOL prev_is_word;
512    
513  unsigned long int original_ims;  unsigned long int original_ims;
# Line 501  int prop_fail_result; Line 519  int prop_fail_result;
519  int prop_category;  int prop_category;
520  int prop_chartype;  int prop_chartype;
521  int prop_script;  int prop_script;
522  int *prop_test_variable;  int oclength;
523    uschar occhars[8];
524  #endif  #endif
525    
526  int ctype;  int ctype;
# Line 516  int save_offset1, save_offset2, save_off Line 535  int save_offset1, save_offset2, save_off
535  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
536    
537  eptrblock newptrb;  eptrblock newptrb;
538  #endif  #endif     /* NO_RECURSE */
539    
540  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
541  variables. */  variables. */
# Line 524  variables. */ Line 543  variables. */
543  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
544  prop_value = 0;  prop_value = 0;
545  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_variable = NULL;  
546  #endif  #endif
547    
548    
549    /* This label is used for tail recursion, which is used in a few cases even
550    when NO_RECURSE is not defined, in order to reduce the amount of stack that is
551    used. Thanks to Ian Taylor for noticing this possibility and sending the
552    original patch. */
553    
554    TAIL_RECURSE:
555    
556  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
557  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
558  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
# Line 535  defined). However, RMATCH isn't like a f Line 561  defined). However, RMATCH isn't like a f
561  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
562  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
563    
564    #ifdef SUPPORT_UTF8
565    utf8 = md->utf8;       /* Local copy of the flag */
566    #else
567    utf8 = FALSE;
568    #endif
569    
570  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
571  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
572    
# Line 542  if (md->match_call_count++ >= md->match_ Line 574  if (md->match_call_count++ >= md->match_
574  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
575    
576  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
 utf8 = md->utf8;       /* Local copy of the flag */  
577    
578  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
579  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
580  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
581  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
582    When match() is called in other circumstances, don't add to the chain. The
583    match_cbegroup flag must NOT be used with tail recursion, because the memory
584    block that is used is on the stack, so a new one may be required for each
585    match(). */
586    
587  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
588    {    {
   newptrb.epb_prev = eptrb;  
589    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
590      newptrb.epb_prev = eptrb;
591    eptrb = &newptrb;    eptrb = &newptrb;
592    }    }
593    
594  /* Now start processing the operations. */  /* Now start processing the opcodes. */
595    
596  for (;;)  for (;;)
597    {    {
598      minimize = possessive = FALSE;
599    op = *ecode;    op = *ecode;
   minimize = FALSE;  
600    
601    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
602    matching at least one subject character. */    matching at least one subject character. */
603    
604    if (md->partial &&    if (md->partial &&
605        eptr >= md->end_subject &&        eptr >= md->end_subject &&
606        eptr > md->start_match)        eptr > mstart)
607      md->hitend = TRUE;      md->hitend = TRUE;
608    
609    /* Opening capturing bracket. If there is space in the offset vector, save    switch(op)
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
610      {      {
611      number = op - OP_BRA;      /* Handle a capturing bracket. If there is space in the offset vector, save
612        the current subject position in the working slot at the top of the vector.
613      /* For extended extraction brackets (large number), we have to fish out the      We mustn't change the current values of the data slot, because they may be
614      number from a dummy opcode at the start. */      set from a previous iteration of this group, and be referred to by a
615        reference inside the group.
616      if (number > EXTRACT_BASIC_MAX)  
617        number = GET2(ecode, 2+LINK_SIZE);      If the bracket fails to match, we need to restore this value and also the
618        values of the final offsets, in case they were set by a previous iteration
619        of the same bracket.
620    
621        If there isn't enough space in the offset vector, treat this as if it were
622        a non-capturing bracket. Don't worry about setting the flag for the error
623        case here; that is handled in the code for KET. */
624    
625        case OP_CBRA:
626        case OP_SCBRA:
627        number = GET2(ecode, 1+LINK_SIZE);
628      offset = number << 1;      offset = number << 1;
629    
630  #ifdef DEBUG  #ifdef DEBUG
631      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
632        printf("subject=");
633      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
634      printf("\n");      printf("\n");
635  #endif  #endif
# Line 612  for (;;) Line 644  for (;;)
644        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
645        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
646    
647          flags = (op == OP_SCBRA)? match_cbegroup : 0;
648        do        do
649          {          {
650          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
651            match_isgroup);            ims, eptrb, flags, RM1);
652          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
653          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
654          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
# Line 631  for (;;) Line 664  for (;;)
664        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
665        }        }
666    
667      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
668        as a non-capturing bracket. */
669    
670      else op = OP_BRA;      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
671      }      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
672    
673    /* Other types of node can be handled by a switch */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
674    
675    switch(op)      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
676      {      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
677      case OP_BRA:     /* Non-capturing bracket: optimized */  
678      DPRINTF(("start bracket 0\n"));      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
679      do      final alternative within the brackets, we would return the result of a
680        recursive call to match() whatever happened. We can reduce stack usage by
681        turning this into a tail recursion, except in the case when match_cbegroup
682        is set.*/
683    
684        case OP_BRA:
685        case OP_SBRA:
686        DPRINTF(("start non-capturing bracket\n"));
687        flags = (op >= OP_SBRA)? match_cbegroup : 0;
688        for (;;)
689        {        {
690        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
691          match_isgroup);          {
692            if (flags == 0)    /* Not a possibly empty group */
693              {
694              ecode += _pcre_OP_lengths[*ecode];
695              DPRINTF(("bracket 0 tail recursion\n"));
696              goto TAIL_RECURSE;
697              }
698    
699            /* Possibly empty group; can't use tail recursion. */
700    
701            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
702              eptrb, flags, RM48);
703            RRETURN(rrc);
704            }
705    
706          /* For non-final alternatives, continue the loop for a NOMATCH result;
707          otherwise return. */
708    
709          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
710            eptrb, flags, RM2);
711        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
712        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
713        }        }
714      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
715    
716      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
717      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
718      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
719      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
720        obeyed, we can use tail recursion to avoid using another stack frame. */
721    
722      case OP_COND:      case OP_COND:
723      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
724        if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
725          {
726          offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
727          condition = md->recursive != NULL &&
728            (offset == RREF_ANY || offset == md->recursive->group_num);
729          ecode += condition? 3 : GET(ecode, 1);
730          }
731    
732        else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
733        {        {
734        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
735        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
736          (md->recursive != NULL) :        ecode += condition? 3 : GET(ecode, 1);
737          (offset < offset_top && md->offset_vector[offset] >= 0);        }
738        RMATCH(rrc, eptr, ecode + (condition?  
739          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
740          offset_top, md, ims, eptrb, match_isgroup);        {
741        RRETURN(rrc);        condition = FALSE;
742          ecode += GET(ecode, 1);
743        }        }
744    
745      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
746      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
747        assertion. */
748    
749      else      else
750        {        {
751        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
752            match_condassert | match_isgroup);            match_condassert, RM3);
753        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
754          {          {
755          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
756            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
757          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
758          }          }
759        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH)
760          {          {
761          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
762          }          }
763        else ecode += GET(ecode, 1);        else
764        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
765          match_isgroup);          condition = FALSE;
766        RRETURN(rrc);          ecode += GET(ecode, 1);
767            }
768        }        }
     /* Control never reaches here */  
769    
770      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
771      encountered. */      we can use tail recursion to avoid using another stack frame, except when
772        match_cbegroup is required for an unlimited repeat of a possibly empty
773        group. If the second alternative doesn't exist, we can just plough on. */
774    
775      case OP_CREF:      if (condition || *ecode == OP_ALT)
776      case OP_BRANUMBER:        {
777      ecode += 3;        ecode += 1 + LINK_SIZE;
778          if (op == OP_SCOND)        /* Possibly empty group */
779            {
780            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
781            RRETURN(rrc);
782            }
783          else                       /* Group must match something */
784            {
785            flags = 0;
786            goto TAIL_RECURSE;
787            }
788          }
789        else                         /* Condition false & no 2nd alternative */
790          {
791          ecode += 1 + LINK_SIZE;
792          }
793      break;      break;
794    
795      /* End of the pattern. If we are in a recursion, we should restore the  
796      offsets appropriately and continue from after the call. */      /* End of the pattern. If we are in a top-level recursion, we should
797        restore the offsets appropriately and continue from after the call. */
798    
799      case OP_END:      case OP_END:
800      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
# Line 713  for (;;) Line 804  for (;;)
804        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
805        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
806          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
807        md->start_match = rec->save_start;        mstart = rec->save_start;
808        ims = original_ims;        ims = original_ims;
809        ecode = rec->after_call;        ecode = rec->after_call;
810        break;        break;
# Line 722  for (;;) Line 813  for (;;)
813      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
814      string - backtracking will then try other alternatives, if any. */      string - backtracking will then try other alternatives, if any. */
815    
816      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
817      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
818      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
819        md->start_match_ptr = mstart;  /* and the start (\K can modify) */
820      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
821    
822      /* Change option settings */      /* Change option settings */
# Line 745  for (;;) Line 837  for (;;)
837      case OP_ASSERTBACK:      case OP_ASSERTBACK:
838      do      do
839        {        {
840        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
841          match_isgroup);          RM4);
842        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
843        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
844        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 772  for (;;) Line 864  for (;;)
864      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
865      do      do
866        {        {
867        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
868          match_isgroup);          RM5);
869        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
870        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
871        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 794  for (;;) Line 886  for (;;)
886  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
887      if (utf8)      if (utf8)
888        {        {
889        c = GET(ecode,1);        i = GET(ecode, 1);
890        for (i = 0; i < c; i++)        while (i-- > 0)
891          {          {
892          eptr--;          eptr--;
893          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
# Line 808  for (;;) Line 900  for (;;)
900      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
901    
902        {        {
903        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
904        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
905        }        }
906    
# Line 830  for (;;) Line 922  for (;;)
922        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
923        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
924        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
925        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
926        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
927        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
928        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
# Line 865  for (;;) Line 957  for (;;)
957      case OP_RECURSE:      case OP_RECURSE:
958        {        {
959        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
960        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
961            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
962    
963        /* Add to "recursing stack" */        /* Add to "recursing stack" */
964    
# Line 897  for (;;) Line 984  for (;;)
984    
985        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
986              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
987        new_recursive.save_start = md->start_match;        new_recursive.save_start = mstart;
988        md->start_match = eptr;        mstart = eptr;
989    
990        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
991        restore the offset and recursion data. */        restore the offset and recursion data. */
992    
993        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
994          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
995        do        do
996          {          {
997          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
998              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
999          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
1000            {            {
1001            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
# Line 945  for (;;) Line 1033  for (;;)
1033      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer. */
1034    
1035      case OP_ONCE:      case OP_ONCE:
1036        {      prev = ecode;
1037        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1038    
1039        do      do
1040          {        {
1041          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1042            eptrb, match_isgroup);        if (rrc == MATCH_MATCH) break;
1043          if (rrc == MATCH_MATCH) break;        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1044          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += GET(ecode,1);
1045          ecode += GET(ecode,1);        }
1046          }      while (*ecode == OP_ALT);
       while (*ecode == OP_ALT);  
1047    
1048        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1049    
1050        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1051    
1052        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1053        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1054    
1055        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1056    
1057        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1058        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1059    
1060        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1061        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1062        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1063        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1064        course of events. */      course of events. */
1065    
1066        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1067          {        {
1068          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1069          break;        break;
1070          }        }
1071    
1072        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1073        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1074        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1075        opcode. */      any options that changed within the bracket before re-running it, so
1076        check the next opcode. */
1077    
1078        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1079          {        {
1080          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1081          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1082          }        }
1083    
1084        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1085          {        {
1086          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1087          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1088          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1089          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = 0;
1090          }        goto TAIL_RECURSE;
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
1091        }        }
1092      RRETURN(MATCH_NOMATCH);      else  /* OP_KETRMAX */
1093          {
1094          RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1095          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1096          ecode += 1 + LINK_SIZE;
1097          flags = 0;
1098          goto TAIL_RECURSE;
1099          }
1100        /* Control never gets here */
1101    
1102      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1103      bracketed group and go to there. */      bracketed group and go to there. */
# Line 1027  for (;;) Line 1115  for (;;)
1115      case OP_BRAZERO:      case OP_BRAZERO:
1116        {        {
1117        next = ecode+1;        next = ecode+1;
1118        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1119        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1120        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1121        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1122        }        }
1123      break;      break;
1124    
1125      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1126        {        {
1127        next = ecode+1;        next = ecode+1;
1128        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1129        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1130        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1131        ecode++;        ecode++;
1132        }        }
1133      break;      break;
1134    
1135      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. */
     an assertion "group", stop matching and return MATCH_MATCH, but record the  
     current high water mark for use by positive assertions. Do this also  
     for the "once" (not-backup up) groups. */  
1136    
1137      case OP_KET:      case OP_KET:
1138      case OP_KETRMIN:      case OP_KETRMIN:
1139      case OP_KETRMAX:      case OP_KETRMAX:
1140        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
   
       /* Back up the stack of bracket start pointers. */  
1141    
1142        eptrb = eptrb->epb_prev;      /* If this was a group that remembered the subject start, in order to break
1143        infinite repeats of empty string matches, retrieve the subject start from
1144        the chain. Otherwise, set it NULL. */
1145    
1146        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev >= OP_SBRA)
1147            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        {
1148            *prev == OP_ONCE)        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1149          {        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1150          md->end_match_ptr = eptr;      /* For ONCE */        }
1151          md->end_offset_top = offset_top;      else saved_eptr = NULL;
         RRETURN(MATCH_MATCH);  
         }  
1152    
1153        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group, stop matching and return
1154        group number back at the start and if necessary complete handling an      MATCH_MATCH, but record the current high water mark for use by positive
1155        extraction by setting the offsets and bumping the high water mark. */      assertions. Do this also for the "once" (atomic) groups. */
1156    
1157        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1158          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1159          number = *prev - OP_BRA;          *prev == OP_ONCE)
1160          {
1161          md->end_match_ptr = eptr;      /* For ONCE */
1162          md->end_offset_top = offset_top;
1163          RRETURN(MATCH_MATCH);
1164          }
1165    
1166          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1167          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1168        bumping the high water mark. Note that whole-pattern recursion is coded as
1169        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1170        when the OP_END is reached. Other recursion is handled here. */
1171    
1172          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1173          offset = number << 1;        {
1174          number = GET2(prev, 1+LINK_SIZE);
1175          offset = number << 1;
1176    
1177  #ifdef DEBUG  #ifdef DEBUG
1178          printf("end bracket %d", number);        printf("end bracket %d", number);
1179          printf("\n");        printf("\n");
1180  #endif  #endif
1181    
1182          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1183          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1184          into group 0, so it won't be picked up here. Instead, we catch it when          {
1185          the OP_END is reached. */          md->offset_vector[offset] =
1186              md->offset_vector[md->offset_end - number];
1187          if (number > 0)          md->offset_vector[offset+1] = eptr - md->start_subject;
1188            {          if (offset_top <= offset) offset_top = offset + 2;
1189            md->capture_last = number;          }
           if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
             {  
             md->offset_vector[offset] =  
               md->offset_vector[md->offset_end - number];  
             md->offset_vector[offset+1] = eptr - md->start_subject;  
             if (offset_top <= offset) offset_top = offset + 2;  
             }  
1190    
1191            /* Handle a recursively called group. Restore the offsets        /* Handle a recursively called group. Restore the offsets
1192            appropriately and continue from after the call. */        appropriately and continue from after the call. */
1193    
1194            if (md->recursive != NULL && md->recursive->group_num == number)        if (md->recursive != NULL && md->recursive->group_num == number)
1195              {          {
1196              recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1197              DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1198              md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
1199              md->start_match = rec->save_start;          mstart = rec->save_start;
1200              memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1201                rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1202              ecode = rec->after_call;          ecode = rec->after_call;
1203              ims = original_ims;          ims = original_ims;
1204              break;          break;
             }  
           }  
1205          }          }
1206          }
1207    
1208        /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1209        the group. */      flags, in case they got changed during the group. */
1210    
1211        ims = original_ims;      ims = original_ims;
1212        DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
1213    
1214        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1215        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1216        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1217        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1218        course of events. */      course of events. */
1219    
1220        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1221          {        {
1222          ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1223          break;        break;
1224          }        }
1225    
1226        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1227        preceding bracket, in the appropriate order. */      preceding bracket, in the appropriate order. In the second case, we can use
1228        tail recursion to avoid using another stack frame, unless we have an
1229        unlimited repeat of a group that can match an empty string. */
1230    
1231        if (*ecode == OP_KETRMIN)      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1232          {  
1233          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);      if (*ecode == OP_KETRMIN)
1234          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        {
1235          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1236          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1237          }        if (flags != 0)    /* Could match an empty string */
       else  /* OP_KETRMAX */  
1238          {          {
1239          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1240          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          RRETURN(rrc);
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
1241          }          }
1242          ecode = prev;
1243          goto TAIL_RECURSE;
1244        }        }
1245        else  /* OP_KETRMAX */
1246      RRETURN(MATCH_NOMATCH);        {
1247          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1248          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1249          ecode += 1 + LINK_SIZE;
1250          flags = 0;
1251          goto TAIL_RECURSE;
1252          }
1253        /* Control never gets here */
1254    
1255      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1256    
# Line 1168  for (;;) Line 1258  for (;;)
1258      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1259      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1260        {        {
1261        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1262              (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1263          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1264        ecode++;        ecode++;
1265        break;        break;
# Line 1189  for (;;) Line 1280  for (;;)
1280      ecode++;      ecode++;
1281      break;      break;
1282    
1283        /* Reset the start of match point */
1284    
1285        case OP_SET_SOM:
1286        mstart = eptr;
1287        ecode++;
1288        break;
1289    
1290      /* Assert before internal newline if multiline, or before a terminating      /* Assert before internal newline if multiline, or before a terminating
1291      newline unless endonly is set, else end of subject unless noteol is set. */      newline unless endonly is set, else end of subject unless noteol is set. */
1292    
# Line 1196  for (;;) Line 1294  for (;;)
1294      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1295        {        {
1296        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1297          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1298        else        else
1299          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1300        ecode++;        ecode++;
# Line 1207  for (;;) Line 1305  for (;;)
1305        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
1306        if (!md->endonly)        if (!md->endonly)
1307          {          {
1308          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1309             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1310            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1311          ecode++;          ecode++;
1312          break;          break;
1313          }          }
1314        }        }
1315      /* ... else fall through */      /* ... else fall through for endonly */
1316    
1317      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1318    
# Line 1226  for (;;) Line 1324  for (;;)
1324      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1325    
1326      case OP_EODN:      case OP_EODN:
1327      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1328         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1329          RRETURN(MATCH_NOMATCH);
1330      ecode++;      ecode++;
1331      break;      break;
1332    
# Line 1280  for (;;) Line 1379  for (;;)
1379      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1380    
1381      case OP_ANY:      case OP_ANY:
1382      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if ((ims & PCRE_DOTALL) == 0)
1383        RRETURN(MATCH_NOMATCH);        {
1384          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1385          }
1386      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
 #ifdef SUPPORT_UTF8  
1387      if (utf8)      if (utf8)
1388        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 #endif  
1389      ecode++;      ecode++;
1390      break;      break;
1391    
# Line 1376  for (;;) Line 1475  for (;;)
1475      ecode++;      ecode++;
1476      break;      break;
1477    
1478        case OP_ANYNL:
1479        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1480        GETCHARINCTEST(c, eptr);
1481        switch(c)
1482          {
1483          default: RRETURN(MATCH_NOMATCH);
1484          case 0x000d:
1485          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1486          break;
1487          case 0x000a:
1488          case 0x000b:
1489          case 0x000c:
1490          case 0x0085:
1491          case 0x2028:
1492          case 0x2029:
1493          break;
1494          }
1495        ecode++;
1496        break;
1497    
1498        case OP_NOT_HSPACE:
1499        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1500        GETCHARINCTEST(c, eptr);
1501        switch(c)
1502          {
1503          default: break;
1504          case 0x09:      /* HT */
1505          case 0x20:      /* SPACE */
1506          case 0xa0:      /* NBSP */
1507          case 0x1680:    /* OGHAM SPACE MARK */
1508          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1509          case 0x2000:    /* EN QUAD */
1510          case 0x2001:    /* EM QUAD */
1511          case 0x2002:    /* EN SPACE */
1512          case 0x2003:    /* EM SPACE */
1513          case 0x2004:    /* THREE-PER-EM SPACE */
1514          case 0x2005:    /* FOUR-PER-EM SPACE */
1515          case 0x2006:    /* SIX-PER-EM SPACE */
1516          case 0x2007:    /* FIGURE SPACE */
1517          case 0x2008:    /* PUNCTUATION SPACE */
1518          case 0x2009:    /* THIN SPACE */
1519          case 0x200A:    /* HAIR SPACE */
1520          case 0x202f:    /* NARROW NO-BREAK SPACE */
1521          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1522          case 0x3000:    /* IDEOGRAPHIC SPACE */
1523          RRETURN(MATCH_NOMATCH);
1524          }
1525        ecode++;
1526        break;
1527    
1528        case OP_HSPACE:     /* Match one character that IS in the horizontal-space list below */
1529        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);    /* Nothing left in the subject */
1530        GETCHARINCTEST(c, eptr);    /* Macro defined elsewhere; presumably loads the next character into c and moves eptr past it */
1531        switch(c)
1532          {
1533          default: RRETURN(MATCH_NOMATCH);    /* Not a listed space character: rejected */
1534          case 0x09:      /* HT */
1535          case 0x20:      /* SPACE */
1536          case 0xa0:      /* NBSP */
1537          case 0x1680:    /* OGHAM SPACE MARK */
1538          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1539          case 0x2000:    /* EN QUAD */
1540          case 0x2001:    /* EM QUAD */
1541          case 0x2002:    /* EN SPACE */
1542          case 0x2003:    /* EM SPACE */
1543          case 0x2004:    /* THREE-PER-EM SPACE */
1544          case 0x2005:    /* FOUR-PER-EM SPACE */
1545          case 0x2006:    /* SIX-PER-EM SPACE */
1546          case 0x2007:    /* FIGURE SPACE */
1547          case 0x2008:    /* PUNCTUATION SPACE */
1548          case 0x2009:    /* THIN SPACE */
1549          case 0x200A:    /* HAIR SPACE */
1550          case 0x202f:    /* NARROW NO-BREAK SPACE */
1551          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1552          case 0x3000:    /* IDEOGRAPHIC SPACE */
1553          break;          /* Every listed space character is accepted */
1554          }
1555        ecode++;    /* Step ecode forward by one before leaving the case */
1556        break;
1557    
1558        case OP_NOT_VSPACE:     /* Match one character that is NOT in the vertical-space list below */
1559        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);    /* Nothing left in the subject */
1560        GETCHARINCTEST(c, eptr);    /* Macro defined elsewhere; presumably loads the next character into c and moves eptr past it */
1561        switch(c)
1562          {
1563          default: break;     /* Not a listed vertical-space character: accepted */
1564          case 0x0a:      /* LF */
1565          case 0x0b:      /* VT */
1566          case 0x0c:      /* FF */
1567          case 0x0d:      /* CR */
1568          case 0x85:      /* NEL */
1569          case 0x2028:    /* LINE SEPARATOR */
1570          case 0x2029:    /* PARAGRAPH SEPARATOR */
1571          RRETURN(MATCH_NOMATCH);     /* Every listed vertical-space character shares this rejection */
1572          }
1573        ecode++;    /* Step ecode forward by one before leaving the case */
1574        break;
1575    
1576        case OP_VSPACE:     /* Match one character that IS in the vertical-space list below */
1577        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);    /* Nothing left in the subject */
1578        GETCHARINCTEST(c, eptr);    /* Macro defined elsewhere; presumably loads the next character into c and moves eptr past it */
1579        switch(c)
1580          {
1581          default: RRETURN(MATCH_NOMATCH);    /* Not a listed vertical-space character: rejected */
1582          case 0x0a:      /* LF */
1583          case 0x0b:      /* VT */
1584          case 0x0c:      /* FF */
1585          case 0x0d:      /* CR */
1586          case 0x85:      /* NEL */
1587          case 0x2028:    /* LINE SEPARATOR */
1588          case 0x2029:    /* PARAGRAPH SEPARATOR */
1589          break;          /* Every listed vertical-space character is accepted; unlike OP_ANYNL, a CR does not absorb a following LF here */
1590          }
1591        ecode++;    /* Step ecode forward by one before leaving the case */
1592        break;
1593    
1594  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1595      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1596      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1418  for (;;) Line 1633  for (;;)
1633    
1634          default:          default:
1635          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
         break;  
1636          }          }
1637    
1638        ecode += 3;        ecode += 3;
# Line 1532  for (;;) Line 1746  for (;;)
1746          {          {
1747          for (fi = min;; fi++)          for (fi = min;; fi++)
1748            {            {
1749            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1750            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1751            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1752              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 1553  for (;;) Line 1767  for (;;)
1767            }            }
1768          while (eptr >= pp)          while (eptr >= pp)
1769            {            {
1770            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1771            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1772            eptr -= length;            eptr -= length;
1773            }            }
# Line 1658  for (;;) Line 1872  for (;;)
1872            {            {
1873            for (fi = min;; fi++)            for (fi = min;; fi++)
1874              {              {
1875              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1876              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1877              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1878              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 1678  for (;;) Line 1892  for (;;)
1892            {            {
1893            for (fi = min;; fi++)            for (fi = min;; fi++)
1894              {              {
1895              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1896              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1897              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1898              c = *eptr++;              c = *eptr++;
# Line 1715  for (;;) Line 1929  for (;;)
1929              }              }
1930            for (;;)            for (;;)
1931              {              {
1932              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1933              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1934              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
1935              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1734  for (;;) Line 1948  for (;;)
1948              }              }
1949            while (eptr >= pp)            while (eptr >= pp)
1950              {              {
1951              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
1952              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1953              eptr--;              eptr--;
1954              }              }
# Line 1805  for (;;) Line 2019  for (;;)
2019          {          {
2020          for (fi = min;; fi++)          for (fi = min;; fi++)
2021            {            {
2022            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2023            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2024            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2025            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
# Line 1829  for (;;) Line 2043  for (;;)
2043            }            }
2044          for(;;)          for(;;)
2045            {            {
2046            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2047            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2048            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2049            BACKCHAR(eptr)            BACKCHAR(eptr)
# Line 1888  for (;;) Line 2102  for (;;)
2102    
2103        else        else
2104          {          {
2105          int dc;          unsigned int dc;
2106          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2107          ecode += length;          ecode += length;
2108    
# Line 1915  for (;;) Line 2129  for (;;)
2129        }        }
2130      break;      break;
2131    
2132      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2133    
2134      case OP_EXACT:      case OP_EXACT:
2135      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2136      ecode += 3;      ecode += 3;
2137      goto REPEATCHAR;      goto REPEATCHAR;
2138    
2139        case OP_POSUPTO:
2140        possessive = TRUE;
2141        /* Fall through */
2142    
2143      case OP_UPTO:      case OP_UPTO:
2144      case OP_MINUPTO:      case OP_MINUPTO:
2145      min = 0;      min = 0;
# Line 1930  for (;;) Line 2148  for (;;)
2148      ecode += 3;      ecode += 3;
2149      goto REPEATCHAR;      goto REPEATCHAR;
2150    
2151        case OP_POSSTAR:    /* Possessive repeat, bounds 0..INT_MAX */
2152        possessive = TRUE;  /* Tested by the `if (possessive) continue;` checks placed ahead of the backtracking loops in this file */
2153        min = 0;
2154        max = INT_MAX;
2155        ecode++;            /* Step ecode forward by one before entering the shared repeat code */
2156        goto REPEATCHAR;
2157    
2158        case OP_POSPLUS:    /* Possessive repeat, bounds 1..INT_MAX */
2159        possessive = TRUE;
2160        min = 1;
2161        max = INT_MAX;
2162        ecode++;
2163        goto REPEATCHAR;
2164    
2165        case OP_POSQUERY:   /* Possessive repeat, bounds 0..1 */
2166        possessive = TRUE;
2167        min = 0;
2168        max = 1;
2169        ecode++;
2170        goto REPEATCHAR;
2171    
2172      case OP_STAR:      case OP_STAR:
2173      case OP_MINSTAR:      case OP_MINSTAR:
2174      case OP_PLUS:      case OP_PLUS:
# Line 1961  for (;;) Line 2200  for (;;)
2200    
2201        if (length > 1)        if (length > 1)
2202          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2203  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2204          int othercase;          unsigned int othercase;
2205          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2206              (othercase = _pcre_ucp_othercase(fc)) >= 0 &&              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
              othercase >= 0)  
2207            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2208            else oclength = 0;
2209  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2210    
2211          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2212            {            {
2213            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (memcmp(eptr, charptr, length) == 0) eptr += length;
2214    #ifdef SUPPORT_UCP
2215            /* Need braces because of following else */            /* Need braces because of following else */
2216            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2217            else            else
# Line 1982  for (;;) Line 2219  for (;;)
2219              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2220              eptr += oclength;              eptr += oclength;
2221              }              }
2222    #else   /* without SUPPORT_UCP */
2223              else { RRETURN(MATCH_NOMATCH); }
2224    #endif  /* SUPPORT_UCP */
2225            }            }
2226    
2227          if (min == max) continue;          if (min == max) continue;
# Line 1990  for (;;) Line 2230  for (;;)
2230            {            {
2231            for (fi = min;; fi++)            for (fi = min;; fi++)
2232              {              {
2233              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2234              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2235              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2236              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2237    #ifdef SUPPORT_UCP
2238              /* Need braces because of following else */              /* Need braces because of following else */
2239              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2240              else              else
# Line 2001  for (;;) Line 2242  for (;;)
2242                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2243                eptr += oclength;                eptr += oclength;
2244                }                }
2245    #else   /* without SUPPORT_UCP */
2246                else { RRETURN (MATCH_NOMATCH); }
2247    #endif  /* SUPPORT_UCP */
2248              }              }
2249            /* Control never gets here */            /* Control never gets here */
2250            }            }
2251          else  
2252            else  /* Maximize */
2253            {            {
2254            pp = eptr;            pp = eptr;
2255            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2256              {              {
2257              if (eptr > md->end_subject - length) break;              if (eptr > md->end_subject - length) break;
2258              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2259    #ifdef SUPPORT_UCP
2260              else if (oclength == 0) break;              else if (oclength == 0) break;
2261              else              else
2262                {                {
2263                if (memcmp(eptr, occhars, oclength) != 0) break;                if (memcmp(eptr, occhars, oclength) != 0) break;
2264                eptr += oclength;                eptr += oclength;
2265                }                }
2266    #else   /* without SUPPORT_UCP */
2267                else break;
2268    #endif  /* SUPPORT_UCP */
2269              }              }
2270            while (eptr >= pp)  
2271              if (possessive) continue;
2272              for(;;)
2273             {             {
2274             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2275             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2276               if (eptr == pp) RRETURN(MATCH_NOMATCH);
2277    #ifdef SUPPORT_UCP
2278               eptr--;
2279               BACKCHAR(eptr);
2280    #else   /* without SUPPORT_UCP */
2281             eptr -= length;             eptr -= length;
2282    #endif  /* SUPPORT_UCP */
2283             }             }
           RRETURN(MATCH_NOMATCH);  
2284            }            }
2285          /* Control never gets here */          /* Control never gets here */
2286          }          }
# Line 2064  for (;;) Line 2320  for (;;)
2320          {          {
2321          for (fi = min;; fi++)          for (fi = min;; fi++)
2322            {            {
2323            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2324            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2325            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
2326                fc != md->lcc[*eptr++])                fc != md->lcc[*eptr++])
# Line 2072  for (;;) Line 2328  for (;;)
2328            }            }
2329          /* Control never gets here */          /* Control never gets here */
2330          }          }
2331        else        else  /* Maximize */
2332          {          {
2333          pp = eptr;          pp = eptr;
2334          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2080  for (;;) Line 2336  for (;;)
2336            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2337            eptr++;            eptr++;
2338            }            }
2339            if (possessive) continue;
2340          while (eptr >= pp)          while (eptr >= pp)
2341            {            {
2342            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2343            eptr--;            eptr--;
2344            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2345            }            }
# Line 2101  for (;;) Line 2358  for (;;)
2358          {          {
2359          for (fi = min;; fi++)          for (fi = min;; fi++)
2360            {            {
2361            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2362            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2363            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2364              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2365            }            }
2366          /* Control never gets here */          /* Control never gets here */
2367          }          }
2368        else        else  /* Maximize */
2369          {          {
2370          pp = eptr;          pp = eptr;
2371          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2116  for (;;) Line 2373  for (;;)
2373            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2374            eptr++;            eptr++;
2375            }            }
2376            if (possessive) continue;
2377          while (eptr >= pp)          while (eptr >= pp)
2378            {            {
2379            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2380            eptr--;            eptr--;
2381            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2382            }            }
# Line 2168  for (;;) Line 2426  for (;;)
2426      ecode += 3;      ecode += 3;
2427      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2428    
2429        case OP_NOTPOSSTAR:     /* Possessive repeat for the REPEATNOTCHAR code, bounds 0..INT_MAX */
2430        possessive = TRUE;      /* Tested by the `if (possessive) continue;` checks placed ahead of the backtracking loops in this file */
2431        min = 0;
2432        max = INT_MAX;
2433        ecode++;                /* Step ecode forward by one before entering the shared repeat code */
2434        goto REPEATNOTCHAR;
2435    
2436        case OP_NOTPOSPLUS:     /* Possessive repeat, bounds 1..INT_MAX */
2437        possessive = TRUE;
2438        min = 1;
2439        max = INT_MAX;
2440        ecode++;
2441        goto REPEATNOTCHAR;
2442    
2443        case OP_NOTPOSQUERY:    /* Possessive repeat, bounds 0..1 */
2444        possessive = TRUE;
2445        min = 0;
2446        max = 1;
2447        ecode++;
2448        goto REPEATNOTCHAR;
2449    
2450        case OP_NOTPOSUPTO:     /* Possessive repeat, bounds 0..(value fetched by GET2) */
2451        possessive = TRUE;
2452        min = 0;
2453        max = GET2(ecode, 1);   /* Upper bound is read via GET2 at offset 1 from ecode */
2454        ecode += 3;             /* Step ecode forward by three, matching the other GET2(ecode, 1) cases in this file */
2455        goto REPEATNOTCHAR;
2456    
2457      case OP_NOTSTAR:      case OP_NOTSTAR:
2458      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2459      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2207  for (;;) Line 2493  for (;;)
2493        /* UTF-8 mode */        /* UTF-8 mode */
2494        if (utf8)        if (utf8)
2495          {          {
2496          register int d;          register unsigned int d;
2497          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2498            {            {
2499            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2232  for (;;) Line 2518  for (;;)
2518          /* UTF-8 mode */          /* UTF-8 mode */
2519          if (utf8)          if (utf8)
2520            {            {
2521            register int d;            register unsigned int d;
2522            for (fi = min;; fi++)            for (fi = min;; fi++)
2523              {              {
2524              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2525              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2526              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2527              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
# Line 2249  for (;;) Line 2535  for (;;)
2535            {            {
2536            for (fi = min;; fi++)            for (fi = min;; fi++)
2537              {              {
2538              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2539              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2540              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2541                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2268  for (;;) Line 2554  for (;;)
2554          /* UTF-8 mode */          /* UTF-8 mode */
2555          if (utf8)          if (utf8)
2556            {            {
2557            register int d;            register unsigned int d;
2558            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2559              {              {
2560              int len = 1;              int len = 1;
# Line 2278  for (;;) Line 2564  for (;;)
2564              if (fc == d) break;              if (fc == d) break;
2565              eptr += len;              eptr += len;
2566              }              }
2567            for(;;)          if (possessive) continue;
2568            for(;;)
2569              {              {
2570              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2571              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2572              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2573              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2295  for (;;) Line 2582  for (;;)
2582              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2583              eptr++;              eptr++;
2584              }              }
2585              if (possessive) continue;
2586            while (eptr >= pp)            while (eptr >= pp)
2587              {              {
2588              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2589              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2590              eptr--;              eptr--;
2591              }              }
# Line 2316  for (;;) Line 2604  for (;;)
2604        /* UTF-8 mode */        /* UTF-8 mode */
2605        if (utf8)        if (utf8)
2606          {          {
2607          register int d;          register unsigned int d;
2608          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2609            {            {
2610            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2339  for (;;) Line 2627  for (;;)
2627          /* UTF-8 mode */          /* UTF-8 mode */
2628          if (utf8)          if (utf8)
2629            {            {
2630            register int d;            register unsigned int d;
2631            for (fi = min;; fi++)            for (fi = min;; fi++)
2632              {              {
2633              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2634              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2635              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2636              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fi >= max || eptr >= md->end_subject || fc == d)
# Line 2355  for (;;) Line 2643  for (;;)
2643            {            {
2644            for (fi = min;; fi++)            for (fi = min;; fi++)
2645              {              {
2646              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2647              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2648              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2649                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2374  for (;;) Line 2662  for (;;)
2662          /* UTF-8 mode */          /* UTF-8 mode */
2663          if (utf8)          if (utf8)
2664            {            {
2665            register int d;            register unsigned int d;
2666            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2667              {              {
2668              int len = 1;              int len = 1;
# Line 2383  for (;;) Line 2671  for (;;)
2671              if (fc == d) break;              if (fc == d) break;
2672              eptr += len;              eptr += len;
2673              }              }
2674              if (possessive) continue;
2675            for(;;)            for(;;)
2676              {              {
2677              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2678              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2679              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2680              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2400  for (;;) Line 2689  for (;;)
2689              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2690              eptr++;              eptr++;
2691              }              }
2692              if (possessive) continue;
2693            while (eptr >= pp)            while (eptr >= pp)
2694              {              {
2695              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2696              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2697              eptr--;              eptr--;
2698              }              }
# Line 2431  for (;;) Line 2721  for (;;)
2721      ecode += 3;      ecode += 3;
2722      goto REPEATTYPE;      goto REPEATTYPE;
2723    
2724        case OP_TYPEPOSSTAR:    /* Possessive repeat for the REPEATTYPE code, bounds 0..INT_MAX */
2725        possessive = TRUE;      /* Tested by the `if (possessive) continue;` checks placed ahead of the backtracking loops in this file */
2726        min = 0;
2727        max = INT_MAX;
2728        ecode++;                /* Step ecode forward by one before entering the shared repeat code */
2729        goto REPEATTYPE;
2730    
2731        case OP_TYPEPOSPLUS:    /* Possessive repeat, bounds 1..INT_MAX */
2732        possessive = TRUE;
2733        min = 1;
2734        max = INT_MAX;
2735        ecode++;
2736        goto REPEATTYPE;
2737    
2738        case OP_TYPEPOSQUERY:   /* Possessive repeat, bounds 0..1 */
2739        possessive = TRUE;
2740        min = 0;
2741        max = 1;
2742        ecode++;
2743        goto REPEATTYPE;
2744    
2745        case OP_TYPEPOSUPTO:    /* Possessive repeat, bounds 0..(value fetched by GET2) */
2746        possessive = TRUE;
2747        min = 0;
2748        max = GET2(ecode, 1);   /* Upper bound is read via GET2 at offset 1 from ecode */
2749        ecode += 3;             /* Step ecode forward by three, matching the other GET2(ecode, 1) cases in this file */
2750        goto REPEATTYPE;
2751    
2752      case OP_TYPESTAR:      case OP_TYPESTAR:
2753      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
2754      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2481  for (;;) Line 2799  for (;;)
2799            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2800              {              {
2801              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2802              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2803              }              }
2804            break;            break;
2805    
# Line 2489  for (;;) Line 2807  for (;;)
2807            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2808              {              {
2809              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2810              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2811              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2812              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2813                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 2502  for (;;) Line 2820  for (;;)
2820            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2821              {              {
2822              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2823              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2824              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2825              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2826                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2513  for (;;) Line 2831  for (;;)
2831            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2832              {              {
2833              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2834              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2835              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2836              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2837                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2524  for (;;) Line 2842  for (;;)
2842            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2843              {              {
2844              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2845              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2846              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2847              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2848                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2533  for (;;) Line 2851  for (;;)
2851    
2852            default:            default:
2853            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
2854            }            }
2855          }          }
2856    
# Line 2573  for (;;) Line 2890  for (;;)
2890          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2891            {            {
2892            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
2893               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2894              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2895              eptr++;
2896            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2897            }            }
2898          break;          break;
# Line 2583  for (;;) Line 2901  for (;;)
2901          eptr += min;          eptr += min;
2902          break;          break;
2903    
2904            case OP_ANYNL:
2905            for (i = 1; i <= min; i++)
2906              {
2907              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2908              GETCHARINC(c, eptr);
2909              switch(c)
2910                {
2911                default: RRETURN(MATCH_NOMATCH);
2912                case 0x000d:
2913                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2914                break;
2915                case 0x000a:
2916                case 0x000b:
2917                case 0x000c:
2918                case 0x0085:
2919                case 0x2028:
2920                case 0x2029:
2921                break;
2922                }
2923              }
2924            break;
2925    
2926            case OP_NOT_HSPACE:
2927            for (i = 1; i <= min; i++)
2928              {
2929              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2930              GETCHARINC(c, eptr);
2931              switch(c)
2932                {
2933                default: break;
2934                case 0x09:      /* HT */
2935                case 0x20:      /* SPACE */
2936                case 0xa0:      /* NBSP */
2937                case 0x1680:    /* OGHAM SPACE MARK */
2938                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2939                case 0x2000:    /* EN QUAD */
2940                case 0x2001:    /* EM QUAD */
2941                case 0x2002:    /* EN SPACE */
2942                case 0x2003:    /* EM SPACE */
2943                case 0x2004:    /* THREE-PER-EM SPACE */
2944                case 0x2005:    /* FOUR-PER-EM SPACE */
2945                case 0x2006:    /* SIX-PER-EM SPACE */
2946                case 0x2007:    /* FIGURE SPACE */
2947                case 0x2008:    /* PUNCTUATION SPACE */
2948                case 0x2009:    /* THIN SPACE */
2949                case 0x200A:    /* HAIR SPACE */
2950                case 0x202f:    /* NARROW NO-BREAK SPACE */
2951                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2952                case 0x3000:    /* IDEOGRAPHIC SPACE */
2953                RRETURN(MATCH_NOMATCH);
2954                }
2955              }
2956            break;
2957    
2958            case OP_HSPACE:
2959            for (i = 1; i <= min; i++)
2960              {
2961              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2962              GETCHARINC(c, eptr);
2963              switch(c)
2964                {
2965                default: RRETURN(MATCH_NOMATCH);
2966                case 0x09:      /* HT */
2967                case 0x20:      /* SPACE */
2968                case 0xa0:      /* NBSP */
2969                case 0x1680:    /* OGHAM SPACE MARK */
2970                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2971                case 0x2000:    /* EN QUAD */
2972                case 0x2001:    /* EM QUAD */
2973                case 0x2002:    /* EN SPACE */
2974                case 0x2003:    /* EM SPACE */
2975                case 0x2004:    /* THREE-PER-EM SPACE */
2976                case 0x2005:    /* FOUR-PER-EM SPACE */
2977                case 0x2006:    /* SIX-PER-EM SPACE */
2978                case 0x2007:    /* FIGURE SPACE */
2979                case 0x2008:    /* PUNCTUATION SPACE */
2980                case 0x2009:    /* THIN SPACE */
2981                case 0x200A:    /* HAIR SPACE */
2982                case 0x202f:    /* NARROW NO-BREAK SPACE */
2983                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2984                case 0x3000:    /* IDEOGRAPHIC SPACE */
2985                break;
2986                }
2987              }
2988            break;
2989    
2990            case OP_NOT_VSPACE:
2991            for (i = 1; i <= min; i++)
2992              {
2993              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2994              GETCHARINC(c, eptr);
2995              switch(c)
2996                {
2997                default: break;
2998                case 0x0a:      /* LF */
2999                case 0x0b:      /* VT */
3000                case 0x0c:      /* FF */
3001                case 0x0d:      /* CR */
3002                case 0x85:      /* NEL */
3003                case 0x2028:    /* LINE SEPARATOR */
3004                case 0x2029:    /* PARAGRAPH SEPARATOR */
3005                RRETURN(MATCH_NOMATCH);
3006                }
3007              }
3008            break;
3009    
3010            case OP_VSPACE:
3011            for (i = 1; i <= min; i++)
3012              {
3013              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3014              GETCHARINC(c, eptr);
3015              switch(c)
3016                {
3017                default: RRETURN(MATCH_NOMATCH);
3018                case 0x0a:      /* LF */
3019                case 0x0b:      /* VT */
3020                case 0x0c:      /* FF */
3021                case 0x0d:      /* CR */
3022                case 0x85:      /* NEL */
3023                case 0x2028:    /* LINE SEPARATOR */
3024                case 0x2029:    /* PARAGRAPH SEPARATOR */
3025                break;
3026                }
3027              }
3028            break;
3029    
3030          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3031          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3032            {            {
# Line 2651  for (;;) Line 3095  for (;;)
3095  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
3096    
3097        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
3098        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
3099          number of bytes present, as this was tested above. */
3100    
3101        switch(ctype)        switch(ctype)
3102          {          {
# Line 2659  for (;;) Line 3104  for (;;)
3104          if ((ims & PCRE_DOTALL) == 0)          if ((ims & PCRE_DOTALL) == 0)
3105            {            {
3106            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3107              if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);              {
3108                if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3109                eptr++;
3110                }
3111            }            }
3112          else eptr += min;          else eptr += min;
3113          break;          break;
# Line 2668  for (;;) Line 3116  for (;;)
3116          eptr += min;          eptr += min;
3117          break;          break;
3118    
3119            /* Because of the CRLF case, we can't assume the minimum number of
3120            bytes are present in this case. */
3121    
3122            case OP_ANYNL:
3123            for (i = 1; i <= min; i++)
3124              {
3125              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3126              switch(*eptr++)
3127                {
3128                default: RRETURN(MATCH_NOMATCH);
3129                case 0x000d:
3130                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3131                break;
3132                case 0x000a:
3133                case 0x000b:
3134                case 0x000c:
3135                case 0x0085:
3136                break;
3137                }
3138              }
3139            break;
3140    
3141            case OP_NOT_HSPACE:
3142            for (i = 1; i <= min; i++)
3143              {
3144              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3145              switch(*eptr++)
3146                {
3147                default: break;
3148                case 0x09:      /* HT */
3149                case 0x20:      /* SPACE */
3150                case 0xa0:      /* NBSP */
3151                RRETURN(MATCH_NOMATCH);
3152                }
3153              }
3154            break;
3155    
3156            case OP_HSPACE:
3157            for (i = 1; i <= min; i++)
3158              {
3159              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3160              switch(*eptr++)
3161                {
3162                default: RRETURN(MATCH_NOMATCH);
3163                case 0x09:      /* HT */
3164                case 0x20:      /* SPACE */
3165                case 0xa0:      /* NBSP */
3166                break;
3167                }
3168              }
3169            break;
3170    
3171            case OP_NOT_VSPACE:
3172            for (i = 1; i <= min; i++)
3173              {
3174              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3175              switch(*eptr++)
3176                {
3177                default: break;
3178                case 0x0a:      /* LF */
3179                case 0x0b:      /* VT */
3180                case 0x0c:      /* FF */
3181                case 0x0d:      /* CR */
3182                case 0x85:      /* NEL */
3183                RRETURN(MATCH_NOMATCH);
3184                }
3185              }
3186            break;
3187    
3188            case OP_VSPACE:
3189            for (i = 1; i <= min; i++)
3190              {
3191              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3192              switch(*eptr++)
3193                {
3194                default: RRETURN(MATCH_NOMATCH);
3195                case 0x0a:      /* LF */
3196                case 0x0b:      /* VT */
3197                case 0x0c:      /* FF */
3198                case 0x0d:      /* CR */
3199                case 0x85:      /* NEL */
3200                break;
3201                }
3202              }
3203            break;
3204    
3205          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3206          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3207            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2723  for (;;) Line 3257  for (;;)
3257            case PT_ANY:            case PT_ANY:
3258            for (fi = min;; fi++)            for (fi = min;; fi++)
3259              {              {
3260              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3261              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3262              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3263              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3264              if (prop_fail_result) RRETURN(MATCH_NOMATCH);              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3265              }              }
3266            break;            /* Control never gets here */
3267    
3268            case PT_LAMP:            case PT_LAMP:
3269            for (fi = min;; fi++)            for (fi = min;; fi++)
3270              {              {
3271              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3272              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3273              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3274              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2744  for (;;) Line 3278  for (;;)
3278                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3279                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3280              }              }
3281            break;            /* Control never gets here */
3282    
3283            case PT_GC:            case PT_GC:
3284            for (fi = min;; fi++)            for (fi = min;; fi++)
3285              {              {
3286              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3287              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3288              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3289              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2757  for (;;) Line 3291  for (;;)
3291              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3292                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3293              }              }
3294            break;            /* Control never gets here */
3295    
3296            case PT_PC:            case PT_PC:
3297            for (fi = min;; fi++)            for (fi = min;; fi++)
3298              {              {
3299              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3300              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3301              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3302              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2770  for (;;) Line 3304  for (;;)
3304              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3305                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3306              }              }
3307            break;            /* Control never gets here */
3308    
3309            case PT_SC:            case PT_SC:
3310            for (fi = min;; fi++)            for (fi = min;; fi++)
3311              {              {
3312              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3313              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3314              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3315              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2783  for (;;) Line 3317  for (;;)
3317              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3318                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3319              }              }
3320            break;            /* Control never gets here */
3321    
3322            default:            default:
3323            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
3324            }            }
3325          }          }
3326    
# Line 2798  for (;;) Line 3331  for (;;)
3331          {          {
3332          for (fi = min;; fi++)          for (fi = min;; fi++)
3333            {            {
3334            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3335            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3336            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3337            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
# Line 2827  for (;;) Line 3360  for (;;)
3360          {          {
3361          for (fi = min;; fi++)          for (fi = min;; fi++)
3362            {            {
3363            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3364            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3365            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3366                   (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3367                    IS_NEWLINE(eptr)))
3368                RRETURN(MATCH_NOMATCH);
3369    
3370            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3371            switch(ctype)            switch(ctype)
3372              {              {
3373              case OP_ANY:              case OP_ANY:        /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3374              break;              break;
3375    
3376              case OP_ANYBYTE:              case OP_ANYBYTE:
3377              break;              break;
3378    
3379                case OP_ANYNL:
3380                switch(c)
3381                  {
3382                  default: RRETURN(MATCH_NOMATCH);
3383                  case 0x000d:
3384                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3385                  break;
3386                  case 0x000a:
3387                  case 0x000b:
3388                  case 0x000c:
3389                  case 0x0085:
3390                  case 0x2028:
3391                  case 0x2029:
3392                  break;
3393                  }
3394                break;
3395    
3396                case OP_NOT_HSPACE:
3397                switch(c)
3398                  {
3399                  default: break;
3400                  case 0x09:      /* HT */
3401                  case 0x20:      /* SPACE */
3402                  case 0xa0:      /* NBSP */
3403                  case 0x1680:    /* OGHAM SPACE MARK */
3404                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3405                  case 0x2000:    /* EN QUAD */
3406                  case 0x2001:    /* EM QUAD */
3407                  case 0x2002:    /* EN SPACE */
3408                  case 0x2003:    /* EM SPACE */
3409                  case 0x2004:    /* THREE-PER-EM SPACE */
3410                  case 0x2005:    /* FOUR-PER-EM SPACE */
3411                  case 0x2006:    /* SIX-PER-EM SPACE */
3412                  case 0x2007:    /* FIGURE SPACE */
3413                  case 0x2008:    /* PUNCTUATION SPACE */
3414                  case 0x2009:    /* THIN SPACE */
3415                  case 0x200A:    /* HAIR SPACE */
3416                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3417                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3418                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3419                  RRETURN(MATCH_NOMATCH);
3420                  }
3421                break;
3422    
3423                case OP_HSPACE:
3424                switch(c)
3425                  {
3426                  default: RRETURN(MATCH_NOMATCH);
3427                  case 0x09:      /* HT */
3428                  case 0x20:      /* SPACE */
3429                  case 0xa0:      /* NBSP */
3430                  case 0x1680:    /* OGHAM SPACE MARK */
3431                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3432                  case 0x2000:    /* EN QUAD */
3433                  case 0x2001:    /* EM QUAD */
3434                  case 0x2002:    /* EN SPACE */
3435                  case 0x2003:    /* EM SPACE */
3436                  case 0x2004:    /* THREE-PER-EM SPACE */
3437                  case 0x2005:    /* FOUR-PER-EM SPACE */
3438                  case 0x2006:    /* SIX-PER-EM SPACE */
3439                  case 0x2007:    /* FIGURE SPACE */
3440                  case 0x2008:    /* PUNCTUATION SPACE */
3441                  case 0x2009:    /* THIN SPACE */
3442                  case 0x200A:    /* HAIR SPACE */
3443                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3444                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3445                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3446                  break;
3447                  }
3448                break;
3449    
3450                case OP_NOT_VSPACE:
3451                switch(c)
3452                  {
3453                  default: break;
3454                  case 0x0a:      /* LF */
3455                  case 0x0b:      /* VT */
3456                  case 0x0c:      /* FF */
3457                  case 0x0d:      /* CR */
3458                  case 0x85:      /* NEL */
3459                  case 0x2028:    /* LINE SEPARATOR */
3460                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3461                  RRETURN(MATCH_NOMATCH);
3462                  }
3463                break;
3464    
3465                case OP_VSPACE:
3466                switch(c)
3467                  {
3468                  default: RRETURN(MATCH_NOMATCH);
3469                  case 0x0a:      /* LF */
3470                  case 0x0b:      /* VT */
3471                  case 0x0c:      /* FF */
3472                  case 0x0d:      /* CR */
3473                  case 0x85:      /* NEL */
3474                  case 0x2028:    /* LINE SEPARATOR */
3475                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3476                  break;
3477                  }
3478                break;
3479    
3480              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3481              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3482                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2882  for (;;) Line 3518  for (;;)
3518          {          {
3519          for (fi = min;; fi++)          for (fi = min;; fi++)
3520            {            {
3521            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3522            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3523            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3524                   ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3525                RRETURN(MATCH_NOMATCH);
3526    
3527            c = *eptr++;            c = *eptr++;
3528            switch(ctype)            switch(ctype)
3529              {              {
3530              case OP_ANY:              case OP_ANY:   /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3531              break;              break;
3532    
3533              case OP_ANYBYTE:              case OP_ANYBYTE:
3534              break;              break;
3535    
3536                case OP_ANYNL:
3537                switch(c)
3538                  {
3539                  default: RRETURN(MATCH_NOMATCH);
3540                  case 0x000d:
3541                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3542                  break;
3543                  case 0x000a:
3544                  case 0x000b:
3545                  case 0x000c:
3546                  case 0x0085:
3547                  break;
3548                  }
3549                break;
3550    
3551                case OP_NOT_HSPACE:
3552                switch(c)
3553                  {
3554                  default: break;
3555                  case 0x09:      /* HT */
3556                  case 0x20:      /* SPACE */
3557                  case 0xa0:      /* NBSP */
3558                  RRETURN(MATCH_NOMATCH);
3559                  }
3560                break;
3561    
3562                case OP_HSPACE:
3563                switch(c)
3564                  {
3565                  default: RRETURN(MATCH_NOMATCH);
3566                  case 0x09:      /* HT */
3567                  case 0x20:      /* SPACE */
3568                  case 0xa0:      /* NBSP */
3569                  break;
3570                  }
3571                break;
3572    
3573                case OP_NOT_VSPACE:
3574                switch(c)
3575                  {
3576                  default: break;
3577                  case 0x0a:      /* LF */
3578                  case 0x0b:      /* VT */
3579                  case 0x0c:      /* FF */
3580                  case 0x0d:      /* CR */
3581                  case 0x85:      /* NEL */
3582                  RRETURN(MATCH_NOMATCH);
3583                  }
3584                break;
3585    
3586                case OP_VSPACE:
3587                switch(c)
3588                  {
3589                  default: RRETURN(MATCH_NOMATCH);
3590                  case 0x0a:      /* LF */
3591                  case 0x0b:      /* VT */
3592                  case 0x0c:      /* FF */
3593                  case 0x0d:      /* CR */
3594                  case 0x85:      /* NEL */
3595                  break;
3596                  }
3597                break;
3598    
3599              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3600              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3601              break;              break;
# Line 2927  for (;;) Line 3628  for (;;)
3628        /* Control never gets here */        /* Control never gets here */
3629        }        }
3630    
3631      /* If maximizing it is worth using inline code for speed, doing the type      /* If maximizing, it is worth using inline code for speed, doing the type
3632      test once at the start (i.e. keep it out of the loop). Again, keep the      test once at the start (i.e. keep it out of the loop). Again, keep the
3633      UTF-8 and UCP stuff separate. */      UTF-8 and UCP stuff separate. */
3634    
# Line 3008  for (;;) Line 3709  for (;;)
3709    
3710          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3711    
3712            if (possessive) continue;
3713          for(;;)          for(;;)
3714            {            {
3715            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3716            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3717            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3718            BACKCHAR(eptr);            BACKCHAR(eptr);
# Line 3043  for (;;) Line 3745  for (;;)
3745    
3746          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3747    
3748            if (possessive) continue;
3749          for(;;)          for(;;)
3750            {            {
3751            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3752            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3753            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3754            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
# Line 3074  for (;;) Line 3777  for (;;)
3777          switch(ctype)          switch(ctype)
3778            {            {
3779            case OP_ANY:            case OP_ANY:
   
           /* Special code is required for UTF8, but when the maximum is unlimited  
           we don't need it, so we repeat the non-UTF8 code. This is probably  
           worth it, because .* is quite a common idiom. */  
   
3780            if (max < INT_MAX)            if (max < INT_MAX)
3781              {              {
3782              if ((ims & PCRE_DOTALL) == 0)              if ((ims & PCRE_DOTALL) == 0)
3783                {                {
3784                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3785                  {                  {
3786                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3787                  eptr++;                  eptr++;
3788                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3789                  }                  }
# Line 3094  for (;;) Line 3792  for (;;)
3792                {                {
3793                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3794                  {                  {
3795                    if (eptr >= md->end_subject) break;
3796                  eptr++;                  eptr++;
3797                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3798                  }                  }
# Line 3108  for (;;) Line 3807  for (;;)
3807                {                {
3808                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3809                  {                  {
3810                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3811                  eptr++;                  eptr++;
3812                    while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3813                  }                  }
               break;  
3814                }                }
3815              else              else
3816                {                {
3817                c = max - min;                eptr = md->end_subject;
               if (c > md->end_subject - eptr) c = md->end_subject - eptr;  
               eptr += c;  
3818                }                }
3819              }              }
3820            break;            break;
# Line 3126  for (;;) Line 3823  for (;;)
3823    
3824            case OP_ANYBYTE:            case OP_ANYBYTE:
3825            c = max - min;            c = max - min;
3826            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3827                c = md->end_subject - eptr;
3828            eptr += c;            eptr += c;
3829            break;            break;
3830    
3831              case OP_ANYNL:
3832              for (i = min; i < max; i++)
3833                {
3834                int len = 1;
3835                if (eptr >= md->end_subject) break;
3836                GETCHARLEN(c, eptr, len);
3837                if (c == 0x000d)
3838                  {
3839                  if (++eptr >= md->end_subject) break;
3840                  if (*eptr == 0x000a) eptr++;
3841                  }
3842                else
3843                  {
3844                  if (c != 0x000a && c != 0x000b && c != 0x000c &&
3845                      c != 0x0085 && c != 0x2028 && c != 0x2029)
3846                    break;
3847                  eptr += len;
3848                  }
3849                }
3850              break;
3851    
3852              case OP_NOT_HSPACE:
3853              case OP_HSPACE:
3854              for (i = min; i < max; i++)
3855                {
3856                BOOL gotspace;
3857                int len = 1;
3858                if (eptr >= md->end_subject) break;
3859                GETCHARLEN(c, eptr, len);
3860                switch(c)
3861                  {
3862                  default: gotspace = FALSE; break;
3863                  case 0x09:      /* HT */
3864                  case 0x20:      /* SPACE */
3865                  case 0xa0:      /* NBSP */
3866                  case 0x1680:    /* OGHAM SPACE MARK */
3867                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3868                  case 0x2000:    /* EN QUAD */
3869                  case 0x2001:    /* EM QUAD */
3870                  case 0x2002:    /* EN SPACE */
3871                  case 0x2003:    /* EM SPACE */
3872                  case 0x2004:    /* THREE-PER-EM SPACE */
3873                  case 0x2005:    /* FOUR-PER-EM SPACE */
3874                  case 0x2006:    /* SIX-PER-EM SPACE */
3875                  case 0x2007:    /* FIGURE SPACE */
3876                  case 0x2008:    /* PUNCTUATION SPACE */
3877                  case 0x2009:    /* THIN SPACE */
3878                  case 0x200A:    /* HAIR SPACE */
3879                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3880                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3881                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3882                  gotspace = TRUE;
3883                  break;
3884                  }
3885                if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3886                eptr += len;
3887                }
3888              break;
3889    
3890              case OP_NOT_VSPACE:
3891              case OP_VSPACE:
3892              for (i = min; i < max; i++)
3893                {
3894                BOOL gotspace;
3895                int len = 1;
3896                if (eptr >= md->end_subject) break;
3897                GETCHARLEN(c, eptr, len);
3898                switch(c)
3899                  {
3900                  default: gotspace = FALSE; break;
3901                  case 0x0a:      /* LF */
3902                  case 0x0b:      /* VT */
3903                  case 0x0c:      /* FF */
3904                  case 0x0d:      /* CR */
3905                  case 0x85:      /* NEL */
3906                  case 0x2028:    /* LINE SEPARATOR */
3907                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3908                  gotspace = TRUE;
3909                  break;
3910                  }
3911                if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3912                eptr += len;
3913                }
3914              break;
3915    
3916            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3917            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3918              {              {
# Line 3202  for (;;) Line 3985  for (;;)
3985    
3986          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3987    
3988            if (possessive) continue;
3989          for(;;)          for(;;)
3990            {            {
3991            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
3992            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3993            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3994            BACKCHAR(eptr);            BACKCHAR(eptr);
# Line 3222  for (;;) Line 4006  for (;;)
4006              {              {
4007              for (i = min; i < max; i++)              for (i = min; i < max; i++)
4008                {                {
4009                if (eptr >= md->end_subject || *eptr == NEWLINE) break;                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4010                eptr++;                eptr++;
4011                }                }
4012              break;              break;
# Line 3231  for (;;) Line 4015  for (;;)
4015    
4016            case OP_ANYBYTE:            case OP_ANYBYTE:
4017            c = max - min;            c = max - min;
4018            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
4019                c = md->end_subject - eptr;
4020            eptr += c;            eptr += c;
4021            break;            break;
4022    
4023              case OP_ANYNL:
4024              for (i = min; i < max; i++)
4025                {
4026                if (eptr >= md->end_subject) break;
4027                c = *eptr;
4028                if (c == 0x000d)
4029                  {
4030                  if (++eptr >= md->end_subject) break;
4031                  if (*eptr == 0x000a) eptr++;
4032                  }
4033                else
4034                  {
4035                  if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
4036                    break;
4037                  eptr++;
4038                  }
4039                }
4040              break;
4041    
4042              case OP_NOT_HSPACE:
4043              for (i = min; i < max; i++)
4044                {
4045                if (eptr >= md->end_subject) break;
4046                c = *eptr;
4047                if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4048                eptr++;
4049                }
4050              break;
4051    
4052              case OP_HSPACE:
4053              for (i = min; i < max; i++)
4054                {
4055                if (eptr >= md->end_subject) break;
4056                c = *eptr;
4057                if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4058                eptr++;
4059                }
4060              break;
4061    
4062              case OP_NOT_VSPACE:
4063              for (i = min; i < max; i++)
4064                {
4065                if (eptr >= md->end_subject) break;
4066                c = *eptr;
4067                if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4068                  break;
4069                eptr++;
4070                }
4071              break;
4072    
4073              case OP_VSPACE:
4074              for (i = min; i < max; i++)
4075                {
4076                if (eptr >= md->end_subject) break;
4077                c = *eptr;
4078                if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4079                  break;
4080                eptr++;
4081                }
4082              break;
4083    
4084            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
4085            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4086              {              {
# Line 3295  for (;;) Line 4141  for (;;)
4141    
4142          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4143    
4144            if (possessive) continue;
4145          while (eptr >= pp)          while (eptr >= pp)
4146            {            {
4147            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4148            eptr--;            eptr--;
4149            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4150            }            }
# Line 3309  for (;;) Line 4156  for (;;)
4156        }        }
4157      /* Control never gets here */      /* Control never gets here */
4158    
4159      /* There's been some horrible disaster. Since all codes > OP_BRA are      /* There's been some horrible disaster. Arrival here can only mean there is
4160      for capturing brackets, and there shouldn't be any gaps between 0 and      something seriously wrong in the code above or the OP_xxx definitions. */
     OP_BRA, arrival here can only mean there is something seriously wrong  
     in the code above or the OP_xxx definitions. */  
4161    
4162      default:      default:
4163      DPRINTF(("Unknown opcode %d\n", *ecode));      DPRINTF(("Unknown opcode %d\n", *ecode));
4164      RRETURN(PCRE_ERROR_UNKNOWN_NODE);      RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
4165      }      }
4166    
4167    /* Do not stick any code in here without much thought; it is assumed    /* Do not stick any code in here without much thought; it is assumed
# Line 3325  for (;;) Line 4170  for (;;)
4170    
4171    }             /* End of main loop */    }             /* End of main loop */
4172  /* Control never reaches here */  /* Control never reaches here */
4173    
4174    
4175    /* When compiling to use the heap rather than the stack for recursive calls to
4176    match(), the RRETURN() macro jumps here. The number that is saved in
4177    frame->Xwhere indicates which label we actually want to return to. */
4178    
4179    #ifdef NO_RECURSE
4180    #define LBL(val) case val: goto L_RM##val;
4181    HEAP_RETURN:
4182    switch (frame->Xwhere)
4183      {
4184      LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4185      LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
4186      LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
4187      LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
4188      LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
4189      LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
4190      default:
4191      DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4192      return PCRE_ERROR_INTERNAL;
4193      }
4194    #undef LBL
4195    #endif  /* NO_RECURSE */
4196  }  }
4197    
4198    
# Line 3337  Undefine all the macros that were define Line 4205  Undefine all the macros that were define
4205  #ifdef NO_RECURSE  #ifdef NO_RECURSE
4206  #undef eptr  #undef eptr
4207  #undef ecode  #undef ecode
4208    #undef mstart
4209  #undef offset_top  #undef offset_top
4210  #undef ims  #undef ims
4211  #undef eptrb  #undef eptrb
# Line 3354  Undefine all the macros that were define Line 4223  Undefine all the macros that were define
4223    
4224  #undef cur_is_word  #undef cur_is_word
4225  #undef condition  #undef condition
 #undef minimize  
4226  #undef prev_is_word  #undef prev_is_word
4227    
4228  #undef original_ims  #undef original_ims
# Line 3410  Returns:          > 0 => success; value Line 4278  Returns:          > 0 => success; value
4278                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4279  */  */
4280    
4281  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
4282  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4283    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4284    int offsetcount)    int offsetcount)
# Line 3419  int rc, resetcount, ocount; Line 4287  int rc, resetcount, ocount;
4287  int first_byte = -1;  int first_byte = -1;
4288  int req_byte = -1;  int req_byte = -1;
4289  int req_byte2 = -1;  int req_byte2 = -1;
4290  unsigned long int ims = 0;  int newline;
4291    unsigned long int ims;
4292  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
4293  BOOL anchored;  BOOL anchored;
4294  BOOL startline;  BOOL startline;
4295  BOOL firstline;  BOOL firstline;
4296  BOOL first_byte_caseless = FALSE;  BOOL first_byte_caseless = FALSE;
4297  BOOL req_byte_caseless = FALSE;  BOOL req_byte_caseless = FALSE;
4298    BOOL utf8;
4299  match_data match_block;  match_data match_block;
4300    match_data *md = &match_block;
4301  const uschar *tables;  const uschar *tables;
4302  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4303  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
# Line 3451  if (offsetcount < 0) return PCRE_ERROR_B Line 4322  if (offsetcount < 0) return PCRE_ERROR_B
4322  the default values. */  the default values. */
4323    
4324  study = NULL;  study = NULL;
4325  match_block.match_limit = MATCH_LIMIT;  md->match_limit = MATCH_LIMIT;
4326  match_block.match_limit_recursion = MATCH_LIMIT_RECURSION;  md->match_limit_recursion = MATCH_LIMIT_RECURSION;
4327  match_block.callout_data = NULL;  md->callout_data = NULL;
4328    
4329  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
4330    
# Line 3465  if (extra_data != NULL) Line 4336  if (extra_data != NULL)
4336    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
4337      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
4338    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
4339      match_block.match_limit = extra_data->match_limit;      md->match_limit = extra_data->match_limit;
4340    if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
4341      match_block.match_limit_recursion = extra_data->match_limit_recursion;      md->match_limit_recursion = extra_data->match_limit_recursion;
4342    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
4343      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
4344    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
4345    }    }
4346    
# Line 3499  firstline = (re->options & PCRE_FIRSTLIN Line 4370  firstline = (re->options & PCRE_FIRSTLIN
4370    
4371  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
4372    
4373  match_block.start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const uschar *)external_re + re->name_table_offset +
4374    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
4375    
4376  match_block.start_subject = (USPTR)subject;  md->start_subject = (USPTR)subject;
4377  match_block.start_offset = start_offset;  md->start_offset = start_offset;
4378  match_block.end_subject = match_block.start_subject + length;  md->end_subject = md->start_subject + length;
4379  end_subject = match_block.end_subject;  end_subject = md->end_subject;
4380    
4381  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4382  match_block.utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4383    
4384  match_block.notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4385  match_block.noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
4386  match_block.notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
4387  match_block.partial = (options & PCRE_PARTIAL) != 0;  md->partial = (options & PCRE_PARTIAL) != 0;
4388  match_block.hitend = FALSE;  md->hitend = FALSE;
4389    
4390    md->recursive = NULL;                   /* No recursion at top level */
4391    
4392  match_block.recursive = NULL;                   /* No recursion at top level */  md->lcc = tables + lcc_offset;
4393    md->ctypes = tables + ctypes_offset;
4394    
4395  match_block.lcc = tables + lcc_offset;  /* Handle different types of newline. The three bits give eight cases. If
4396  match_block.ctypes = tables + ctypes_offset;  nothing is set at run time, whatever was used at compile time applies. */
4397    
4398    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
4399           PCRE_NEWLINE_BITS)
4400      {
4401      case 0: newline = NEWLINE; break;   /* Compile-time default */
4402      case PCRE_NEWLINE_CR: newline = '\r'; break;
4403      case PCRE_NEWLINE_LF: newline = '\n'; break;
4404      case PCRE_NEWLINE_CR+
4405           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4406      case PCRE_NEWLINE_ANY: newline = -1; break;
4407      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4408      default: return PCRE_ERROR_BADNEWLINE;
4409      }
4410    
4411    if (newline == -2)
4412      {
4413      md->nltype = NLTYPE_ANYCRLF;
4414      }
4415    else if (newline < 0)
4416      {
4417      md->nltype = NLTYPE_ANY;
4418      }
4419    else
4420      {
4421      md->nltype = NLTYPE_FIXED;
4422      if (newline > 255)
4423        {
4424        md->nllen = 2;
4425        md->nl[0] = (newline >> 8) & 255;
4426        md->nl[1] = newline & 255;
4427        }
4428      else
4429        {
4430        md->nllen = 1;
4431        md->nl[0] = newline;
4432        }
4433      }
4434    
4435  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
4436  moment. */  moment. */
4437    
4438  if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
4439    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
4440    
4441  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
4442  back the character offset. */  back the character offset. */
4443    
4444  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4445  if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4446    {    {
4447    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
4448      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
# Line 3563  ocount = offsetcount - (offsetcount % 3) Line 4474  ocount = offsetcount - (offsetcount % 3)
4474  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
4475    {    {
4476    ocount = re->top_backref * 3 + 3;    ocount = re->top_backref * 3 + 3;
4477    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4478    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4479    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
4480    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
4481    }    }
4482  else match_block.offset_vector = offsets;  else md->offset_vector = offsets;
4483    
4484  match_block.offset_end = ocount;  md->offset_end = ocount;
4485  match_block.offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
4486  match_block.offset_overflow = FALSE;  md->offset_overflow = FALSE;
4487  match_block.capture_last = -1;  md->capture_last = -1;
4488    
4489  /* Compute the minimum number of offsets that we need to reset each time. Doing  /* Compute the minimum number of offsets that we need to reset each time. Doing
4490  this makes a huge difference to execution time when there aren't many brackets  this makes a huge difference to execution time when there aren't many brackets
# Line 3586  if (resetcount > offsetcount) resetcount Line 4497  if (resetcount > offsetcount) resetcount
4497  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
4498  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. */
4499    
4500  if (match_block.offset_vector != NULL)  if (md->offset_vector != NULL)
4501    {    {
4502    register int *iptr = match_block.offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
4503    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - resetcount/2 + 1;
4504    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
4505    }    }
# Line 3605  if (!anchored) Line 4516  if (!anchored)
4516      {      {
4517      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
4518      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
4519        first_byte = match_block.lcc[first_byte];        first_byte = md->lcc[first_byte];
4520      }      }
4521    else    else
4522      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 3623  if ((re->options & PCRE_REQCHSET) != 0) Line 4534  if ((re->options & PCRE_REQCHSET) != 0)
4534    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
4535    }    }
4536    
4537    
4538    /* ==========================================================================*/
4539    
4540  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
4541  the loop runs just once. */  the loop runs just once. */
4542    
4543  do  for(;;)
4544    {    {
4545    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
4546    
4547    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
4548    
4549    if (match_block.offset_vector != NULL)    if (md->offset_vector != NULL)
4550      {      {
4551      register int *iptr = match_block.offset_vector;      register int *iptr = md->offset_vector;
4552      register int *iend = iptr + resetcount;      register int *iend = iptr + resetcount;
4553      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
4554      }      }
4555    
4556    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* Advance to a unique first char if possible. If firstline is TRUE, the
4557    start of the match is constrained to the first line of a multiline string.    start of the match is constrained to the first line of a multiline string.
4558    Implement this by temporarily adjusting end_subject so that we stop scanning    That is, the match must be before or at the first newline. Implement this by
4559    at a newline. If the match fails at the newline, later code breaks this loop.    temporarily adjusting end_subject so that we stop scanning at a newline. If
4560    */    the match fails at the newline, later code breaks this loop. */
4561    
4562    if (firstline)    if (firstline)
4563      {      {
4564      USPTR t = start_match;      USPTR t = start_match;
4565      while (t < save_end_subject && *t != '\n') t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4566      end_subject = t;      end_subject = t;
4567      }      }
4568    
# Line 3658  do Line 4572  do
4572      {      {
4573      if (first_byte_caseless)      if (first_byte_caseless)
4574        while (start_match < end_subject &&        while (start_match < end_subject &&
4575               match_block.lcc[*start_match] != first_byte)               md->lcc[*start_match] != first_byte)
4576          start_match++;          start_match++;
4577      else      else
4578        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4579          start_match++;          start_match++;
4580      }      }
4581    
4582    /* Or to just after \n for a multiline match if possible */    /* Or to just after a linebreak for a multiline match if possible */
4583    
4584    else if (startline)    else if (startline)
4585      {      {
4586      if (start_match > match_block.start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4587        {        {
4588        while (start_match < end_subject && start_match[-1] != NEWLINE)        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4589            start_match++;
4590    
4591          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4592          and we are now at a LF, advance the match position by one more character.
4593          */
4594    
4595          if (start_match[-1] == '\r' &&
4596               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4597               start_match < end_subject &&
4598               *start_match == '\n')
4599          start_match++;          start_match++;
4600        }        }
4601      }      }
# Line 3693  do Line 4617  do
4617    
4618  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4619    printf(">>>> Match against: ");    printf(">>>> Match against: ");
4620    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, md);
4621    printf("\n");    printf("\n");
4622  #endif  #endif
4623    
# Line 3707  do Line 4631  do
4631    
4632    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end can
4633    take a long time, and give bad performance on quite ordinary patterns. This    take a long time, and give bad performance on quite ordinary patterns. This
4634    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4635    don't do this when the string is sufficiently long.    string... so we don't do this when the string is sufficiently long.
4636    
4637    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested.
4638    */    */
4639    
4640    if (req_byte >= 0 &&    if (req_byte >= 0 &&
4641        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
4642        !match_block.partial)        !md->partial)
4643      {      {
4644      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4645    
# Line 3740  do Line 4664  do
4664            }            }
4665          }          }
4666    
4667        /* If we can't find the required character, break the matching loop */        /* If we can't find the required character, break the matching loop,
4668          forcing a match failure. */
4669    
4670        if (p >= end_subject) break;        if (p >= end_subject)
4671            {
4672            rc = MATCH_NOMATCH;
4673            break;
4674            }
4675    
4676        /* If we have found the required character, save the point where we        /* If we have found the required character, save the point where we
4677        found it, so that we don't search again next time round the loop if        found it, so that we don't search again next time round the loop if
# Line 3752  do Line 4681  do
4681        }        }
4682      }      }
4683    
4684    /* When a match occurs, substrings will be set for all internal extractions;    /* OK, we can now run the match. */
   we just need to set up the whole thing as substring 0 before returning. If  
   there were too many extractions, set the return code to zero. In the case  
   where we had to get some local store to hold offsets for backreferences, copy  
   those back references that we can. In this case there need not be overflow  
   if certain parts of the pattern were not used. */  
   
   match_block.start_match = start_match;  
   match_block.match_call_count = 0;  
   
   rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,  
     match_isgroup, 0);  
   
   /* When the result is no match, if the subject's first character was a  
   newline and the PCRE_FIRSTLINE option is set, break (which will return  
   PCRE_ERROR_NOMATCH). The option requests that a match occur before the first  
   newline in the subject. Otherwise, advance the pointer to the next character  
   and continue - but the continuation will actually happen only when the  
   pattern is not anchored. */  
4685    
4686    if (rc == MATCH_NOMATCH)    md->start_match_ptr = start_match;      /* Insurance */
4687      {    md->match_call_count = 0;
4688      if (firstline && *start_match == NEWLINE) break;    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
4689      start_match++;  
4690      /* Any return other than MATCH_NOMATCH breaks the loop. */
4691    
4692      if (rc != MATCH_NOMATCH) break;
4693    
4694      /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4695      newline in the subject (though it may continue over the newline). Therefore,
4696      if we have just failed to match, starting at a newline, do not continue. */
4697    
4698      if (firstline && IS_NEWLINE(start_match)) break;
4699    
4700      /* Advance the match position by one character. */
4701    
4702      start_match++;
4703  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4704      if (match_block.utf8)    if (utf8)
4705        while(start_match < end_subject && (*start_match & 0xc0) == 0x80)      while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4706          start_match++;        start_match++;
4707  #endif  #endif
4708      continue;  
4709      }    /* Break the loop if the pattern is anchored or if we have passed the end of
4710      the subject. */
4711    
4712      if (anchored || start_match > end_subject) break;
4713    
4714      /* If we have just passed a CR and the newline option is CRLF or ANY or
4715      ANYCRLF, and we are now at a LF, advance the match position by one more
4716      character. */
4717    
4718      if (start_match[-1] == '\r' &&
4719           (md->nltype == NLTYPE_ANY ||
4720            md->nltype == NLTYPE_ANYCRLF ||
4721            md->nllen == 2) &&
4722           start_match < end_subject &&
4723           *start_match == '\n')
4724        start_match++;
4725    
4726    if (rc != MATCH_MATCH)    }   /* End of for(;;) "bumpalong" loop */
4727      {  
4728      DPRINTF((">>>> error: returning %d\n", rc));  /* ==========================================================================*/
4729      return rc;  
4730      }  /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4731    conditions is true:
4732    
4733    (1) The pattern is anchored;
4734    
4735    /* We have a match! Copy the offset information from temporary store if  (2) We are past the end of the subject;
   necessary */  
4736    
4737    (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4738        this option requests that a match occur at or before the first newline in
4739        the subject.
4740    
4741    When we have a match and the offset vector is big enough to deal with any
4742    backreferences, captured substring offsets will already be set up. In the case
4743    where we had to get some local store to hold offsets for backreference
4744    processing, copy those that we can. In this case there need not be overflow if
4745    certain parts of the pattern were not used, even though there are more
4746    capturing parentheses than vector slots. */
4747    
4748    if (rc == MATCH_MATCH)
4749      {
4750    if (using_temporary_offsets)    if (using_temporary_offsets)
4751      {      {
4752      if (offsetcount >= 4)      if (offsetcount >= 4)
4753        {        {
4754        memcpy(offsets + 2, match_block.offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
4755          (offsetcount - 2) * sizeof(int));          (offsetcount - 2) * sizeof(int));
4756        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
4757        }        }
4758      if (match_block.end_offset_top > offsetcount)      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
       match_block.offset_overflow = TRUE;  
   
4759      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
4760      (pcre_free)(match_block.offset_vector);      (pcre_free)(md->offset_vector);
4761      }      }
4762    
4763    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;    /* Set the return code to the number of captured strings, or 0 if there are
4764      too many to fit into the vector. */
4765    
4766      rc = md->offset_overflow? 0 : md->end_offset_top/2;
4767    
4768      /* If there is space, set up the whole thing as substring 0. The value of
4769      md->start_match_ptr might be modified if \K was encountered on the success
4770      matching path. */
4771    
4772    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4773      {      {
4774      offsets[0] = start_match - match_block.start_subject;      offsets[0] = md->start_match_ptr - md->start_subject;
4775      offsets[1] = match_block.end_match_ptr - match_block.start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
4776      }      }
4777    
4778    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
4779    return rc;    return rc;
4780    }    }
4781    
4782  /* This "while" is the end of the "do" above */  /* Control gets here if there has been an error, or if the overall match
4783    attempt has failed at all permitted starting positions. */
 while (!anchored && start_match <= end_subject);  
4784    
4785  if (using_temporary_offsets)  if (using_temporary_offsets)
4786    {    {
4787    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
4788    (pcre_free)(match_block.offset_vector);    (pcre_free)(md->offset_vector);
4789    }    }
4790    
4791  if (match_block.partial && match_block.hitend)  if (rc != MATCH_NOMATCH)
4792      {
4793      DPRINTF((">>>> error: returning %d\n", rc));
4794      return rc;
4795      }
4796    else if (md->partial && md->hitend)
4797    {    {
4798    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4799    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;

Legend:
Removed from v.87  
changed lines
  Added in v.197

  ViewVC Help
Powered by ViewVC 1.1.5