/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 190 by ph10, Thu Jul 19 10:38:20 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #define NLBLOCK md             /* Block containing newline information */
46    #define PSSTART start_subject  /* Field containing processed string start */
47    #define PSEND   end_subject    /* Field containing processed string end */
48    
49  #include "pcre_internal.h"  #include "pcre_internal.h"
50    
51    /* Undefine some potentially clashing cpp symbols */
52    
53  /* Structure for building a chain of data that actually lives on the  #undef min
54  stack, for holding the values of the subject pointer at the start of each  #undef max
55  subpattern, so as to detect when an empty string has been matched by a  
56  subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
57  are on the heap, not on the stack. */  obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
58    
59  typedef struct eptrblock {  #define EPTR_WORK_SIZE (1000)
   struct eptrblock *epb_prev;  
   USPTR epb_saved_eptr;  
 } eptrblock;  
60    
61  /* Flag bits for the match() function */  /* Flag bits for the match() function */
62    
63  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
64  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
65    #define match_tail_recursed  0x04  /* Tail recursive call */
66    
67  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
68  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 101  Returns:     nothing Line 103  Returns:     nothing
103  static void  static void
104  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
105  {  {
106  int c;  unsigned int c;
107  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
108  while (length-- > 0)  while (length-- > 0)
109    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 186  calls by keeping local variables that ne Line 188  calls by keeping local variables that ne
188  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
189  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
190  always used to.  always used to.
191    
192    The original heap-recursive code used longjmp(). However, it seems that this
193    can be very slow on some operating systems. Following a suggestion from Stan
194    Switzer, the use of longjmp() has been abolished, at the cost of having to
195    provide a unique number for each call to RMATCH. There is no way of generating
196    a sequence of numbers at compile time in C. I have given them names, to make
197    them stand out more clearly.
198    
199    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
200    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
201    tests. Furthermore, not using longjmp() means that local dynamic variables
202    don't have indeterminate values; this has meant that the frame size can be
203    reduced because the result can be "passed back" by straight setting of the
204    variable instead of being passed in the frame.
205  ****************************************************************************  ****************************************************************************
206  ***************************************************************************/  ***************************************************************************/
207    
208    
209    /* Numbers for RMATCH calls */
210    
211    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
212           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
213           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
214           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
215           RM41,  RM42, RM43, RM44, RM45, RM46, RM47 };
216    
217    
218  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
219  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
220    actually used in this definition. */
221    
222  #ifndef NO_RECURSE  #ifndef NO_RECURSE
223  #define REGISTER register  #define REGISTER register
224    
225  #ifdef DEBUG  #ifdef DEBUG
226  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
227    { \    { \
228    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
229    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
230    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
231    }    }
232  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 208  versions and production versions. */ Line 235  versions and production versions. */
235    return ra; \    return ra; \
236    }    }
237  #else  #else
238  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
239    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
240  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
241  #endif  #endif
242    
243  #else  #else
244    
245    
246  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
247  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
248  match(), which never changes. */  argument of match(), which never changes. */
249    
250  #define REGISTER  #define REGISTER
251    
252  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
253    {\    {\
254    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
255    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
256      {\    newframe->Xeptr = ra;\
257      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
258      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
259      newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
260      newframe->Xims = re;\    newframe->Xims = re;\
261      newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
262      newframe->Xflags = rg;\    newframe->Xflags = rg;\
263      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
264      newframe->Xprevframe = frame;\    newframe->Xprevframe = frame;\
265      frame = newframe;\    frame = newframe;\
266      DPRINTF(("restarting from line %d\n", __LINE__));\    DPRINTF(("restarting from line %d\n", __LINE__));\
267      goto HEAP_RECURSE;\    goto HEAP_RECURSE;\
268      }\    L_##rw:\
269    else\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     {\  
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
270    }    }
271    
272  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 254  match(), which never changes. */ Line 276  match(), which never changes. */
276    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
277    if (frame != NULL)\    if (frame != NULL)\
278      {\      {\
279      frame->Xresult = ra;\      rrc = ra;\
280      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
281      }\      }\
282    return ra;\    return ra;\
283    }    }
# Line 271  typedef struct heapframe { Line 292  typedef struct heapframe {
292    
293    const uschar *Xeptr;    const uschar *Xeptr;
294    const uschar *Xecode;    const uschar *Xecode;
295      const uschar *Xmstart;
296    int Xoffset_top;    int Xoffset_top;
297    long int Xims;    long int Xims;
298    eptrblock *Xeptrb;    eptrblock *Xeptrb;
299    int Xflags;    int Xflags;
300    int Xrdepth;    unsigned int Xrdepth;
301    
302    /* Function local variables */    /* Function local variables */
303    
# Line 291  typedef struct heapframe { Line 313  typedef struct heapframe {
313    
314    BOOL Xcur_is_word;    BOOL Xcur_is_word;
315    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
316    BOOL Xprev_is_word;    BOOL Xprev_is_word;
317    
318    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
# Line 303  typedef struct heapframe { Line 324  typedef struct heapframe {
324    int Xprop_category;    int Xprop_category;
325    int Xprop_chartype;    int Xprop_chartype;
326    int Xprop_script;    int Xprop_script;
327    int *Xprop_test_variable;    int Xoclength;
328      uschar Xocchars[8];
329  #endif  #endif
330    
331    int Xctype;    int Xctype;
332    int Xfc;    unsigned int Xfc;
333    int Xfi;    int Xfi;
334    int Xlength;    int Xlength;
335    int Xmax;    int Xmax;
# Line 321  typedef struct heapframe { Line 343  typedef struct heapframe {
343    
344    eptrblock Xnewptrb;    eptrblock Xnewptrb;
345    
346    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
347    
348    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
349    
350  } heapframe;  } heapframe;
351    
# Line 340  typedef struct heapframe { Line 361  typedef struct heapframe {
361  *         Match from current position            *  *         Match from current position            *
362  *************************************************/  *************************************************/
363    
364  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
365  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
366  same response.  same response.
367    
# Line 353  performance. Tests using gcc on a SPARC Line 371  performance. Tests using gcc on a SPARC
371  made performance worse.  made performance worse.
372    
373  Arguments:  Arguments:
374     eptr        pointer in subject     eptr        pointer to current character in subject
375     ecode       position in code     ecode       pointer to current position in compiled code
376       mstart      pointer to the current match start position (can be modified
377                     by encountering \K)
378     offset_top  current top pointer     offset_top  current top pointer
379     md          pointer to "static" info for the match     md          pointer to "static" info for the match
380     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 362  Arguments: Line 382  Arguments:
382                   brackets - for testing for empty matches                   brackets - for testing for empty matches
383     flags       can contain     flags       can contain
384                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
385                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
386                       group that can match an empty string
387                     match_tail_recursed - this is a tail_recursed group
388     rdepth      the recursion depth     rdepth      the recursion depth
389    
390  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 372  Returns:       MATCH_MATCH if matched Line 394  Returns:       MATCH_MATCH if matched
394  */  */
395    
396  static int  static int
397  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
398    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
399    int flags, int rdepth)    int flags, unsigned int rdepth)
400  {  {
401  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
402  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
403  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
404    
405    register int  rrc;         /* Returns from recursive calls */
406    register int  i;           /* Used for loops not involving calls to RMATCH() */
407    register unsigned int c;   /* Character values not kept over RMATCH() calls */
408    register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
409    
410  register int  rrc;    /* Returns from recursive calls */  BOOL minimize, possessive; /* Quantifier options */
 register int  i;      /* Used for loops not involving calls to RMATCH() */  
 register int  c;      /* Character values not kept over RMATCH() calls */  
 register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  
411    
412  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
413  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 398  frame->Xprevframe = NULL;            /* Line 422  frame->Xprevframe = NULL;            /*
422    
423  frame->Xeptr = eptr;  frame->Xeptr = eptr;
424  frame->Xecode = ecode;  frame->Xecode = ecode;
425    frame->Xmstart = mstart;
426  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
427  frame->Xims = ims;  frame->Xims = ims;
428  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 412  HEAP_RECURSE: Line 437  HEAP_RECURSE:
437    
438  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
439  #define ecode              frame->Xecode  #define ecode              frame->Xecode
440    #define mstart             frame->Xmstart
441  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
442  #define ims                frame->Xims  #define ims                frame->Xims
443  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 434  HEAP_RECURSE: Line 460  HEAP_RECURSE:
460    
461  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
462  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
463  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
464    
465  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
# Line 446  HEAP_RECURSE: Line 471  HEAP_RECURSE:
471  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
472  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
473  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
474  #define prop_test_variable frame->Xprop_test_variable  #define oclength           frame->Xoclength
475    #define occhars            frame->Xocchars
476  #endif  #endif
477    
478  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 470  HEAP_RECURSE: Line 496  HEAP_RECURSE:
496  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
497  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
498    
499  #else  #else         /* NO_RECURSE not defined */
500  #define fi i  #define fi i
501  #define fc c  #define fc c
502    
# Line 489  recursion_info new_recursive;      /* wi Line 515  recursion_info new_recursive;      /* wi
515                                     /* that do not have to be preserved over  */                                     /* that do not have to be preserved over  */
516  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
517  BOOL condition;  BOOL condition;
 BOOL minimize;  
518  BOOL prev_is_word;  BOOL prev_is_word;
519    
520  unsigned long int original_ims;  unsigned long int original_ims;
# Line 501  int prop_fail_result; Line 526  int prop_fail_result;
526  int prop_category;  int prop_category;
527  int prop_chartype;  int prop_chartype;
528  int prop_script;  int prop_script;
529  int *prop_test_variable;  int oclength;
530    uschar occhars[8];
531  #endif  #endif
532    
533  int ctype;  int ctype;
# Line 516  int save_offset1, save_offset2, save_off Line 542  int save_offset1, save_offset2, save_off
542  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
543    
544  eptrblock newptrb;  eptrblock newptrb;
545  #endif  #endif     /* NO_RECURSE */
546    
547  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
548  variables. */  variables. */
# Line 524  variables. */ Line 550  variables. */
550  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
551  prop_value = 0;  prop_value = 0;
552  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_variable = NULL;  
553  #endif  #endif
554    
555    
556    /* This label is used for tail recursion, which is used in a few cases even
557    when NO_RECURSE is not defined, in order to reduce the amount of stack that is
558    used. Thanks to Ian Taylor for noticing this possibility and sending the
559    original patch. */
560    
561    TAIL_RECURSE:
562    
563  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
564  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
565  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
# Line 535  defined). However, RMATCH isn't like a f Line 568  defined). However, RMATCH isn't like a f
568  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
569  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
570    
571    #ifdef SUPPORT_UTF8
572    utf8 = md->utf8;       /* Local copy of the flag */
573    #else
574    utf8 = FALSE;
575    #endif
576    
577  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
578  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
579    
# Line 542  if (md->match_call_count++ >= md->match_ Line 581  if (md->match_call_count++ >= md->match_
581  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);  if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
582    
583  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
 utf8 = md->utf8;       /* Local copy of the flag */  
584    
585  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
586  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
587  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
588  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
589    When match() is called in other circumstances, don't add to the chain. If this
590    is a tail recursion, use a block from the workspace, as the one on the stack is
591    already used. */
592    
593  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
594    {    {
595    newptrb.epb_prev = eptrb;    eptrblock *p;
596    newptrb.epb_saved_eptr = eptr;    if ((flags & match_tail_recursed) != 0)
597    eptrb = &newptrb;      {
598        if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
599        p = md->eptrchain + md->eptrn++;
600        }
601      else p = &newptrb;
602      p->epb_saved_eptr = eptr;
603      p->epb_prev = eptrb;
604      eptrb = p;
605    }    }
606    
607  /* Now start processing the operations. */  /* Now start processing the opcodes. */
608    
609  for (;;)  for (;;)
610    {    {
611      minimize = possessive = FALSE;
612    op = *ecode;    op = *ecode;
   minimize = FALSE;  
613    
614    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
615    matching at least one subject character. */    matching at least one subject character. */
616    
617    if (md->partial &&    if (md->partial &&
618        eptr >= md->end_subject &&        eptr >= md->end_subject &&
619        eptr > md->start_match)        eptr > mstart)
620      md->hitend = TRUE;      md->hitend = TRUE;
621    
622    /* Opening capturing bracket. If there is space in the offset vector, save    switch(op)
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
623      {      {
624      number = op - OP_BRA;      /* Handle a capturing bracket. If there is space in the offset vector, save
625        the current subject position in the working slot at the top of the vector.
626      /* For extended extraction brackets (large number), we have to fish out the      We mustn't change the current values of the data slot, because they may be
627      number from a dummy opcode at the start. */      set from a previous iteration of this group, and be referred to by a
628        reference inside the group.
629      if (number > EXTRACT_BASIC_MAX)  
630        number = GET2(ecode, 2+LINK_SIZE);      If the bracket fails to match, we need to restore this value and also the
631        values of the final offsets, in case they were set by a previous iteration
632        of the same bracket.
633    
634        If there isn't enough space in the offset vector, treat this as if it were
635        a non-capturing bracket. Don't worry about setting the flag for the error
636        case here; that is handled in the code for KET. */
637    
638        case OP_CBRA:
639        case OP_SCBRA:
640        number = GET2(ecode, 1+LINK_SIZE);
641      offset = number << 1;      offset = number << 1;
642    
643  #ifdef DEBUG  #ifdef DEBUG
644      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
645        printf("subject=");
646      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
647      printf("\n");      printf("\n");
648  #endif  #endif
# Line 612  for (;;) Line 657  for (;;)
657        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
658        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
659    
660          flags = (op == OP_SCBRA)? match_cbegroup : 0;
661        do        do
662          {          {
663          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
664            match_isgroup);            ims, eptrb, flags, RM1);
665          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
666          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
667          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
# Line 631  for (;;) Line 677  for (;;)
677        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
678        }        }
679    
680      /* Insufficient room for saving captured contents */      /* Insufficient room for saving captured contents. Treat as a non-capturing
681        bracket. */
682    
683      else op = OP_BRA;      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
     }  
684    
685    /* Other types of node can be handled by a switch */      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
686        final alternative within the brackets, we would return the result of a
687        recursive call to match() whatever happened. We can reduce stack usage by
688        turning this into a tail recursion. */
689    
690    switch(op)      case OP_BRA:
691      {      case OP_SBRA:
692      case OP_BRA:     /* Non-capturing bracket: optimized */      DPRINTF(("start non-capturing bracket\n"));
693      DPRINTF(("start bracket 0\n"));      flags = (op >= OP_SBRA)? match_cbegroup : 0;
694      do      for (;;)
695        {        {
696        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)
697          match_isgroup);          {
698            ecode += _pcre_OP_lengths[*ecode];
699            flags |= match_tail_recursed;
700            DPRINTF(("bracket 0 tail recursion\n"));
701            goto TAIL_RECURSE;
702            }
703    
704          /* For non-final alternatives, continue the loop for a NOMATCH result;
705          otherwise return. */
706    
707          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
708            eptrb, flags, RM2);
709        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
710        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
711        }        }
712      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
713    
714      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
715      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
716      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
717      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
718        obeyed, we can use tail recursion to avoid using another stack frame. */
719    
720      case OP_COND:      case OP_COND:
721      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
722        if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
723          {
724          offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
725          condition = md->recursive != NULL &&
726            (offset == RREF_ANY || offset == md->recursive->group_num);
727          ecode += condition? 3 : GET(ecode, 1);
728          }
729    
730        else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
731        {        {
732        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
733        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
734          (md->recursive != NULL) :        ecode += condition? 3 : GET(ecode, 1);
735          (offset < offset_top && md->offset_vector[offset] >= 0);        }
736        RMATCH(rrc, eptr, ecode + (condition?  
737          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
738          offset_top, md, ims, eptrb, match_isgroup);        {
739        RRETURN(rrc);        condition = FALSE;
740          ecode += GET(ecode, 1);
741        }        }
742    
743      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
744      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
745        assertion. */
746    
747      else      else
748        {        {
749        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
750            match_condassert | match_isgroup);            match_condassert, RM3);
751        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
752          {          {
753          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
754            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
755          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
756          }          }
757        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH)
758          {          {
759          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
760          }          }
761        else ecode += GET(ecode, 1);        else
762        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
763          match_isgroup);          condition = FALSE;
764        RRETURN(rrc);          ecode += GET(ecode, 1);
765            }
766        }        }
     /* Control never reaches here */  
767    
768      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
769      encountered. */      we can use tail recursion to avoid using another stack frame. If the second
770        alternative doesn't exist, we can just plough on. */
771    
772      case OP_CREF:      if (condition || *ecode == OP_ALT)
773      case OP_BRANUMBER:        {
774      ecode += 3;        ecode += 1 + LINK_SIZE;
775          flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
776          goto TAIL_RECURSE;
777          }
778        else
779          {
780          ecode += 1 + LINK_SIZE;
781          }
782      break;      break;
783    
784      /* End of the pattern. If we are in a recursion, we should restore the  
785      offsets appropriately and continue from after the call. */      /* End of the pattern. If we are in a top-level recursion, we should
786        restore the offsets appropriately and continue from after the call. */
787    
788      case OP_END:      case OP_END:
789      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
# Line 713  for (;;) Line 793  for (;;)
793        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
794        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
795          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
796        md->start_match = rec->save_start;        mstart = rec->save_start;
797        ims = original_ims;        ims = original_ims;
798        ecode = rec->after_call;        ecode = rec->after_call;
799        break;        break;
# Line 722  for (;;) Line 802  for (;;)
802      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
803      string - backtracking will then try other alternatives, if any. */      string - backtracking will then try other alternatives, if any. */
804    
805      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
806      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
807      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
808        md->start_match_ptr = mstart;  /* and the start (\K can modify) */
809      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
810    
811      /* Change option settings */      /* Change option settings */
# Line 745  for (;;) Line 826  for (;;)
826      case OP_ASSERTBACK:      case OP_ASSERTBACK:
827      do      do
828        {        {
829        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
830          match_isgroup);          RM4);
831        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
832        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
833        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 772  for (;;) Line 853  for (;;)
853      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
854      do      do
855        {        {
856        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
857          match_isgroup);          RM5);
858        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
859        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
860        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 794  for (;;) Line 875  for (;;)
875  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
876      if (utf8)      if (utf8)
877        {        {
878        c = GET(ecode,1);        i = GET(ecode, 1);
879        for (i = 0; i < c; i++)        while (i-- > 0)
880          {          {
881          eptr--;          eptr--;
882          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
# Line 808  for (;;) Line 889  for (;;)
889      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
890    
891        {        {
892        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
893        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
894        }        }
895    
# Line 830  for (;;) Line 911  for (;;)
911        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
912        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
913        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
914        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
915        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
916        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
917        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
# Line 865  for (;;) Line 946  for (;;)
946      case OP_RECURSE:      case OP_RECURSE:
947        {        {
948        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
949        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
950            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
951    
952        /* Add to "recursing stack" */        /* Add to "recursing stack" */
953    
# Line 897  for (;;) Line 973  for (;;)
973    
974        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
975              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
976        new_recursive.save_start = md->start_match;        new_recursive.save_start = mstart;
977        md->start_match = eptr;        mstart = eptr;
978    
979        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
980        restore the offset and recursion data. */        restore the offset and recursion data. */
981    
982        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
983          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
984        do        do
985          {          {
986          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
987              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
988          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
989            {            {
990            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
# Line 945  for (;;) Line 1022  for (;;)
1022      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer. */
1023    
1024      case OP_ONCE:      case OP_ONCE:
1025        {      prev = ecode;
1026        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1027    
1028        do      do
1029          {        {
1030          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
1031            eptrb, match_isgroup);          eptrb, 0, RM7);
1032          if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1033          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1034          ecode += GET(ecode,1);        ecode += GET(ecode,1);
1035          }        }
1036        while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1037    
1038        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1039    
1040        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1041    
1042        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1043        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1044    
1045        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1046    
1047        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1048        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1049    
1050        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1051        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1052        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1053        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1054        course of events. */      course of events. */
1055    
1056        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1057          {        {
1058          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1059          break;        break;
1060          }        }
1061    
1062        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1063        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1064        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1065        opcode. */      any options that changed within the bracket before re-running it, so
1066        check the next opcode. */
1067    
1068        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1069          {        {
1070          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1071          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1072          }        }
1073    
1074        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1075          {        {
1076          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,
1077          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          RM8);
1078          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1079          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode = prev;
1080          }        flags = match_tail_recursed;
1081        else  /* OP_KETRMAX */        goto TAIL_RECURSE;
1082          {        }
1083          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);      else  /* OP_KETRMAX */
1084          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        {
1085          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1086          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1087          }        ecode += 1 + LINK_SIZE;
1088          flags = match_tail_recursed;
1089          goto TAIL_RECURSE;
1090        }        }
1091      RRETURN(MATCH_NOMATCH);      /* Control never gets here */
1092    
1093      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1094      bracketed group and go to there. */      bracketed group and go to there. */
# Line 1027  for (;;) Line 1106  for (;;)
1106      case OP_BRAZERO:      case OP_BRAZERO:
1107        {        {
1108        next = ecode+1;        next = ecode+1;
1109        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1110        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1111        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1112        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1113        }        }
1114      break;      break;
1115    
1116      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1117        {        {
1118        next = ecode+1;        next = ecode+1;
1119        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1120        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1121        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1122        ecode++;        ecode++;
1123        }        }
1124      break;      break;
1125    
1126      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. */
     an assertion "group", stop matching and return MATCH_MATCH, but record the  
     current high water mark for use by positive assertions. Do this also  
     for the "once" (not-backup up) groups. */  
1127    
1128      case OP_KET:      case OP_KET:
1129      case OP_KETRMIN:      case OP_KETRMIN:
1130      case OP_KETRMAX:      case OP_KETRMAX:
1131        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
1132    
1133        /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1134        infinite repeats of empty string matches, retrieve the subject start from
1135        the chain. Otherwise, set it NULL. */
1136    
1137        eptrb = eptrb->epb_prev;      if (*prev >= OP_SBRA)
1138          {
1139        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1140            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1141            *prev == OP_ONCE)        }
1142          {      else saved_eptr = NULL;
         md->end_match_ptr = eptr;      /* For ONCE */  
         md->end_offset_top = offset_top;  
         RRETURN(MATCH_MATCH);  
         }  
1143    
1144        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group, stop matching and return
1145        group number back at the start and if necessary complete handling an      MATCH_MATCH, but record the current high water mark for use by positive
1146        extraction by setting the offsets and bumping the high water mark. */      assertions. Do this also for the "once" (atomic) groups. */
1147    
1148        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1149          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1150          number = *prev - OP_BRA;          *prev == OP_ONCE)
1151          {
1152          md->end_match_ptr = eptr;      /* For ONCE */
1153          md->end_offset_top = offset_top;
1154          RRETURN(MATCH_MATCH);
1155          }
1156    
1157          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1158          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1159        bumping the high water mark. Note that whole-pattern recursion is coded as
1160        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1161        when the OP_END is reached. Other recursion is handled here. */
1162    
1163          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1164          offset = number << 1;        {
1165          number = GET2(prev, 1+LINK_SIZE);
1166          offset = number << 1;
1167    
1168  #ifdef DEBUG  #ifdef DEBUG
1169          printf("end bracket %d", number);        printf("end bracket %d", number);
1170          printf("\n");        printf("\n");
1171  #endif  #endif
1172    
1173          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1174          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1175          into group 0, so it won't be picked up here. Instead, we catch it when          {
1176          the OP_END is reached. */          md->offset_vector[offset] =
1177              md->offset_vector[md->offset_end - number];
1178          if (number > 0)          md->offset_vector[offset+1] = eptr - md->start_subject;
1179            {          if (offset_top <= offset) offset_top = offset + 2;
           md->capture_last = number;  
           if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
             {  
             md->offset_vector[offset] =  
               md->offset_vector[md->offset_end - number];  
             md->offset_vector[offset+1] = eptr - md->start_subject;  
             if (offset_top <= offset) offset_top = offset + 2;  
             }  
   
           /* Handle a recursively called group. Restore the offsets  
           appropriately and continue from after the call. */  
   
           if (md->recursive != NULL && md->recursive->group_num == number)  
             {  
             recursion_info *rec = md->recursive;  
             DPRINTF(("Recursion (%d) succeeded - continuing\n", number));  
             md->recursive = rec->prevrec;  
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
1180          }          }
1181    
1182        /* Reset the value of the ims flags, in case they got changed during        /* Handle a recursively called group. Restore the offsets
1183        the group. */        appropriately and continue from after the call. */
1184    
1185        ims = original_ims;        if (md->recursive != NULL && md->recursive->group_num == number)
       DPRINTF(("ims reset to %02lx\n", ims));  
   
       /* For a non-repeating ket, just continue at this level. This also  
       happens for a repeating ket if no characters were matched in the group.  
       This is the forcible breaking of infinite loops as implemented in Perl  
       5.005. If there is an options reset, it will get obeyed in the normal  
       course of events. */  
   
       if (*ecode == OP_KET || eptr == saved_eptr)  
1186          {          {
1187          ecode += 1 + LINK_SIZE;          recursion_info *rec = md->recursive;
1188            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1189            md->recursive = rec->prevrec;
1190            mstart = rec->save_start;
1191            memcpy(md->offset_vector, rec->offset_save,
1192              rec->saved_max * sizeof(int));
1193            ecode = rec->after_call;
1194            ims = original_ims;
1195          break;          break;
1196          }          }
1197          }
1198    
1199        /* The repeating kets try the rest of the pattern or restart from the      /* For both capturing and non-capturing groups, reset the value of the ims
1200        preceding bracket, in the appropriate order. */      flags, in case they got changed during the group. */
1201    
1202        if (*ecode == OP_KETRMIN)      ims = original_ims;
1203          {      DPRINTF(("ims reset to %02lx\n", ims));
1204          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
1205          if (rrc != MATCH_NOMATCH) RRETURN(rrc);      /* For a non-repeating ket, just continue at this level. This also
1206          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);      happens for a repeating ket if no characters were matched in the group.
1207          if (rrc != MATCH_NOMATCH) RRETURN(rrc);      This is the forcible breaking of infinite loops as implemented in Perl
1208          }      5.005. If there is an options reset, it will get obeyed in the normal
1209        else  /* OP_KETRMAX */      course of events. */
1210          {  
1211          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);      if (*ecode == OP_KET || eptr == saved_eptr)
1212          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        {
1213          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        ecode += 1 + LINK_SIZE;
1214          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        break;
         }  
1215        }        }
1216    
1217      RRETURN(MATCH_NOMATCH);      /* The repeating kets try the rest of the pattern or restart from the
1218        preceding bracket, in the appropriate order. In the second case, we can use
1219        tail recursion to avoid using another stack frame. */
1220    
1221        flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1222    
1223        if (*ecode == OP_KETRMIN)
1224          {
1225          RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,
1226            RM12);
1227          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1228          ecode = prev;
1229          flags |= match_tail_recursed;
1230          goto TAIL_RECURSE;
1231          }
1232        else  /* OP_KETRMAX */
1233          {
1234          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1235          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1236          ecode += 1 + LINK_SIZE;
1237          flags = match_tail_recursed;
1238          goto TAIL_RECURSE;
1239          }
1240        /* Control never gets here */
1241    
1242      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1243    
# Line 1168  for (;;) Line 1245  for (;;)
1245      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1246      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1247        {        {
1248        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1249              (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1250          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1251        ecode++;        ecode++;
1252        break;        break;
# Line 1189  for (;;) Line 1267  for (;;)
1267      ecode++;      ecode++;
1268      break;      break;
1269    
1270        /* Reset the start of match point */
1271    
1272        case OP_SET_SOM:
1273        mstart = eptr;
1274        ecode++;
1275        break;
1276    
1277      /* Assert before internal newline if multiline, or before a terminating      /* Assert before internal newline if multiline, or before a terminating
1278      newline unless endonly is set, else end of subject unless noteol is set. */      newline unless endonly is set, else end of subject unless noteol is set. */
1279    
# Line 1196  for (;;) Line 1281  for (;;)
1281      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1282        {        {
1283        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1284          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1285        else        else
1286          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1287        ecode++;        ecode++;
# Line 1207  for (;;) Line 1292  for (;;)
1292        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
1293        if (!md->endonly)        if (!md->endonly)
1294          {          {
1295          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1296             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1297            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1298          ecode++;          ecode++;
1299          break;          break;
1300          }          }
1301        }        }
1302      /* ... else fall through */      /* ... else fall through for endonly */
1303    
1304      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1305    
# Line 1226  for (;;) Line 1311  for (;;)
1311      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1312    
1313      case OP_EODN:      case OP_EODN:
1314      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1315         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1316          RRETURN(MATCH_NOMATCH);
1317      ecode++;      ecode++;
1318      break;      break;
1319    
# Line 1280  for (;;) Line 1366  for (;;)
1366      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1367    
1368      case OP_ANY:      case OP_ANY:
1369      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if ((ims & PCRE_DOTALL) == 0)
1370        RRETURN(MATCH_NOMATCH);        {
1371          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1372          }
1373      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
 #ifdef SUPPORT_UTF8  
1374      if (utf8)      if (utf8)
1375        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 #endif  
1376      ecode++;      ecode++;
1377      break;      break;
1378    
# Line 1376  for (;;) Line 1462  for (;;)
1462      ecode++;      ecode++;
1463      break;      break;
1464    
1465        case OP_ANYNL:
1466        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1467        GETCHARINCTEST(c, eptr);
1468        switch(c)
1469          {
1470          default: RRETURN(MATCH_NOMATCH);
1471          case 0x000d:
1472          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1473          break;
1474          case 0x000a:
1475          case 0x000b:
1476          case 0x000c:
1477          case 0x0085:
1478          case 0x2028:
1479          case 0x2029:
1480          break;
1481          }
1482        ecode++;
1483        break;
1484    
1485        case OP_NOT_HSPACE:
1486        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1487        GETCHARINCTEST(c, eptr);
1488        switch(c)
1489          {
1490          default: break;
1491          case 0x09:      /* HT */
1492          case 0x20:      /* SPACE */
1493          case 0xa0:      /* NBSP */
1494          case 0x1680:    /* OGHAM SPACE MARK */
1495          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1496          case 0x2000:    /* EN QUAD */
1497          case 0x2001:    /* EM QUAD */
1498          case 0x2002:    /* EN SPACE */
1499          case 0x2003:    /* EM SPACE */
1500          case 0x2004:    /* THREE-PER-EM SPACE */
1501          case 0x2005:    /* FOUR-PER-EM SPACE */
1502          case 0x2006:    /* SIX-PER-EM SPACE */
1503          case 0x2007:    /* FIGURE SPACE */
1504          case 0x2008:    /* PUNCTUATION SPACE */
1505          case 0x2009:    /* THIN SPACE */
1506          case 0x200A:    /* HAIR SPACE */
1507          case 0x202f:    /* NARROW NO-BREAK SPACE */
1508          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1509          case 0x3000:    /* IDEOGRAPHIC SPACE */
1510          RRETURN(MATCH_NOMATCH);
1511          }
1512        ecode++;
1513        break;
1514    
1515        case OP_HSPACE:
1516        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1517        GETCHARINCTEST(c, eptr);
1518        switch(c)
1519          {
1520          default: RRETURN(MATCH_NOMATCH);
1521          case 0x09:      /* HT */
1522          case 0x20:      /* SPACE */
1523          case 0xa0:      /* NBSP */
1524          case 0x1680:    /* OGHAM SPACE MARK */
1525          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1526          case 0x2000:    /* EN QUAD */
1527          case 0x2001:    /* EM QUAD */
1528          case 0x2002:    /* EN SPACE */
1529          case 0x2003:    /* EM SPACE */
1530          case 0x2004:    /* THREE-PER-EM SPACE */
1531          case 0x2005:    /* FOUR-PER-EM SPACE */
1532          case 0x2006:    /* SIX-PER-EM SPACE */
1533          case 0x2007:    /* FIGURE SPACE */
1534          case 0x2008:    /* PUNCTUATION SPACE */
1535          case 0x2009:    /* THIN SPACE */
1536          case 0x200A:    /* HAIR SPACE */
1537          case 0x202f:    /* NARROW NO-BREAK SPACE */
1538          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1539          case 0x3000:    /* IDEOGRAPHIC SPACE */
1540          break;
1541          }
1542        ecode++;
1543        break;
1544    
1545        case OP_NOT_VSPACE:
1546        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1547        GETCHARINCTEST(c, eptr);
1548        switch(c)
1549          {
1550          default: break;
1551          case 0x0a:      /* LF */
1552          case 0x0b:      /* VT */
1553          case 0x0c:      /* FF */
1554          case 0x0d:      /* CR */
1555          case 0x85:      /* NEL */
1556          case 0x2028:    /* LINE SEPARATOR */
1557          case 0x2029:    /* PARAGRAPH SEPARATOR */
1558          RRETURN(MATCH_NOMATCH);
1559          }
1560        ecode++;
1561        break;
1562    
1563        case OP_VSPACE:
1564        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1565        GETCHARINCTEST(c, eptr);
1566        switch(c)
1567          {
1568          default: RRETURN(MATCH_NOMATCH);
1569          case 0x0a:      /* LF */
1570          case 0x0b:      /* VT */
1571          case 0x0c:      /* FF */
1572          case 0x0d:      /* CR */
1573          case 0x85:      /* NEL */
1574          case 0x2028:    /* LINE SEPARATOR */
1575          case 0x2029:    /* PARAGRAPH SEPARATOR */
1576          break;
1577          }
1578        ecode++;
1579        break;
1580    
1581  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1582      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1583      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1418  for (;;) Line 1620  for (;;)
1620    
1621          default:          default:
1622          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
         break;  
1623          }          }
1624    
1625        ecode += 3;        ecode += 3;
# Line 1532  for (;;) Line 1733  for (;;)
1733          {          {
1734          for (fi = min;; fi++)          for (fi = min;; fi++)
1735            {            {
1736            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1737            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1738            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1739              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 1553  for (;;) Line 1754  for (;;)
1754            }            }
1755          while (eptr >= pp)          while (eptr >= pp)
1756            {            {
1757            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1758            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1759            eptr -= length;            eptr -= length;
1760            }            }
# Line 1658  for (;;) Line 1859  for (;;)
1859            {            {
1860            for (fi = min;; fi++)            for (fi = min;; fi++)
1861              {              {
1862              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1863              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1864              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1865              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 1678  for (;;) Line 1879  for (;;)
1879            {            {
1880            for (fi = min;; fi++)            for (fi = min;; fi++)
1881              {              {
1882              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1883              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1884              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1885              c = *eptr++;              c = *eptr++;
# Line 1715  for (;;) Line 1916  for (;;)
1916              }              }
1917            for (;;)            for (;;)
1918              {              {
1919              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1920              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1921              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
1922              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1734  for (;;) Line 1935  for (;;)
1935              }              }
1936            while (eptr >= pp)            while (eptr >= pp)
1937              {              {
1938              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
1939              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1940              eptr--;              eptr--;
1941              }              }
# Line 1805  for (;;) Line 2006  for (;;)
2006          {          {
2007          for (fi = min;; fi++)          for (fi = min;; fi++)
2008            {            {
2009            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2010            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2011            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2012            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
# Line 1829  for (;;) Line 2030  for (;;)
2030            }            }
2031          for(;;)          for(;;)
2032            {            {
2033            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2034            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2035            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2036            BACKCHAR(eptr)            BACKCHAR(eptr)
# Line 1888  for (;;) Line 2089  for (;;)
2089    
2090        else        else
2091          {          {
2092          int dc;          unsigned int dc;
2093          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2094          ecode += length;          ecode += length;
2095    
# Line 1915  for (;;) Line 2116  for (;;)
2116        }        }
2117      break;      break;
2118    
2119      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2120    
2121      case OP_EXACT:      case OP_EXACT:
2122      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2123      ecode += 3;      ecode += 3;
2124      goto REPEATCHAR;      goto REPEATCHAR;
2125    
2126        case OP_POSUPTO:
2127        possessive = TRUE;
2128        /* Fall through */
2129    
2130      case OP_UPTO:      case OP_UPTO:
2131      case OP_MINUPTO:      case OP_MINUPTO:
2132      min = 0;      min = 0;
# Line 1930  for (;;) Line 2135  for (;;)
2135      ecode += 3;      ecode += 3;
2136      goto REPEATCHAR;      goto REPEATCHAR;
2137    
2138        case OP_POSSTAR:
2139        possessive = TRUE;
2140        min = 0;
2141        max = INT_MAX;
2142        ecode++;
2143        goto REPEATCHAR;
2144    
2145        case OP_POSPLUS:
2146        possessive = TRUE;
2147        min = 1;
2148        max = INT_MAX;
2149        ecode++;
2150        goto REPEATCHAR;
2151    
2152        case OP_POSQUERY:
2153        possessive = TRUE;
2154        min = 0;
2155        max = 1;
2156        ecode++;
2157        goto REPEATCHAR;
2158    
2159      case OP_STAR:      case OP_STAR:
2160      case OP_MINSTAR:      case OP_MINSTAR:
2161      case OP_PLUS:      case OP_PLUS:
# Line 1961  for (;;) Line 2187  for (;;)
2187    
2188        if (length > 1)        if (length > 1)
2189          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2190  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2191          int othercase;          unsigned int othercase;
2192          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2193              (othercase = _pcre_ucp_othercase(fc)) >= 0 &&              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
              othercase >= 0)  
2194            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2195            else oclength = 0;
2196  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2197    
2198          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2199            {            {
2200            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (memcmp(eptr, charptr, length) == 0) eptr += length;
2201    #ifdef SUPPORT_UCP
2202            /* Need braces because of following else */            /* Need braces because of following else */
2203            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2204            else            else
# Line 1982  for (;;) Line 2206  for (;;)
2206              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2207              eptr += oclength;              eptr += oclength;
2208              }              }
2209    #else   /* without SUPPORT_UCP */
2210              else { RRETURN(MATCH_NOMATCH); }
2211    #endif  /* SUPPORT_UCP */
2212            }            }
2213    
2214          if (min == max) continue;          if (min == max) continue;
# Line 1990  for (;;) Line 2217  for (;;)
2217            {            {
2218            for (fi = min;; fi++)            for (fi = min;; fi++)
2219              {              {
2220              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2221              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2222              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2223              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2224    #ifdef SUPPORT_UCP
2225              /* Need braces because of following else */              /* Need braces because of following else */
2226              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2227              else              else
# Line 2001  for (;;) Line 2229  for (;;)
2229                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2230                eptr += oclength;                eptr += oclength;
2231                }                }
2232    #else   /* without SUPPORT_UCP */
2233                else { RRETURN (MATCH_NOMATCH); }
2234    #endif  /* SUPPORT_UCP */
2235              }              }
2236            /* Control never gets here */            /* Control never gets here */
2237            }            }
2238          else  
2239            else  /* Maximize */
2240            {            {
2241            pp = eptr;            pp = eptr;
2242            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2243              {              {
2244              if (eptr > md->end_subject - length) break;              if (eptr > md->end_subject - length) break;
2245              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2246    #ifdef SUPPORT_UCP
2247              else if (oclength == 0) break;              else if (oclength == 0) break;
2248              else              else
2249                {                {
2250                if (memcmp(eptr, occhars, oclength) != 0) break;                if (memcmp(eptr, occhars, oclength) != 0) break;
2251                eptr += oclength;                eptr += oclength;
2252                }                }
2253    #else   /* without SUPPORT_UCP */
2254                else break;
2255    #endif  /* SUPPORT_UCP */
2256              }              }
2257            while (eptr >= pp)  
2258              if (possessive) continue;
2259              for(;;)
2260             {             {
2261             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2262             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2263               if (eptr == pp) RRETURN(MATCH_NOMATCH);
2264    #ifdef SUPPORT_UCP
2265               eptr--;
2266               BACKCHAR(eptr);
2267    #else   /* without SUPPORT_UCP */
2268             eptr -= length;             eptr -= length;
2269    #endif  /* SUPPORT_UCP */
2270             }             }
           RRETURN(MATCH_NOMATCH);  
2271            }            }
2272          /* Control never gets here */          /* Control never gets here */
2273          }          }
# Line 2064  for (;;) Line 2307  for (;;)
2307          {          {
2308          for (fi = min;; fi++)          for (fi = min;; fi++)
2309            {            {
2310            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2311            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2312            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
2313                fc != md->lcc[*eptr++])                fc != md->lcc[*eptr++])
# Line 2072  for (;;) Line 2315  for (;;)
2315            }            }
2316          /* Control never gets here */          /* Control never gets here */
2317          }          }
2318        else        else  /* Maximize */
2319          {          {
2320          pp = eptr;          pp = eptr;
2321          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2080  for (;;) Line 2323  for (;;)
2323            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2324            eptr++;            eptr++;
2325            }            }
2326            if (possessive) continue;
2327          while (eptr >= pp)          while (eptr >= pp)
2328            {            {
2329            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2330            eptr--;            eptr--;
2331            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2332            }            }
# Line 2101  for (;;) Line 2345  for (;;)
2345          {          {
2346          for (fi = min;; fi++)          for (fi = min;; fi++)
2347            {            {
2348            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2349            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2350            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2351              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2352            }            }
2353          /* Control never gets here */          /* Control never gets here */
2354          }          }
2355        else        else  /* Maximize */
2356          {          {
2357          pp = eptr;          pp = eptr;
2358          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2116  for (;;) Line 2360  for (;;)
2360            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2361            eptr++;            eptr++;
2362            }            }
2363            if (possessive) continue;
2364          while (eptr >= pp)          while (eptr >= pp)
2365            {            {
2366            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2367            eptr--;            eptr--;
2368            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2369            }            }
# Line 2168  for (;;) Line 2413  for (;;)
2413      ecode += 3;      ecode += 3;
2414      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2415    
2416        case OP_NOTPOSSTAR:
2417        possessive = TRUE;
2418        min = 0;
2419        max = INT_MAX;
2420        ecode++;
2421        goto REPEATNOTCHAR;
2422    
2423        case OP_NOTPOSPLUS:
2424        possessive = TRUE;
2425        min = 1;
2426        max = INT_MAX;
2427        ecode++;
2428        goto REPEATNOTCHAR;
2429    
2430        case OP_NOTPOSQUERY:
2431        possessive = TRUE;
2432        min = 0;
2433        max = 1;
2434        ecode++;
2435        goto REPEATNOTCHAR;
2436    
2437        case OP_NOTPOSUPTO:
2438        possessive = TRUE;
2439        min = 0;
2440        max = GET2(ecode, 1);
2441        ecode += 3;
2442        goto REPEATNOTCHAR;
2443    
2444      case OP_NOTSTAR:      case OP_NOTSTAR:
2445      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2446      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2207  for (;;) Line 2480  for (;;)
2480        /* UTF-8 mode */        /* UTF-8 mode */
2481        if (utf8)        if (utf8)
2482          {          {
2483          register int d;          register unsigned int d;
2484          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2485            {            {
2486            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2232  for (;;) Line 2505  for (;;)
2505          /* UTF-8 mode */          /* UTF-8 mode */
2506          if (utf8)          if (utf8)
2507            {            {
2508            register int d;            register unsigned int d;
2509            for (fi = min;; fi++)            for (fi = min;; fi++)
2510              {              {
2511              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2512              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2513              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2514              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
# Line 2249  for (;;) Line 2522  for (;;)
2522            {            {
2523            for (fi = min;; fi++)            for (fi = min;; fi++)
2524              {              {
2525              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2526              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2527              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2528                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2268  for (;;) Line 2541  for (;;)
2541          /* UTF-8 mode */          /* UTF-8 mode */
2542          if (utf8)          if (utf8)
2543            {            {
2544            register int d;            register unsigned int d;
2545            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2546              {              {
2547              int len = 1;              int len = 1;
# Line 2278  for (;;) Line 2551  for (;;)
2551              if (fc == d) break;              if (fc == d) break;
2552              eptr += len;              eptr += len;
2553              }              }
2554            for(;;)          if (possessive) continue;
2555            for(;;)
2556              {              {
2557              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2558              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2559              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2560              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2295  for (;;) Line 2569  for (;;)
2569              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2570              eptr++;              eptr++;
2571              }              }
2572              if (possessive) continue;
2573            while (eptr >= pp)            while (eptr >= pp)
2574              {              {
2575              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2576              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2577              eptr--;              eptr--;
2578              }              }
# Line 2316  for (;;) Line 2591  for (;;)
2591        /* UTF-8 mode */        /* UTF-8 mode */
2592        if (utf8)        if (utf8)
2593          {          {
2594          register int d;          register unsigned int d;
2595          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2596            {            {
2597            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2339  for (;;) Line 2614  for (;;)
2614          /* UTF-8 mode */          /* UTF-8 mode */
2615          if (utf8)          if (utf8)
2616            {            {
2617            register int d;            register unsigned int d;
2618            for (fi = min;; fi++)            for (fi = min;; fi++)
2619              {              {
2620              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2621              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2622              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2623              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fi >= max || eptr >= md->end_subject || fc == d)
# Line 2355  for (;;) Line 2630  for (;;)
2630            {            {
2631            for (fi = min;; fi++)            for (fi = min;; fi++)
2632              {              {
2633              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2634              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2635              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2636                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2374  for (;;) Line 2649  for (;;)
2649          /* UTF-8 mode */          /* UTF-8 mode */
2650          if (utf8)          if (utf8)
2651            {            {
2652            register int d;            register unsigned int d;
2653            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2654              {              {
2655              int len = 1;              int len = 1;
# Line 2383  for (;;) Line 2658  for (;;)
2658              if (fc == d) break;              if (fc == d) break;
2659              eptr += len;              eptr += len;
2660              }              }
2661              if (possessive) continue;
2662            for(;;)            for(;;)
2663              {              {
2664              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2665              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2666              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2667              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2400  for (;;) Line 2676  for (;;)
2676              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2677              eptr++;              eptr++;
2678              }              }
2679              if (possessive) continue;
2680            while (eptr >= pp)            while (eptr >= pp)
2681              {              {
2682              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2683              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2684              eptr--;              eptr--;
2685              }              }
# Line 2431  for (;;) Line 2708  for (;;)
2708      ecode += 3;      ecode += 3;
2709      goto REPEATTYPE;      goto REPEATTYPE;
2710    
2711      case OP_TYPESTAR:      case OP_TYPEPOSSTAR:
2712      case OP_TYPEMINSTAR:      possessive = TRUE;
2713      case OP_TYPEPLUS:      min = 0;
2714      case OP_TYPEMINPLUS:      max = INT_MAX;
2715      case OP_TYPEQUERY:      ecode++;
2716        goto REPEATTYPE;
2717    
2718        case OP_TYPEPOSPLUS:
2719        possessive = TRUE;
2720        min = 1;
2721        max = INT_MAX;
2722        ecode++;
2723        goto REPEATTYPE;
2724    
2725        case OP_TYPEPOSQUERY:
2726        possessive = TRUE;
2727        min = 0;
2728        max = 1;
2729        ecode++;
2730        goto REPEATTYPE;
2731    
2732        case OP_TYPEPOSUPTO:
2733        possessive = TRUE;
2734        min = 0;
2735        max = GET2(ecode, 1);
2736        ecode += 3;
2737        goto REPEATTYPE;
2738    
2739        case OP_TYPESTAR:
2740        case OP_TYPEMINSTAR:
2741        case OP_TYPEPLUS:
2742        case OP_TYPEMINPLUS:
2743        case OP_TYPEQUERY:
2744      case OP_TYPEMINQUERY:      case OP_TYPEMINQUERY:
2745      c = *ecode++ - OP_TYPESTAR;      c = *ecode++ - OP_TYPESTAR;
2746      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
# Line 2481  for (;;) Line 2786  for (;;)
2786            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2787              {              {
2788              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2789              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2790              }              }
2791            break;            break;
2792    
# Line 2489  for (;;) Line 2794  for (;;)
2794            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2795              {              {
2796              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2797              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2798              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2799              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2800                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 2502  for (;;) Line 2807  for (;;)
2807            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2808              {              {
2809              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2810              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2811              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2812              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2813                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2513  for (;;) Line 2818  for (;;)
2818            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2819              {              {
2820              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2821              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2822              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2823              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2824                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2524  for (;;) Line 2829  for (;;)
2829            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2830              {              {
2831              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2832              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2833              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2834              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2835                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2533  for (;;) Line 2838  for (;;)
2838    
2839            default:            default:
2840            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
2841            }            }
2842          }          }
2843    
# Line 2573  for (;;) Line 2877  for (;;)
2877          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2878            {            {
2879            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
2880               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2881              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2882              eptr++;
2883            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2884            }            }
2885          break;          break;
# Line 2583  for (;;) Line 2888  for (;;)
2888          eptr += min;          eptr += min;
2889          break;          break;
2890    
2891            case OP_ANYNL:
2892            for (i = 1; i <= min; i++)
2893              {
2894              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2895              GETCHARINC(c, eptr);
2896              switch(c)
2897                {
2898                default: RRETURN(MATCH_NOMATCH);
2899                case 0x000d:
2900                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2901                break;
2902                case 0x000a:
2903                case 0x000b:
2904                case 0x000c:
2905                case 0x0085:
2906                case 0x2028:
2907                case 0x2029:
2908                break;
2909                }
2910              }
2911            break;
2912    
2913            case OP_NOT_HSPACE:
2914            for (i = 1; i <= min; i++)
2915              {
2916              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2917              GETCHARINC(c, eptr);
2918              switch(c)
2919                {
2920                default: break;
2921                case 0x09:      /* HT */
2922                case 0x20:      /* SPACE */
2923                case 0xa0:      /* NBSP */
2924                case 0x1680:    /* OGHAM SPACE MARK */
2925                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2926                case 0x2000:    /* EN QUAD */
2927                case 0x2001:    /* EM QUAD */
2928                case 0x2002:    /* EN SPACE */
2929                case 0x2003:    /* EM SPACE */
2930                case 0x2004:    /* THREE-PER-EM SPACE */
2931                case 0x2005:    /* FOUR-PER-EM SPACE */
2932                case 0x2006:    /* SIX-PER-EM SPACE */
2933                case 0x2007:    /* FIGURE SPACE */
2934                case 0x2008:    /* PUNCTUATION SPACE */
2935                case 0x2009:    /* THIN SPACE */
2936                case 0x200A:    /* HAIR SPACE */
2937                case 0x202f:    /* NARROW NO-BREAK SPACE */
2938                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2939                case 0x3000:    /* IDEOGRAPHIC SPACE */
2940                RRETURN(MATCH_NOMATCH);
2941                }
2942              }
2943            break;
2944    
2945            case OP_HSPACE:
2946            for (i = 1; i <= min; i++)
2947              {
2948              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2949              GETCHARINC(c, eptr);
2950              switch(c)
2951                {
2952                default: RRETURN(MATCH_NOMATCH);
2953                case 0x09:      /* HT */
2954                case 0x20:      /* SPACE */
2955                case 0xa0:      /* NBSP */
2956                case 0x1680:    /* OGHAM SPACE MARK */
2957                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2958                case 0x2000:    /* EN QUAD */
2959                case 0x2001:    /* EM QUAD */
2960                case 0x2002:    /* EN SPACE */
2961                case 0x2003:    /* EM SPACE */
2962                case 0x2004:    /* THREE-PER-EM SPACE */
2963                case 0x2005:    /* FOUR-PER-EM SPACE */
2964                case 0x2006:    /* SIX-PER-EM SPACE */
2965                case 0x2007:    /* FIGURE SPACE */
2966                case 0x2008:    /* PUNCTUATION SPACE */
2967                case 0x2009:    /* THIN SPACE */
2968                case 0x200A:    /* HAIR SPACE */
2969                case 0x202f:    /* NARROW NO-BREAK SPACE */
2970                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2971                case 0x3000:    /* IDEOGRAPHIC SPACE */
2972                break;
2973                }
2974              }
2975            break;
2976    
2977            case OP_NOT_VSPACE:
2978            for (i = 1; i <= min; i++)
2979              {
2980              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2981              GETCHARINC(c, eptr);
2982              switch(c)
2983                {
2984                default: break;
2985                case 0x0a:      /* LF */
2986                case 0x0b:      /* VT */
2987                case 0x0c:      /* FF */
2988                case 0x0d:      /* CR */
2989                case 0x85:      /* NEL */
2990                case 0x2028:    /* LINE SEPARATOR */
2991                case 0x2029:    /* PARAGRAPH SEPARATOR */
2992                RRETURN(MATCH_NOMATCH);
2993                }
2994              }
2995            break;
2996    
2997            case OP_VSPACE:
2998            for (i = 1; i <= min; i++)
2999              {
3000              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3001              GETCHARINC(c, eptr);
3002              switch(c)
3003                {
3004                default: RRETURN(MATCH_NOMATCH);
3005                case 0x0a:      /* LF */
3006                case 0x0b:      /* VT */
3007                case 0x0c:      /* FF */
3008                case 0x0d:      /* CR */
3009                case 0x85:      /* NEL */
3010                case 0x2028:    /* LINE SEPARATOR */
3011                case 0x2029:    /* PARAGRAPH SEPARATOR */
3012                break;
3013                }
3014              }
3015            break;
3016    
3017          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3018          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3019            {            {
# Line 2651  for (;;) Line 3082  for (;;)
3082  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
3083    
3084        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
3085        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
3086          number of bytes present, as this was tested above. */
3087    
3088        switch(ctype)        switch(ctype)
3089          {          {
# Line 2659  for (;;) Line 3091  for (;;)
3091          if ((ims & PCRE_DOTALL) == 0)          if ((ims & PCRE_DOTALL) == 0)
3092            {            {
3093            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3094              if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);              {
3095                if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3096                eptr++;
3097                }
3098            }            }
3099          else eptr += min;          else eptr += min;
3100          break;          break;
# Line 2668  for (;;) Line 3103  for (;;)
3103          eptr += min;          eptr += min;
3104          break;          break;
3105    
3106            /* Because of the CRLF case, we can't assume the minimum number of
3107            bytes are present in this case. */
3108    
3109            case OP_ANYNL:
3110            for (i = 1; i <= min; i++)
3111              {
3112              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3113              switch(*eptr++)
3114                {
3115                default: RRETURN(MATCH_NOMATCH);
3116                case 0x000d:
3117                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3118                break;
3119                case 0x000a:
3120                case 0x000b:
3121                case 0x000c:
3122                case 0x0085:
3123                break;
3124                }
3125              }
3126            break;
3127    
3128            case OP_NOT_HSPACE:
3129            for (i = 1; i <= min; i++)
3130              {
3131              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3132              switch(*eptr++)
3133                {
3134                default: break;
3135                case 0x09:      /* HT */
3136                case 0x20:      /* SPACE */
3137                case 0xa0:      /* NBSP */
3138                RRETURN(MATCH_NOMATCH);
3139                }
3140              }
3141            break;
3142    
3143            case OP_HSPACE:
3144            for (i = 1; i <= min; i++)
3145              {
3146              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3147              switch(*eptr++)
3148                {
3149                default: RRETURN(MATCH_NOMATCH);
3150                case 0x09:      /* HT */
3151                case 0x20:      /* SPACE */
3152                case 0xa0:      /* NBSP */
3153                break;
3154                }
3155              }
3156            break;
3157    
3158            case OP_NOT_VSPACE:
3159            for (i = 1; i <= min; i++)
3160              {
3161              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3162              switch(*eptr++)
3163                {
3164                default: break;
3165                case 0x0a:      /* LF */
3166                case 0x0b:      /* VT */
3167                case 0x0c:      /* FF */
3168                case 0x0d:      /* CR */
3169                case 0x85:      /* NEL */
3170                RRETURN(MATCH_NOMATCH);
3171                }
3172              }
3173            break;
3174    
3175            case OP_VSPACE:
3176            for (i = 1; i <= min; i++)
3177              {
3178              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3179              switch(*eptr++)
3180                {
3181                default: RRETURN(MATCH_NOMATCH);
3182                case 0x0a:      /* LF */
3183                case 0x0b:      /* VT */
3184                case 0x0c:      /* FF */
3185                case 0x0d:      /* CR */
3186                case 0x85:      /* NEL */
3187                break;
3188                }
3189              }
3190            break;
3191    
3192          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3193          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3194            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2723  for (;;) Line 3244  for (;;)
3244            case PT_ANY:            case PT_ANY:
3245            for (fi = min;; fi++)            for (fi = min;; fi++)
3246              {              {
3247              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3248              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3249              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3250              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3251              if (prop_fail_result) RRETURN(MATCH_NOMATCH);              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3252              }              }
3253            break;            /* Control never gets here */
3254    
3255            case PT_LAMP:            case PT_LAMP:
3256            for (fi = min;; fi++)            for (fi = min;; fi++)
3257              {              {
3258              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3259              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3260              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3261              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2744  for (;;) Line 3265  for (;;)
3265                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3266                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3267              }              }
3268            break;            /* Control never gets here */
3269    
3270            case PT_GC:            case PT_GC:
3271            for (fi = min;; fi++)            for (fi = min;; fi++)
3272              {              {
3273              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3274              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3275              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3276              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2757  for (;;) Line 3278  for (;;)
3278              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3279                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3280              }              }
3281            break;            /* Control never gets here */
3282    
3283            case PT_PC:            case PT_PC:
3284            for (fi = min;; fi++)            for (fi = min;; fi++)
3285              {              {
3286              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3287              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3288              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3289              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2770  for (;;) Line 3291  for (;;)
3291              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3292                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3293              }              }
3294            break;            /* Control never gets here */
3295    
3296            case PT_SC:            case PT_SC:
3297            for (fi = min;; fi++)            for (fi = min;; fi++)
3298              {              {
3299              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3300              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3301              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3302              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2783  for (;;) Line 3304  for (;;)
3304              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3305                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3306              }              }
3307            break;            /* Control never gets here */
3308    
3309            default:            default:
3310            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
           break;  
3311            }            }
3312          }          }
3313    
# Line 2798  for (;;) Line 3318  for (;;)
3318          {          {
3319          for (fi = min;; fi++)          for (fi = min;; fi++)
3320            {            {
3321            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3322            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3323            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3324            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
# Line 2827  for (;;) Line 3347  for (;;)
3347          {          {
3348          for (fi = min;; fi++)          for (fi = min;; fi++)
3349            {            {
3350            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3351            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3352            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3353                   (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3354                    IS_NEWLINE(eptr)))
3355                RRETURN(MATCH_NOMATCH);
3356    
3357            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3358            switch(ctype)            switch(ctype)
3359              {              {
3360              case OP_ANY:              case OP_ANY:        /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3361              break;              break;
3362    
3363              case OP_ANYBYTE:              case OP_ANYBYTE:
3364              break;              break;
3365    
3366                case OP_ANYNL:
3367                switch(c)
3368                  {
3369                  default: RRETURN(MATCH_NOMATCH);
3370                  case 0x000d:
3371                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3372                  break;
3373                  case 0x000a:
3374                  case 0x000b:
3375                  case 0x000c:
3376                  case 0x0085:
3377                  case 0x2028:
3378                  case 0x2029:
3379                  break;
3380                  }
3381                break;
3382    
3383                case OP_NOT_HSPACE:
3384                switch(c)
3385                  {
3386                  default: break;
3387                  case 0x09:      /* HT */
3388                  case 0x20:      /* SPACE */
3389                  case 0xa0:      /* NBSP */
3390                  case 0x1680:    /* OGHAM SPACE MARK */
3391                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3392                  case 0x2000:    /* EN QUAD */
3393                  case 0x2001:    /* EM QUAD */
3394                  case 0x2002:    /* EN SPACE */
3395                  case 0x2003:    /* EM SPACE */
3396                  case 0x2004:    /* THREE-PER-EM SPACE */
3397                  case 0x2005:    /* FOUR-PER-EM SPACE */
3398                  case 0x2006:    /* SIX-PER-EM SPACE */
3399                  case 0x2007:    /* FIGURE SPACE */
3400                  case 0x2008:    /* PUNCTUATION SPACE */
3401                  case 0x2009:    /* THIN SPACE */
3402                  case 0x200A:    /* HAIR SPACE */
3403                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3404                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3405                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3406                  RRETURN(MATCH_NOMATCH);
3407                  }
3408                break;
3409    
3410                case OP_HSPACE:
3411                switch(c)
3412                  {
3413                  default: RRETURN(MATCH_NOMATCH);
3414                  case 0x09:      /* HT */
3415                  case 0x20:      /* SPACE */
3416                  case 0xa0:      /* NBSP */
3417                  case 0x1680:    /* OGHAM SPACE MARK */
3418                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3419                  case 0x2000:    /* EN QUAD */
3420                  case 0x2001:    /* EM QUAD */
3421                  case 0x2002:    /* EN SPACE */
3422                  case 0x2003:    /* EM SPACE */
3423                  case 0x2004:    /* THREE-PER-EM SPACE */
3424                  case 0x2005:    /* FOUR-PER-EM SPACE */
3425                  case 0x2006:    /* SIX-PER-EM SPACE */
3426                  case 0x2007:    /* FIGURE SPACE */
3427                  case 0x2008:    /* PUNCTUATION SPACE */
3428                  case 0x2009:    /* THIN SPACE */
3429                  case 0x200A:    /* HAIR SPACE */
3430                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3431                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3432                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3433                  break;
3434                  }
3435                break;
3436    
3437                case OP_NOT_VSPACE:
3438                switch(c)
3439                  {
3440                  default: break;
3441                  case 0x0a:      /* LF */
3442                  case 0x0b:      /* VT */
3443                  case 0x0c:      /* FF */
3444                  case 0x0d:      /* CR */
3445                  case 0x85:      /* NEL */
3446                  case 0x2028:    /* LINE SEPARATOR */
3447                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3448                  RRETURN(MATCH_NOMATCH);
3449                  }
3450                break;
3451    
3452                case OP_VSPACE:
3453                switch(c)
3454                  {
3455                  default: RRETURN(MATCH_NOMATCH);
3456                  case 0x0a:      /* LF */
3457                  case 0x0b:      /* VT */
3458                  case 0x0c:      /* FF */
3459                  case 0x0d:      /* CR */
3460                  case 0x85:      /* NEL */
3461                  case 0x2028:    /* LINE SEPARATOR */
3462                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3463                  break;
3464                  }
3465                break;
3466    
3467              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3468              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3469                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2882  for (;;) Line 3505  for (;;)
3505          {          {
3506          for (fi = min;; fi++)          for (fi = min;; fi++)
3507            {            {
3508            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3509            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3510            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3511                   ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3512                RRETURN(MATCH_NOMATCH);
3513    
3514            c = *eptr++;            c = *eptr++;
3515            switch(ctype)            switch(ctype)
3516              {              {
3517              case OP_ANY:              case OP_ANY:   /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3518              break;              break;
3519    
3520              case OP_ANYBYTE:              case OP_ANYBYTE:
3521              break;              break;
3522    
3523                case OP_ANYNL:
3524                switch(c)
3525                  {
3526                  default: RRETURN(MATCH_NOMATCH);
3527                  case 0x000d:
3528                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3529                  break;
3530                  case 0x000a:
3531                  case 0x000b:
3532                  case 0x000c:
3533                  case 0x0085:
3534                  break;
3535                  }
3536                break;
3537    
3538                case OP_NOT_HSPACE:
3539                switch(c)
3540                  {
3541                  default: break;
3542                  case 0x09:      /* HT */
3543                  case 0x20:      /* SPACE */
3544                  case 0xa0:      /* NBSP */
3545                  RRETURN(MATCH_NOMATCH);
3546                  }
3547                break;
3548    
3549                case OP_HSPACE:
3550                switch(c)
3551                  {
3552                  default: RRETURN(MATCH_NOMATCH);
3553                  case 0x09:      /* HT */
3554                  case 0x20:      /* SPACE */
3555                  case 0xa0:      /* NBSP */
3556                  break;
3557                  }
3558                break;
3559    
3560                case OP_NOT_VSPACE:
3561                switch(c)
3562                  {
3563                  default: break;
3564                  case 0x0a:      /* LF */
3565                  case 0x0b:      /* VT */
3566                  case 0x0c:      /* FF */
3567                  case 0x0d:      /* CR */
3568                  case 0x85:      /* NEL */
3569                  RRETURN(MATCH_NOMATCH);
3570                  }
3571                break;
3572    
3573                case OP_VSPACE:
3574                switch(c)
3575                  {
3576                  default: RRETURN(MATCH_NOMATCH);
3577                  case 0x0a:      /* LF */
3578                  case 0x0b:      /* VT */
3579                  case 0x0c:      /* FF */
3580                  case 0x0d:      /* CR */
3581                  case 0x85:      /* NEL */
3582                  break;
3583                  }
3584                break;
3585    
3586              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3587              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3588              break;              break;
# Line 2927  for (;;) Line 3615  for (;;)
3615        /* Control never gets here */        /* Control never gets here */
3616        }        }
3617    
3618      /* If maximizing it is worth using inline code for speed, doing the type      /* If maximizing, it is worth using inline code for speed, doing the type
3619      test once at the start (i.e. keep it out of the loop). Again, keep the      test once at the start (i.e. keep it out of the loop). Again, keep the
3620      UTF-8 and UCP stuff separate. */      UTF-8 and UCP stuff separate. */
3621    
# Line 3008  for (;;) Line 3696  for (;;)
3696    
3697          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3698    
3699            if (possessive) continue;
3700          for(;;)          for(;;)
3701            {            {
3702            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3703            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3704            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3705            BACKCHAR(eptr);            BACKCHAR(eptr);
# Line 3043  for (;;) Line 3732  for (;;)
3732    
3733          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3734    
3735            if (possessive) continue;
3736          for(;;)          for(;;)
3737            {            {
3738            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3739            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3740            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3741            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
# Line 3074  for (;;) Line 3764  for (;;)
3764          switch(ctype)          switch(ctype)
3765            {            {
3766            case OP_ANY:            case OP_ANY:
   
           /* Special code is required for UTF8, but when the maximum is unlimited  
           we don't need it, so we repeat the non-UTF8 code. This is probably  
           worth it, because .* is quite a common idiom. */  
   
3767            if (max < INT_MAX)            if (max < INT_MAX)
3768              {              {
3769              if ((ims & PCRE_DOTALL) == 0)              if ((ims & PCRE_DOTALL) == 0)
3770                {                {
3771                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3772                  {                  {
3773                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3774                  eptr++;                  eptr++;
3775                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3776                  }                  }
# Line 3094  for (;;) Line 3779  for (;;)
3779                {                {
3780                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3781                  {                  {
3782                    if (eptr >= md->end_subject) break;
3783                  eptr++;                  eptr++;
3784                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3785                  }                  }
# Line 3108  for (;;) Line 3794  for (;;)
3794                {                {
3795                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3796                  {                  {
3797                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3798                  eptr++;                  eptr++;
3799                    while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3800                  }                  }
               break;  
3801                }                }
3802              else              else
3803                {                {
3804                c = max - min;                eptr = md->end_subject;
               if (c > md->end_subject - eptr) c = md->end_subject - eptr;  
               eptr += c;  
3805                }                }
3806              }              }
3807            break;            break;
# Line 3126  for (;;) Line 3810  for (;;)
3810    
3811            case OP_ANYBYTE:            case OP_ANYBYTE:
3812            c = max - min;            c = max - min;
3813            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3814                c = md->end_subject - eptr;
3815            eptr += c;            eptr += c;
3816            break;            break;
3817    
3818              case OP_ANYNL:
3819              for (i = min; i < max; i++)
3820                {
3821                int len = 1;
3822                if (eptr >= md->end_subject) break;
3823                GETCHARLEN(c, eptr, len);
3824                if (c == 0x000d)
3825                  {
3826                  if (++eptr >= md->end_subject) break;
3827                  if (*eptr == 0x000a) eptr++;
3828                  }
3829                else
3830                  {
3831                  if (c != 0x000a && c != 0x000b && c != 0x000c &&
3832                      c != 0x0085 && c != 0x2028 && c != 0x2029)
3833                    break;
3834                  eptr += len;
3835                  }
3836                }
3837              break;
3838    
3839              case OP_NOT_HSPACE:
3840              case OP_HSPACE:
3841              for (i = min; i < max; i++)
3842                {
3843                BOOL gotspace;
3844                int len = 1;
3845                if (eptr >= md->end_subject) break;
3846                GETCHARLEN(c, eptr, len);
3847                switch(c)
3848                  {
3849                  default: gotspace = FALSE; break;
3850                  case 0x09:      /* HT */
3851                  case 0x20:      /* SPACE */
3852                  case 0xa0:      /* NBSP */
3853                  case 0x1680:    /* OGHAM SPACE MARK */
3854                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3855                  case 0x2000:    /* EN QUAD */
3856                  case 0x2001:    /* EM QUAD */
3857                  case 0x2002:    /* EN SPACE */
3858                  case 0x2003:    /* EM SPACE */
3859                  case 0x2004:    /* THREE-PER-EM SPACE */
3860                  case 0x2005:    /* FOUR-PER-EM SPACE */
3861                  case 0x2006:    /* SIX-PER-EM SPACE */
3862                  case 0x2007:    /* FIGURE SPACE */
3863                  case 0x2008:    /* PUNCTUATION SPACE */
3864                  case 0x2009:    /* THIN SPACE */
3865                  case 0x200A:    /* HAIR SPACE */
3866                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3867                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3868                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3869                  gotspace = TRUE;
3870                  break;
3871                  }
3872                if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3873                eptr += len;
3874                }
3875              break;
3876    
3877              case OP_NOT_VSPACE:
3878              case OP_VSPACE:
3879              for (i = min; i < max; i++)
3880                {
3881                BOOL gotspace;
3882                int len = 1;
3883                if (eptr >= md->end_subject) break;
3884                GETCHARLEN(c, eptr, len);
3885                switch(c)
3886                  {
3887                  default: gotspace = FALSE; break;
3888                  case 0x0a:      /* LF */
3889                  case 0x0b:      /* VT */
3890                  case 0x0c:      /* FF */
3891                  case 0x0d:      /* CR */
3892                  case 0x85:      /* NEL */
3893                  case 0x2028:    /* LINE SEPARATOR */
3894                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3895                  gotspace = TRUE;
3896                  break;
3897                  }
3898                if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3899                eptr += len;
3900                }
3901              break;
3902    
3903            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3904            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3905              {              {
# Line 3202  for (;;) Line 3972  for (;;)
3972    
3973          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3974    
3975            if (possessive) continue;
3976          for(;;)          for(;;)
3977            {            {
3978            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
3979            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3980            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3981            BACKCHAR(eptr);            BACKCHAR(eptr);
# Line 3222  for (;;) Line 3993  for (;;)
3993              {              {
3994              for (i = min; i < max; i++)              for (i = min; i < max; i++)
3995                {                {
3996                if (eptr >= md->end_subject || *eptr == NEWLINE) break;                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3997                eptr++;                eptr++;
3998                }                }
3999              break;              break;
# Line 3231  for (;;) Line 4002  for (;;)
4002    
4003            case OP_ANYBYTE:            case OP_ANYBYTE:
4004            c = max - min;            c = max - min;
4005            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
4006                c = md->end_subject - eptr;
4007            eptr += c;            eptr += c;
4008            break;            break;
4009    
4010              case OP_ANYNL:
4011              for (i = min; i < max; i++)
4012                {
4013                if (eptr >= md->end_subject) break;
4014                c = *eptr;
4015                if (c == 0x000d)
4016                  {
4017                  if (++eptr >= md->end_subject) break;
4018                  if (*eptr == 0x000a) eptr++;
4019                  }
4020                else
4021                  {
4022                  if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
4023                    break;
4024                  eptr++;
4025                  }
4026                }
4027              break;
4028    
4029              case OP_NOT_HSPACE:
4030              for (i = min; i < max; i++)
4031                {
4032                if (eptr >= md->end_subject) break;
4033                c = *eptr;
4034                if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4035                eptr++;
4036                }
4037              break;
4038    
4039              case OP_HSPACE:
4040              for (i = min; i < max; i++)
4041                {
4042                if (eptr >= md->end_subject) break;
4043                c = *eptr;
4044                if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4045                eptr++;
4046                }
4047              break;
4048    
4049              case OP_NOT_VSPACE:
4050              for (i = min; i < max; i++)
4051                {
4052                if (eptr >= md->end_subject) break;
4053                c = *eptr;
4054                if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4055                  break;
4056                eptr++;
4057                }
4058              break;
4059    
4060              case OP_VSPACE:
4061              for (i = min; i < max; i++)
4062                {
4063                if (eptr >= md->end_subject) break;
4064                c = *eptr;
4065                if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4066                  break;
4067                eptr++;
4068                }
4069              break;
4070    
4071            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
4072            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4073              {              {
# Line 3295  for (;;) Line 4128  for (;;)
4128    
4129          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4130    
4131            if (possessive) continue;
4132          while (eptr >= pp)          while (eptr >= pp)
4133            {            {
4134            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4135            eptr--;            eptr--;
4136            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4137            }            }
# Line 3309  for (;;) Line 4143  for (;;)
4143        }        }
4144      /* Control never gets here */      /* Control never gets here */
4145    
4146      /* There's been some horrible disaster. Since all codes > OP_BRA are      /* There's been some horrible disaster. Arrival here can only mean there is
4147      for capturing brackets, and there shouldn't be any gaps between 0 and      something seriously wrong in the code above or the OP_xxx definitions. */
     OP_BRA, arrival here can only mean there is something seriously wrong  
     in the code above or the OP_xxx definitions. */  
4148    
4149      default:      default:
4150      DPRINTF(("Unknown opcode %d\n", *ecode));      DPRINTF(("Unknown opcode %d\n", *ecode));
4151      RRETURN(PCRE_ERROR_UNKNOWN_NODE);      RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
4152      }      }
4153    
4154    /* Do not stick any code in here without much thought; it is assumed    /* Do not stick any code in here without much thought; it is assumed
# Line 3325  for (;;) Line 4157  for (;;)
4157    
4158    }             /* End of main loop */    }             /* End of main loop */
4159  /* Control never reaches here */  /* Control never reaches here */
4160    
4161    
4162    /* When compiling to use the heap rather than the stack for recursive calls to
4163    match(), the RRETURN() macro jumps here. The number that is saved in
4164    frame->Xwhere indicates which label we actually want to return to. */
4165    
4166    #ifdef NO_RECURSE
4167    #define LBL(val) case val: goto L_RM##val;
4168    HEAP_RETURN:
4169    switch (frame->Xwhere)
4170      {
4171      LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4172      LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
4173      LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
4174      LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
4175      LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
4176      LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
4177      default:
4178      DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4179      return PCRE_ERROR_INTERNAL;
4180      }
4181    #undef LBL
4182    #endif  /* NO_RECURSE */
4183  }  }
4184    
4185    
# Line 3337  Undefine all the macros that were define Line 4192  Undefine all the macros that were define
4192  #ifdef NO_RECURSE  #ifdef NO_RECURSE
4193  #undef eptr  #undef eptr
4194  #undef ecode  #undef ecode
4195    #undef mstart
4196  #undef offset_top  #undef offset_top
4197  #undef ims  #undef ims
4198  #undef eptrb  #undef eptrb
# Line 3354  Undefine all the macros that were define Line 4210  Undefine all the macros that were define
4210    
4211  #undef cur_is_word  #undef cur_is_word
4212  #undef condition  #undef condition
 #undef minimize  
4213  #undef prev_is_word  #undef prev_is_word
4214    
4215  #undef original_ims  #undef original_ims
# Line 3410  Returns:          > 0 => success; value Line 4265  Returns:          > 0 => success; value
4265                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4266  */  */
4267    
4268  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
4269  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4270    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4271    int offsetcount)    int offsetcount)
# Line 3419  int rc, resetcount, ocount; Line 4274  int rc, resetcount, ocount;
4274  int first_byte = -1;  int first_byte = -1;
4275  int req_byte = -1;  int req_byte = -1;
4276  int req_byte2 = -1;  int req_byte2 = -1;
4277  unsigned long int ims = 0;  int newline;
4278    unsigned long int ims;
4279  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
4280  BOOL anchored;  BOOL anchored;
4281  BOOL startline;  BOOL startline;
4282  BOOL firstline;  BOOL firstline;
4283  BOOL first_byte_caseless = FALSE;  BOOL first_byte_caseless = FALSE;
4284  BOOL req_byte_caseless = FALSE;  BOOL req_byte_caseless = FALSE;
4285    BOOL utf8;
4286  match_data match_block;  match_data match_block;
4287    match_data *md = &match_block;
4288  const uschar *tables;  const uschar *tables;
4289  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4290  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
4291  USPTR end_subject;  USPTR end_subject;
4292  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
4293    eptrblock eptrchain[EPTR_WORK_SIZE];
4294    
4295  pcre_study_data internal_study;  pcre_study_data internal_study;
4296  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3451  if (offsetcount < 0) return PCRE_ERROR_B Line 4310  if (offsetcount < 0) return PCRE_ERROR_B
4310  the default values. */  the default values. */
4311    
4312  study = NULL;  study = NULL;
4313  match_block.match_limit = MATCH_LIMIT;  md->match_limit = MATCH_LIMIT;
4314  match_block.match_limit_recursion = MATCH_LIMIT_RECURSION;  md->match_limit_recursion = MATCH_LIMIT_RECURSION;
4315  match_block.callout_data = NULL;  md->callout_data = NULL;
4316    
4317  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
4318    
# Line 3465  if (extra_data != NULL) Line 4324  if (extra_data != NULL)
4324    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
4325      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
4326    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
4327      match_block.match_limit = extra_data->match_limit;      md->match_limit = extra_data->match_limit;
4328    if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
4329      match_block.match_limit_recursion = extra_data->match_limit_recursion;      md->match_limit_recursion = extra_data->match_limit_recursion;
4330    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
4331      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
4332    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
4333    }    }
4334    
# Line 3499  firstline = (re->options & PCRE_FIRSTLIN Line 4358  firstline = (re->options & PCRE_FIRSTLIN
4358    
4359  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
4360    
4361  match_block.start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const uschar *)external_re + re->name_table_offset +
4362    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
4363    
4364  match_block.start_subject = (USPTR)subject;  md->start_subject = (USPTR)subject;
4365  match_block.start_offset = start_offset;  md->start_offset = start_offset;
4366  match_block.end_subject = match_block.start_subject + length;  md->end_subject = md->start_subject + length;
4367  end_subject = match_block.end_subject;  end_subject = md->end_subject;
4368    
4369  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4370  match_block.utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4371    
4372  match_block.notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4373  match_block.noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
4374  match_block.notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
4375  match_block.partial = (options & PCRE_PARTIAL) != 0;  md->partial = (options & PCRE_PARTIAL) != 0;
4376  match_block.hitend = FALSE;  md->hitend = FALSE;
4377    
4378    md->recursive = NULL;                   /* No recursion at top level */
4379    md->eptrchain = eptrchain;              /* Make workspace generally available */
4380    
4381    md->lcc = tables + lcc_offset;
4382    md->ctypes = tables + ctypes_offset;
4383    
4384    /* Handle different types of newline. The three bits give eight cases. If
4385    nothing is set at run time, whatever was used at compile time applies. */
4386    
4387  match_block.recursive = NULL;                   /* No recursion at top level */  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
4388           PCRE_NEWLINE_BITS)
4389      {
4390      case 0: newline = NEWLINE; break;   /* Compile-time default */
4391      case PCRE_NEWLINE_CR: newline = '\r'; break;
4392      case PCRE_NEWLINE_LF: newline = '\n'; break;
4393      case PCRE_NEWLINE_CR+
4394           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4395      case PCRE_NEWLINE_ANY: newline = -1; break;
4396      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4397      default: return PCRE_ERROR_BADNEWLINE;
4398      }
4399    
4400  match_block.lcc = tables + lcc_offset;  if (newline == -2)
4401  match_block.ctypes = tables + ctypes_offset;    {
4402      md->nltype = NLTYPE_ANYCRLF;
4403      }
4404    else if (newline < 0)
4405      {
4406      md->nltype = NLTYPE_ANY;
4407      }
4408    else
4409      {
4410      md->nltype = NLTYPE_FIXED;
4411      if (newline > 255)
4412        {
4413        md->nllen = 2;
4414        md->nl[0] = (newline >> 8) & 255;
4415        md->nl[1] = newline & 255;
4416        }
4417      else
4418        {
4419        md->nllen = 1;
4420        md->nl[0] = newline;
4421        }
4422      }
4423    
4424  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
4425  moment. */  moment. */
4426    
4427  if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
4428    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
4429    
4430  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
4431  back the character offset. */  back the character offset. */
4432    
4433  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4434  if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4435    {    {
4436    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
4437      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
# Line 3563  ocount = offsetcount - (offsetcount % 3) Line 4463  ocount = offsetcount - (offsetcount % 3)
4463  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
4464    {    {
4465    ocount = re->top_backref * 3 + 3;    ocount = re->top_backref * 3 + 3;
4466    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4467    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4468    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
4469    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
4470    }    }
4471  else match_block.offset_vector = offsets;  else md->offset_vector = offsets;
4472    
4473  match_block.offset_end = ocount;  md->offset_end = ocount;
4474  match_block.offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
4475  match_block.offset_overflow = FALSE;  md->offset_overflow = FALSE;
4476  match_block.capture_last = -1;  md->capture_last = -1;
4477    
4478  /* Compute the minimum number of offsets that we need to reset each time. Doing  /* Compute the minimum number of offsets that we need to reset each time. Doing
4479  this makes a huge difference to execution time when there aren't many brackets  this makes a huge difference to execution time when there aren't many brackets
# Line 3586  if (resetcount > offsetcount) resetcount Line 4486  if (resetcount > offsetcount) resetcount
4486  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
4487  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. */
4488    
4489  if (match_block.offset_vector != NULL)  if (md->offset_vector != NULL)
4490    {    {
4491    register int *iptr = match_block.offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
4492    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - resetcount/2 + 1;
4493    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
4494    }    }
# Line 3605  if (!anchored) Line 4505  if (!anchored)
4505      {      {
4506      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
4507      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
4508        first_byte = match_block.lcc[first_byte];        first_byte = md->lcc[first_byte];
4509      }      }
4510    else    else
4511      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 3623  if ((re->options & PCRE_REQCHSET) != 0) Line 4523  if ((re->options & PCRE_REQCHSET) != 0)
4523    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
4524    }    }
4525    
4526    
4527    /* ==========================================================================*/
4528    
4529  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
4530  the loop runs just once. */  the loop runs just once. */
4531    
4532  do  for(;;)
4533    {    {
4534    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
4535    
4536    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
4537    
4538    if (match_block.offset_vector != NULL)    if (md->offset_vector != NULL)
4539      {      {
4540      register int *iptr = match_block.offset_vector;      register int *iptr = md->offset_vector;
4541      register int *iend = iptr + resetcount;      register int *iend = iptr + resetcount;
4542      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
4543      }      }
4544    
4545    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* Advance to a unique first char if possible. If firstline is TRUE, the
4546    start of the match is constrained to the first line of a multiline string.    start of the match is constrained to the first line of a multiline string.
4547    Implement this by temporarily adjusting end_subject so that we stop scanning    That is, the match must be before or at the first newline. Implement this by
4548    at a newline. If the match fails at the newline, later code breaks this loop.    temporarily adjusting end_subject so that we stop scanning at a newline. If
4549    */    the match fails at the newline, later code breaks this loop. */
4550    
4551    if (firstline)    if (firstline)
4552      {      {
4553      USPTR t = start_match;      USPTR t = start_match;
4554      while (t < save_end_subject && *t != '\n') t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4555      end_subject = t;      end_subject = t;
4556      }      }
4557    
# Line 3658  do Line 4561  do
4561      {      {
4562      if (first_byte_caseless)      if (first_byte_caseless)
4563        while (start_match < end_subject &&        while (start_match < end_subject &&
4564               match_block.lcc[*start_match] != first_byte)               md->lcc[*start_match] != first_byte)
4565          start_match++;          start_match++;
4566      else      else
4567        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4568          start_match++;          start_match++;
4569      }      }
4570    
4571    /* Or to just after \n for a multiline match if possible */    /* Or to just after a linebreak for a multiline match if possible */
4572    
4573    else if (startline)    else if (startline)
4574      {      {
4575      if (start_match > match_block.start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4576        {        {
4577        while (start_match < end_subject && start_match[-1] != NEWLINE)        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4578            start_match++;
4579    
4580          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4581          and we are now at a LF, advance the match position by one more character.
4582          */
4583    
4584          if (start_match[-1] == '\r' &&
4585               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4586               start_match < end_subject &&
4587               *start_match == '\n')
4588          start_match++;          start_match++;
4589        }        }
4590      }      }
# Line 3693  do Line 4606  do
4606    
4607  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4608    printf(">>>> Match against: ");    printf(">>>> Match against: ");
4609    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, md);
4610    printf("\n");    printf("\n");
4611  #endif  #endif
4612    
# Line 3707  do Line 4620  do
4620    
4621    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end can
4622    take a long time, and give bad performance on quite ordinary patterns. This    take a long time, and give bad performance on quite ordinary patterns. This
4623    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4624    don't do this when the string is sufficiently long.    string... so we don't do this when the string is sufficiently long.
4625    
4626    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested.
4627    */    */
4628    
4629    if (req_byte >= 0 &&    if (req_byte >= 0 &&
4630        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
4631        !match_block.partial)        !md->partial)
4632      {      {
4633      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4634    
# Line 3740  do Line 4653  do
4653            }            }
4654          }          }
4655    
4656        /* If we can't find the required character, break the matching loop */        /* If we can't find the required character, break the matching loop,
4657          forcing a match failure. */
4658    
4659        if (p >= end_subject) break;        if (p >= end_subject)
4660            {
4661            rc = MATCH_NOMATCH;
4662            break;
4663            }
4664    
4665        /* If we have found the required character, save the point where we        /* If we have found the required character, save the point where we
4666        found it, so that we don't search again next time round the loop if        found it, so that we don't search again next time round the loop if
# Line 3752  do Line 4670  do
4670        }        }
4671      }      }
4672    
4673    /* When a match occurs, substrings will be set for all internal extractions;    /* OK, we can now run the match. */
   we just need to set up the whole thing as substring 0 before returning. If  
   there were too many extractions, set the return code to zero. In the case  
   where we had to get some local store to hold offsets for backreferences, copy  
   those back references that we can. In this case there need not be overflow  
   if certain parts of the pattern were not used. */  
   
   match_block.start_match = start_match;  
   match_block.match_call_count = 0;  
   
   rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,  
     match_isgroup, 0);  
   
   /* When the result is no match, if the subject's first character was a  
   newline and the PCRE_FIRSTLINE option is set, break (which will return  
   PCRE_ERROR_NOMATCH). The option requests that a match occur before the first  
   newline in the subject. Otherwise, advance the pointer to the next character  
   and continue - but the continuation will actually happen only when the  
   pattern is not anchored. */  
4674    
4675    if (rc == MATCH_NOMATCH)    md->start_match_ptr = start_match;      /* Insurance */
4676      {    md->match_call_count = 0;
4677      if (firstline && *start_match == NEWLINE) break;    md->eptrn = 0;                          /* Next free eptrchain slot */
4678      start_match++;    rc = match(start_match, md->start_code, start_match, 2, md,
4679        ims, NULL, 0, 0);
4680    
4681      /* Any return other than MATCH_NOMATCH breaks the loop. */
4682    
4683      if (rc != MATCH_NOMATCH) break;
4684    
4685      /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4686      newline in the subject (though it may continue over the newline). Therefore,
4687      if we have just failed to match, starting at a newline, do not continue. */
4688    
4689      if (firstline && IS_NEWLINE(start_match)) break;
4690    
4691      /* Advance the match position by one character. */
4692    
4693      start_match++;
4694  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4695      if (match_block.utf8)    if (utf8)
4696        while(start_match < end_subject && (*start_match & 0xc0) == 0x80)      while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4697          start_match++;        start_match++;
4698  #endif  #endif
4699      continue;  
4700      }    /* Break the loop if the pattern is anchored or if we have passed the end of
4701      the subject. */
4702    
4703      if (anchored || start_match > end_subject) break;
4704    
4705      /* If we have just passed a CR and the newline option is CRLF or ANY or
4706      ANYCRLF, and we are now at a LF, advance the match position by one more
4707      character. */
4708    
4709      if (start_match[-1] == '\r' &&
4710           (md->nltype == NLTYPE_ANY ||
4711            md->nltype == NLTYPE_ANYCRLF ||
4712            md->nllen == 2) &&
4713           start_match < end_subject &&
4714           *start_match == '\n')
4715        start_match++;
4716    
4717    if (rc != MATCH_MATCH)    }   /* End of for(;;) "bumpalong" loop */
4718      {  
4719      DPRINTF((">>>> error: returning %d\n", rc));  /* ==========================================================================*/
4720      return rc;  
4721      }  /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4722    conditions is true:
4723    
4724    (1) The pattern is anchored;
4725    
4726    /* We have a match! Copy the offset information from temporary store if  (2) We are past the end of the subject;
   necessary */  
4727    
4728    (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4729        this option requests that a match occur at or before the first newline in
4730        the subject.
4731    
4732    When we have a match and the offset vector is big enough to deal with any
4733    backreferences, captured substring offsets will already be set up. In the case
4734    where we had to get some local store to hold offsets for backreference
4735    processing, copy those that we can. In this case there need not be overflow if
4736    certain parts of the pattern were not used, even though there are more
4737    capturing parentheses than vector slots. */
4738    
4739    if (rc == MATCH_MATCH)
4740      {
4741    if (using_temporary_offsets)    if (using_temporary_offsets)
4742      {      {
4743      if (offsetcount >= 4)      if (offsetcount >= 4)
4744        {        {
4745        memcpy(offsets + 2, match_block.offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
4746          (offsetcount - 2) * sizeof(int));          (offsetcount - 2) * sizeof(int));
4747        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
4748        }        }
4749      if (match_block.end_offset_top > offsetcount)      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
       match_block.offset_overflow = TRUE;  
   
4750      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
4751      (pcre_free)(match_block.offset_vector);      (pcre_free)(md->offset_vector);
4752      }      }
4753    
4754    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;    /* Set the return code to the number of captured strings, or 0 if there are
4755      too many to fit into the vector. */
4756    
4757      rc = md->offset_overflow? 0 : md->end_offset_top/2;
4758    
4759      /* If there is space, set up the whole thing as substring 0. The value of
4760      md->start_match_ptr might be modified if \K was encountered on the successful
4761      matching path. */
4762    
4763    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4764      {      {
4765      offsets[0] = start_match - match_block.start_subject;      offsets[0] = md->start_match_ptr - md->start_subject;
4766      offsets[1] = match_block.end_match_ptr - match_block.start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
4767      }      }
4768    
4769    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
4770    return rc;    return rc;
4771    }    }
4772    
4773  /* This "while" is the end of the "do" above */  /* Control gets here if there has been an error, or if the overall match
4774    attempt has failed at all permitted starting positions. */
 while (!anchored && start_match <= end_subject);  
4775    
4776  if (using_temporary_offsets)  if (using_temporary_offsets)
4777    {    {
4778    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
4779    (pcre_free)(match_block.offset_vector);    (pcre_free)(md->offset_vector);
4780    }    }
4781    
4782  if (match_block.partial && match_block.hitend)  if (rc != MATCH_NOMATCH)
4783      {
4784      DPRINTF((">>>> error: returning %d\n", rc));
4785      return rc;
4786      }
4787    else if (md->partial && md->hitend)
4788    {    {
4789    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4790    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;

Legend:
Removed from v.87  
changed lines
  Added in v.190

  ViewVC Help
Powered by ViewVC 1.1.5