/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC revision 211 by ph10, Thu Aug 9 09:52:43 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include <config.h>
47    #endif
48    
49    #define NLBLOCK md             /* Block containing newline information */
50    #define PSSTART start_subject  /* Field containing processed string start */
51    #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    /* Undefine some potentially clashing cpp symbols */
56    
57  /* Structure for building a chain of data that actually lives on the  #undef min
58  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   const uschar *epb_saved_eptr;  
 } eptrblock;  
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 68  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_THEN         (-996)
78    
79  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
80  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 101  Returns:     nothing Line 109  Returns:     nothing
109  static void  static void
110  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
111  {  {
112  int c;  unsigned int c;
113  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
114  while (length-- > 0)  while (length-- > 0)
115    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 128  Returns:      TRUE if matched Line 136  Returns:      TRUE if matched
136  */  */
137    
138  static BOOL  static BOOL
139  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
140    unsigned long int ims)    unsigned long int ims)
141  {  {
142  const uschar *p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
143    
144  #ifdef DEBUG  #ifdef DEBUG
145  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 169  return TRUE; Line 177  return TRUE;
177  ****************************************************************************  ****************************************************************************
178                     RECURSION IN THE match() FUNCTION                     RECURSION IN THE match() FUNCTION
179    
180  The match() function is highly recursive. Some regular expressions can cause  The match() function is highly recursive, though not every recursive call
181  it to recurse thousands of times. I was writing for Unix, so I just let it  increases the recursive depth. Nevertheless, some regular expressions can cause
182  call itself recursively. This uses the stack for saving everything that has  it to recurse to a great depth. I was writing for Unix, so I just let it call
183  to be saved for a recursive call. On Unix, the stack can be large, and this  itself recursively. This uses the stack for saving everything that has to be
184  works fine.  saved for a recursive call. On Unix, the stack can be large, and this works
185    fine.
186  It turns out that on non-Unix systems there are problems with programs that  
187  use a lot of stack. (This despite the fact that every last chip has oodles  It turns out that on some non-Unix-like systems there are problems with
188  of memory these days, and techniques for extending the stack have been known  programs that use a lot of stack. (This despite the fact that every last chip
189  for decades.) So....  has oodles of memory these days, and techniques for extending the stack have
190    been known for decades.) So....
191    
192  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
193  calls by keeping local variables that need to be preserved in blocks of memory  calls by keeping local variables that need to be preserved in blocks of memory
194  obtained from malloc instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
195  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
196  always used to.  always used to.
197    
198    The original heap-recursive code used longjmp(). However, it seems that this
199    can be very slow on some operating systems. Following a suggestion from Stan
200    Switzer, the use of longjmp() has been abolished, at the cost of having to
201    provide a unique number for each call to RMATCH. There is no way of generating
202    a sequence of numbers at compile time in C. I have given them names, to make
203    them stand out more clearly.
204    
205    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
206    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
207    tests. Furthermore, not using longjmp() means that local dynamic variables
208    don't have indeterminate values; this has meant that the frame size can be
209    reduced because the result can be "passed back" by straight setting of the
210    variable instead of being passed in the frame.
211  ****************************************************************************  ****************************************************************************
212  ***************************************************************************/  ***************************************************************************/
213    
214    
215  /* These versions of the macros use the stack, as normal */  /* Numbers for RMATCH calls */
216    
217    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
218           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
219           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
220           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
221           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
222           RM51,  RM52, RM53 };
223    
224    
225    /* These versions of the macros use the stack, as normal. There are debugging
226    versions and production versions. Note that the "rw" argument of RMATCH isn't
227    actually used in this definition. */
228    
229  #ifndef NO_RECURSE  #ifndef NO_RECURSE
230  #define REGISTER register  #define REGISTER register
231  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)  
232    #ifdef DEBUG
233    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
234      { \
235      printf("match() called in line %d\n", __LINE__); \
236      rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
237      printf("to line %d\n", __LINE__); \
238      }
239    #define RRETURN(ra) \
240      { \
241      printf("match() returned %d from line %d ", ra, __LINE__); \
242      return ra; \
243      }
244    #else
245    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
246      rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
247  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
248    #endif
249    
250  #else  #else
251    
252    
253  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
254  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
255  match(), which never changes. */  argument of match(), which never changes. */
256    
257  #define REGISTER  #define REGISTER
258    
259  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
260    {\    {\
261    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
262    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
263      {\    newframe->Xeptr = ra;\
264      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
265      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
266      newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
267      newframe->Xims = re;\    newframe->Xims = re;\
268      newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
269      newframe->Xflags = rg;\    newframe->Xflags = rg;\
270      newframe->Xprevframe = frame;\    newframe->Xrdepth = frame->Xrdepth + 1;\
271      frame = newframe;\    newframe->Xprevframe = frame;\
272      DPRINTF(("restarting from line %d\n", __LINE__));\    frame = newframe;\
273      goto HEAP_RECURSE;\    DPRINTF(("restarting from line %d\n", __LINE__));\
274      }\    goto HEAP_RECURSE;\
275    else\    L_##rw:\
276      {\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
277    }    }
278    
279  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 235  match(), which never changes. */ Line 283  match(), which never changes. */
283    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
284    if (frame != NULL)\    if (frame != NULL)\
285      {\      {\
286      frame->Xresult = ra;\      rrc = ra;\
287      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
288      }\      }\
289    return ra;\    return ra;\
290    }    }
# Line 252  typedef struct heapframe { Line 299  typedef struct heapframe {
299    
300    const uschar *Xeptr;    const uschar *Xeptr;
301    const uschar *Xecode;    const uschar *Xecode;
302      const uschar *Xmstart;
303    int Xoffset_top;    int Xoffset_top;
304    long int Xims;    long int Xims;
305    eptrblock *Xeptrb;    eptrblock *Xeptrb;
306    int Xflags;    int Xflags;
307      unsigned int Xrdepth;
308    
309    /* Function local variables */    /* Function local variables */
310    
# Line 271  typedef struct heapframe { Line 320  typedef struct heapframe {
320    
321    BOOL Xcur_is_word;    BOOL Xcur_is_word;
322    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
323    BOOL Xprev_is_word;    BOOL Xprev_is_word;
324    
325    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
326    
327  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
328    int Xprop_type;    int Xprop_type;
329      int Xprop_value;
330    int Xprop_fail_result;    int Xprop_fail_result;
331    int Xprop_category;    int Xprop_category;
332    int Xprop_chartype;    int Xprop_chartype;
333    int Xprop_othercase;    int Xprop_script;
334    int Xprop_test_against;    int Xoclength;
335    int *Xprop_test_variable;    uschar Xocchars[8];
336  #endif  #endif
337    
338    int Xctype;    int Xctype;
339    int Xfc;    unsigned int Xfc;
340    int Xfi;    int Xfi;
341    int Xlength;    int Xlength;
342    int Xmax;    int Xmax;
# Line 301  typedef struct heapframe { Line 350  typedef struct heapframe {
350    
351    eptrblock Xnewptrb;    eptrblock Xnewptrb;
352    
353    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
354    
355    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
356    
357  } heapframe;  } heapframe;
358    
# Line 320  typedef struct heapframe { Line 368  typedef struct heapframe {
368  *         Match from current position            *  *         Match from current position            *
369  *************************************************/  *************************************************/
370    
371  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
372  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
373  same response.  same response.
374    
# Line 333  performance. Tests using gcc on a SPARC Line 378  performance. Tests using gcc on a SPARC
378  made performance worse.  made performance worse.
379    
380  Arguments:  Arguments:
381     eptr        pointer in subject     eptr        pointer to current character in subject
382     ecode       position in code     ecode       pointer to current position in compiled code
383       mstart      pointer to the current match start position (can be modified
384                     by encountering \K)
385     offset_top  current top pointer     offset_top  current top pointer
386     md          pointer to "static" info for the match     md          pointer to "static" info for the match
387     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 342  Arguments: Line 389  Arguments:
389                   brackets - for testing for empty matches                   brackets - for testing for empty matches
390     flags       can contain     flags       can contain
391                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
392                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
393                       group that can match an empty string
394       rdepth      the recursion depth
395    
396  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
397                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
398                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
399                   (e.g. stopped by recursion limit)                   (e.g. stopped by repeated call or recursion limit)
400  */  */
401    
402  static int  static int
403  match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
404    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
405    int flags)    int flags, unsigned int rdepth)
406  {  {
407  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
408  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
409  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
410    
411    register int  rrc;         /* Returns from recursive calls */
412    register int  i;           /* Used for loops not involving calls to RMATCH() */
413    register unsigned int c;   /* Character values not kept over RMATCH() calls */
414    register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
415    
416  register int  rrc;    /* Returns from recursive calls */  BOOL minimize, possessive; /* Quantifier options */
 register int  i;      /* Used for loops not involving calls to RMATCH() */  
 register int  c;      /* Character values not kept over RMATCH() calls */  
 register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  
417    
418  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
419  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 377  frame->Xprevframe = NULL;            /* Line 428  frame->Xprevframe = NULL;            /*
428    
429  frame->Xeptr = eptr;  frame->Xeptr = eptr;
430  frame->Xecode = ecode;  frame->Xecode = ecode;
431    frame->Xmstart = mstart;
432  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
433  frame->Xims = ims;  frame->Xims = ims;
434  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
435  frame->Xflags = flags;  frame->Xflags = flags;
436    frame->Xrdepth = rdepth;
437    
438  /* This is where control jumps back to to effect "recursion" */  /* This is where control jumps back to to effect "recursion" */
439    
# Line 390  HEAP_RECURSE: Line 443  HEAP_RECURSE:
443    
444  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
445  #define ecode              frame->Xecode  #define ecode              frame->Xecode
446    #define mstart             frame->Xmstart
447  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
448  #define ims                frame->Xims  #define ims                frame->Xims
449  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
450  #define flags              frame->Xflags  #define flags              frame->Xflags
451    #define rdepth             frame->Xrdepth
452    
453  /* Ditto for the local variables */  /* Ditto for the local variables */
454    
# Line 411  HEAP_RECURSE: Line 466  HEAP_RECURSE:
466    
467  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
468  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
469  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
470    
471  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
472    
473  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
474  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
475    #define prop_value         frame->Xprop_value
476  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
477  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
478  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
479  #define prop_othercase     frame->Xprop_othercase  #define prop_script        frame->Xprop_script
480  #define prop_test_against  frame->Xprop_test_against  #define oclength           frame->Xoclength
481  #define prop_test_variable frame->Xprop_test_variable  #define occhars            frame->Xocchars
482  #endif  #endif
483    
484  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 447  HEAP_RECURSE: Line 502  HEAP_RECURSE:
502  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
503  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
504    
505  #else  #else         /* NO_RECURSE not defined */
506  #define fi i  #define fi i
507  #define fc c  #define fc c
508    
509    
510  #ifdef SUPPORT_UTF8                /* Many of these variables are used ony */  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
511  const uschar *charptr;             /* small blocks of the code. My normal  */  const uschar *charptr;             /* in small blocks of the code. My normal */
512  #endif                             /* style of coding would have declared  */  #endif                             /* style of coding would have declared    */
513  const uschar *callpat;             /* them within each of those blocks.    */  const uschar *callpat;             /* them within each of those blocks.      */
514  const uschar *data;                /* However, in order to accommodate the */  const uschar *data;                /* However, in order to accommodate the   */
515  const uschar *next;                /* version of this code that uses an    */  const uschar *next;                /* version of this code that uses an      */
516  const uschar *pp;                  /* external "stack" implemented on the  */  USPTR         pp;                  /* external "stack" implemented on the    */
517  const uschar *prev;                /* heap, it is easier to declare them   */  const uschar *prev;                /* heap, it is easier to declare them all */
518  const uschar *saved_eptr;          /* all here, so the declarations can    */  USPTR         saved_eptr;          /* here, so the declarations can be cut   */
519                                     /* be cut out in a block. The only      */                                     /* out in a block. The only declarations  */
520  recursion_info new_recursive;      /* declarations within blocks below are */  recursion_info new_recursive;      /* within blocks below are for variables  */
521                                     /* for variables that do not have to    */                                     /* that do not have to be preserved over  */
522  BOOL cur_is_word;                  /* be preserved over a recursive call   */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
523  BOOL condition;                    /* to RMATCH().                         */  BOOL condition;
 BOOL minimize;  
524  BOOL prev_is_word;  BOOL prev_is_word;
525    
526  unsigned long int original_ims;  unsigned long int original_ims;
527    
528  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
529  int prop_type;  int prop_type;
530    int prop_value;
531  int prop_fail_result;  int prop_fail_result;
532  int prop_category;  int prop_category;
533  int prop_chartype;  int prop_chartype;
534  int prop_othercase;  int prop_script;
535  int prop_test_against;  int oclength;
536  int *prop_test_variable;  uschar occhars[8];
537  #endif  #endif
538    
539  int ctype;  int ctype;
# Line 493  int save_offset1, save_offset2, save_off Line 548  int save_offset1, save_offset2, save_off
548  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
549    
550  eptrblock newptrb;  eptrblock newptrb;
551  #endif  #endif     /* NO_RECURSE */
552    
553  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
554  variables. */  variables. */
555    
556  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
557    prop_value = 0;
558  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_against = 0;  
 prop_test_variable = NULL;  
559  #endif  #endif
560    
561  /* OK, now we can get on with the real code of the function. Recursion is  
562  specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,  /* This label is used for tail recursion, which is used in a few cases even
563  these just turn into a recursive call to match() and a "return", respectively.  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
564  However, RMATCH isn't like a function call because it's quite a complicated  used. Thanks to Ian Taylor for noticing this possibility and sending the
565  macro. It has to be used in one particular way. This shouldn't, however, impact  original patch. */
566  performance when true recursion is being used. */  
567    TAIL_RECURSE:
568    
569    /* OK, now we can get on with the real code of the function. Recursive calls
570    are specified by the macro RMATCH and RRETURN is used to return. When
571    NO_RECURSE is *not* defined, these just turn into a recursive call to match()
572    and a "return", respectively (possibly with some debugging if DEBUG is
573    defined). However, RMATCH isn't like a function call because it's quite a
574    complicated macro. It has to be used in one particular way. This shouldn't,
575    however, impact performance when true recursion is being used. */
576    
577    #ifdef SUPPORT_UTF8
578    utf8 = md->utf8;       /* Local copy of the flag */
579    #else
580    utf8 = FALSE;
581    #endif
582    
583    /* First check that we haven't called match() too many times, or that we
584    haven't exceeded the recursive call limit. */
585    
586  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
587    if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
588    
589  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
 utf8 = md->utf8;       /* Local copy of the flag */  
590    
591  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
592  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
593  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
594  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
595    When match() is called in other circumstances, don't add to the chain. The
596    match_cbegroup flag must NOT be used with tail recursion, because the memory
597    block that is used is on the stack, so a new one may be required for each
598    match(). */
599    
600  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
601    {    {
   newptrb.epb_prev = eptrb;  
602    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
603      newptrb.epb_prev = eptrb;
604    eptrb = &newptrb;    eptrb = &newptrb;
605    }    }
606    
607  /* Now start processing the operations. */  /* Now start processing the opcodes. */
608    
609  for (;;)  for (;;)
610    {    {
611      minimize = possessive = FALSE;
612    op = *ecode;    op = *ecode;
   minimize = FALSE;  
613    
614    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
615    matching at least one subject character. */    matching at least one subject character. */
616    
617    if (md->partial &&    if (md->partial &&
618        eptr >= md->end_subject &&        eptr >= md->end_subject &&
619        eptr > md->start_match)        eptr > mstart)
620      md->hitend = TRUE;      md->hitend = TRUE;
621    
622    /* Opening capturing bracket. If there is space in the offset vector, save    switch(op)
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
623      {      {
624      number = op - OP_BRA;      case OP_FAIL:
625        return MATCH_NOMATCH;
626    
627      /* For extended extraction brackets (large number), we have to fish out the      case OP_PRUNE:
628      number from a dummy opcode at the start. */      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
629          ims, eptrb, flags, RM51);
630      if (number > EXTRACT_BASIC_MAX)      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
631        number = GET2(ecode, 2+LINK_SIZE);      return MATCH_PRUNE;
632    
633        case OP_COMMIT:
634        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
635          ims, eptrb, flags, RM52);
636        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
637        return MATCH_COMMIT;
638    
639        case OP_SKIP:
640        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
641          ims, eptrb, flags, RM53);
642        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
643        md->start_match_ptr = eptr;   /* Pass back current position */
644        return MATCH_SKIP;
645    
646        case OP_THEN:
647        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
648          ims, eptrb, flags, RM53);
649        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
650        return MATCH_THEN;
651    
652        /* Handle a capturing bracket. If there is space in the offset vector, save
653        the current subject position in the working slot at the top of the vector.
654        We mustn't change the current values of the data slot, because they may be
655        set from a previous iteration of this group, and be referred to by a
656        reference inside the group.
657    
658        If the bracket fails to match, we need to restore this value and also the
659        values of the final offsets, in case they were set by a previous iteration
660        of the same bracket.
661    
662        If there isn't enough space in the offset vector, treat this as if it were
663        a non-capturing bracket. Don't worry about setting the flag for the error
664        case here; that is handled in the code for KET. */
665    
666        case OP_CBRA:
667        case OP_SCBRA:
668        number = GET2(ecode, 1+LINK_SIZE);
669      offset = number << 1;      offset = number << 1;
670    
671  #ifdef DEBUG  #ifdef DEBUG
672      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
673        printf("subject=");
674      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
675      printf("\n");      printf("\n");
676  #endif  #endif
# Line 584  for (;;) Line 685  for (;;)
685        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
686        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
687    
688          flags = (op == OP_SCBRA)? match_cbegroup : 0;
689        do        do
690          {          {
691          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
692            match_isgroup);            ims, eptrb, flags, RM1);
693          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
694          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
695          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
696          }          }
# Line 603  for (;;) Line 705  for (;;)
705        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
706        }        }
707    
708      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
709        as a non-capturing bracket. */
710    
711      else op = OP_BRA;      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
712      }      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
713    
714    /* Other types of node can be handled by a switch */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
715    
716    switch(op)      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
717      {      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
718      case OP_BRA:     /* Non-capturing bracket: optimized */  
719      DPRINTF(("start bracket 0\n"));      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
720      do      final alternative within the brackets, we would return the result of a
721        recursive call to match() whatever happened. We can reduce stack usage by
722        turning this into a tail recursion, except in the case when match_cbegroup
723        is set.*/
724    
725        case OP_BRA:
726        case OP_SBRA:
727        DPRINTF(("start non-capturing bracket\n"));
728        flags = (op >= OP_SBRA)? match_cbegroup : 0;
729        for (;;)
730        {        {
731        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
732          match_isgroup);          {
733        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (flags == 0)    /* Not a possibly empty group */
734              {
735              ecode += _pcre_OP_lengths[*ecode];
736              DPRINTF(("bracket 0 tail recursion\n"));
737              goto TAIL_RECURSE;
738              }
739    
740            /* Possibly empty group; can't use tail recursion. */
741    
742            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
743              eptrb, flags, RM48);
744            RRETURN(rrc);
745            }
746    
747          /* For non-final alternatives, continue the loop for a NOMATCH result;
748          otherwise return. */
749    
750          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
751            eptrb, flags, RM2);
752          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
753        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
754        }        }
755      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
756    
757      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
758      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
759      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
760      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
761        obeyed, we can use tail recursion to avoid using another stack frame. */
762    
763      case OP_COND:      case OP_COND:
764      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
765        if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
766          {
767          offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
768          condition = md->recursive != NULL &&
769            (offset == RREF_ANY || offset == md->recursive->group_num);
770          ecode += condition? 3 : GET(ecode, 1);
771          }
772    
773        else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
774        {        {
775        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
776        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
777          (md->recursive != NULL) :        ecode += condition? 3 : GET(ecode, 1);
778          (offset < offset_top && md->offset_vector[offset] >= 0);        }
779        RMATCH(rrc, eptr, ecode + (condition?  
780          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
781          offset_top, md, ims, eptrb, match_isgroup);        {
782        RRETURN(rrc);        condition = FALSE;
783          ecode += GET(ecode, 1);
784        }        }
785    
786      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
787      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
788        assertion. */
789    
790      else      else
791        {        {
792        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
793            match_condassert | match_isgroup);            match_condassert, RM3);
794        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
795          {          {
796          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
797            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
798          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
799          }          }
800        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
801          {          {
802          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
803          }          }
804        else ecode += GET(ecode, 1);        else
805        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
806          match_isgroup);          condition = FALSE;
807        RRETURN(rrc);          ecode += GET(ecode, 1);
808            }
809        }        }
     /* Control never reaches here */  
810    
811      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
812      encountered. */      we can use tail recursion to avoid using another stack frame, except when
813        match_cbegroup is required for an unlimited repeat of a possibly empty
814        group. If the second alternative doesn't exist, we can just plough on. */
815    
816      case OP_CREF:      if (condition || *ecode == OP_ALT)
817      case OP_BRANUMBER:        {
818      ecode += 3;        ecode += 1 + LINK_SIZE;
819          if (op == OP_SCOND)        /* Possibly empty group */
820            {
821            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
822            RRETURN(rrc);
823            }
824          else                       /* Group must match something */
825            {
826            flags = 0;
827            goto TAIL_RECURSE;
828            }
829          }
830        else                         /* Condition false & no 2nd alternative */
831          {
832          ecode += 1 + LINK_SIZE;
833          }
834      break;      break;
835    
     /* End of the pattern. If we are in a recursion, we should restore the  
     offsets appropriately and continue from after the call. */  
836    
837        /* End of the pattern, either real or forced. If we are in a top-level
838        recursion, we should restore the offsets appropriately and continue from
839        after the call. */
840    
841        case OP_ACCEPT:
842      case OP_END:      case OP_END:
843      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
844        {        {
845        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
846        DPRINTF(("Hit the end in a (?0) recursion\n"));        DPRINTF(("End of pattern in a (?0) recursion\n"));
847        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
848        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
849          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
850        md->start_match = rec->save_start;        mstart = rec->save_start;
851        ims = original_ims;        ims = original_ims;
852        ecode = rec->after_call;        ecode = rec->after_call;
853        break;        break;
# Line 694  for (;;) Line 856  for (;;)
856      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
857      string - backtracking will then try other alternatives, if any. */      string - backtracking will then try other alternatives, if any. */
858    
859      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
860      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
861      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
862        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
863      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
864    
865      /* Change option settings */      /* Change option settings */
# Line 717  for (;;) Line 880  for (;;)
880      case OP_ASSERTBACK:      case OP_ASSERTBACK:
881      do      do
882        {        {
883        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
884          match_isgroup);          RM4);
885        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
886        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
887        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
888        }        }
889      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 744  for (;;) Line 907  for (;;)
907      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
908      do      do
909        {        {
910        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
911          match_isgroup);          RM5);
912        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
913        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
914        ecode += GET(ecode,1);        ecode += GET(ecode,1);
915        }        }
916      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 766  for (;;) Line 929  for (;;)
929  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
930      if (utf8)      if (utf8)
931        {        {
932        c = GET(ecode,1);        i = GET(ecode, 1);
933        for (i = 0; i < c; i++)        while (i-- > 0)
934          {          {
935          eptr--;          eptr--;
936          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
937          BACKCHAR(eptr)          BACKCHAR(eptr);
938          }          }
939        }        }
940      else      else
# Line 780  for (;;) Line 943  for (;;)
943      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
944    
945        {        {
946        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
947        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
948        }        }
949    
# Line 800  for (;;) Line 963  for (;;)
963        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 1;   /* Version 1 of the callout block */
964        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
965        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
966        cb.subject          = (const char *)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
967        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
968        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
969        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
970        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
971        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
# Line 837  for (;;) Line 1000  for (;;)
1000      case OP_RECURSE:      case OP_RECURSE:
1001        {        {
1002        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1003        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
1004            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
1005    
1006        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1007    
# Line 869  for (;;) Line 1027  for (;;)
1027    
1028        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1029              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1030        new_recursive.save_start = md->start_match;        new_recursive.save_start = mstart;
1031        md->start_match = eptr;        mstart = eptr;
1032    
1033        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1034        restore the offset and recursion data. */        restore the offset and recursion data. */
1035    
1036        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1037          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1038        do        do
1039          {          {
1040          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1041              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
1042          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
1043            {            {
1044              DPRINTF(("Recursion matched\n"));
1045            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1046            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1047              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1048            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
1049            }            }
1050          else if (rrc != MATCH_NOMATCH) RRETURN(rrc);          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1051              {
1052              DPRINTF(("Recursion gave error %d\n", rrc));
1053              RRETURN(rrc);
1054              }
1055    
1056          md->recursive = &new_recursive;          md->recursive = &new_recursive;
1057          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
# Line 912  for (;;) Line 1076  for (;;)
1076      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer. */
1077    
1078      case OP_ONCE:      case OP_ONCE:
1079        {      prev = ecode;
1080        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1081    
1082        do      do
1083          {        {
1084          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1085            eptrb, match_isgroup);        if (rrc == MATCH_MATCH) break;
1086          if (rrc == MATCH_MATCH) break;        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1087          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += GET(ecode,1);
1088          ecode += GET(ecode,1);        }
1089          }      while (*ecode == OP_ALT);
       while (*ecode == OP_ALT);  
1090    
1091        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1092    
1093        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1094    
1095        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1096        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1097    
1098        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1099    
1100        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1101        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1102    
1103        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1104        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1105        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1106        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1107        course of events. */      course of events. */
1108    
1109        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1110          {        {
1111          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1112          break;        break;
1113          }        }
1114    
1115        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1116        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1117        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1118        opcode. */      any options that changed within the bracket before re-running it, so
1119        check the next opcode. */
1120    
1121        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1122          {        {
1123          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1124          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1125          }        }
1126    
1127        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1128          {        {
1129          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1130          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1131          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1132          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = 0;
1133          }        goto TAIL_RECURSE;
1134        else  /* OP_KETRMAX */        }
1135          {      else  /* OP_KETRMAX */
1136          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        {
1137          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1138          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1139          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += 1 + LINK_SIZE;
1140          }        flags = 0;
1141          goto TAIL_RECURSE;
1142        }        }
1143      RRETURN(MATCH_NOMATCH);      /* Control never gets here */
1144    
1145      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1146      bracketed group and go to there. */      bracketed group and go to there. */
# Line 994  for (;;) Line 1158  for (;;)
1158      case OP_BRAZERO:      case OP_BRAZERO:
1159        {        {
1160        next = ecode+1;        next = ecode+1;
1161        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1162        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1163        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1164        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1165        }        }
1166      break;      break;
1167    
1168      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1169        {        {
1170        next = ecode+1;        next = ecode+1;
1171        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1172        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1173        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1174        ecode++;        ecode++;
1175        }        }
1176      break;      break;
1177    
1178      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. */
     an assertion "group", stop matching and return MATCH_MATCH, but record the  
     current high water mark for use by positive assertions. Do this also  
     for the "once" (not-backup up) groups. */  
1179    
1180      case OP_KET:      case OP_KET:
1181      case OP_KETRMIN:      case OP_KETRMIN:
1182      case OP_KETRMAX:      case OP_KETRMAX:
1183        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
1184    
1185        /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1186        infinite repeats of empty string matches, retrieve the subject start from
1187        the chain. Otherwise, set it NULL. */
1188    
1189        eptrb = eptrb->epb_prev;      if (*prev >= OP_SBRA)
1190          {
1191        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1192            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1193            *prev == OP_ONCE)        }
1194          {      else saved_eptr = NULL;
         md->end_match_ptr = eptr;      /* For ONCE */  
         md->end_offset_top = offset_top;  
         RRETURN(MATCH_MATCH);  
         }  
1195    
1196        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group, stop matching and return
1197        group number back at the start and if necessary complete handling an      MATCH_MATCH, but record the current high water mark for use by positive
1198        extraction by setting the offsets and bumping the high water mark. */      assertions. Do this also for the "once" (atomic) groups. */
1199    
1200        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1201          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1202          number = *prev - OP_BRA;          *prev == OP_ONCE)
1203          {
1204          md->end_match_ptr = eptr;      /* For ONCE */
1205          md->end_offset_top = offset_top;
1206          RRETURN(MATCH_MATCH);
1207          }
1208    
1209          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1210          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1211        bumping the high water mark. Note that whole-pattern recursion is coded as
1212        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1213        when the OP_END is reached. Other recursion is handled here. */
1214    
1215          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1216          offset = number << 1;        {
1217          number = GET2(prev, 1+LINK_SIZE);
1218          offset = number << 1;
1219    
1220  #ifdef DEBUG  #ifdef DEBUG
1221          printf("end bracket %d", number);        printf("end bracket %d", number);
1222          printf("\n");        printf("\n");
1223  #endif  #endif
1224    
1225          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1226          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1227          into group 0, so it won't be picked up here. Instead, we catch it when          {
1228          the OP_END is reached. */          md->offset_vector[offset] =
1229              md->offset_vector[md->offset_end - number];
1230          if (number > 0)          md->offset_vector[offset+1] = eptr - md->start_subject;
1231            {          if (offset_top <= offset) offset_top = offset + 2;
1232            md->capture_last = number;          }
1233            if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
1234              {        /* Handle a recursively called group. Restore the offsets
1235              md->offset_vector[offset] =        appropriately and continue from after the call. */
1236                md->offset_vector[md->offset_end - number];  
1237              md->offset_vector[offset+1] = eptr - md->start_subject;        if (md->recursive != NULL && md->recursive->group_num == number)
1238              if (offset_top <= offset) offset_top = offset + 2;          {
1239              }          recursion_info *rec = md->recursive;
1240            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1241            /* Handle a recursively called group. Restore the offsets          md->recursive = rec->prevrec;
1242            appropriately and continue from after the call. */          mstart = rec->save_start;
1243            memcpy(md->offset_vector, rec->offset_save,
1244            if (md->recursive != NULL && md->recursive->group_num == number)            rec->saved_max * sizeof(int));
1245              {          ecode = rec->after_call;
1246              recursion_info *rec = md->recursive;          ims = original_ims;
1247              DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          break;
             md->recursive = rec->prevrec;  
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
1248          }          }
1249          }
1250    
1251        /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1252        the group. */      flags, in case they got changed during the group. */
1253    
1254        ims = original_ims;      ims = original_ims;
1255        DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
1256    
1257        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1258        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1259        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1260        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1261        course of events. */      course of events. */
1262    
1263        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1264          {        {
1265          ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1266          break;        break;
1267          }        }
1268    
1269        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1270        preceding bracket, in the appropriate order. */      preceding bracket, in the appropriate order. In the second case, we can use
1271        tail recursion to avoid using another stack frame, unless we have an
1272        unlimited repeat of a group that can match an empty string. */
1273    
1274        if (*ecode == OP_KETRMIN)      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1275          {  
1276          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);      if (*ecode == OP_KETRMIN)
1277          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        {
1278          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1279          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1280          }        if (flags != 0)    /* Could match an empty string */
       else  /* OP_KETRMAX */  
1281          {          {
1282          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1283          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          RRETURN(rrc);
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
1284          }          }
1285          ecode = prev;
1286          goto TAIL_RECURSE;
1287        }        }
1288        else  /* OP_KETRMAX */
1289      RRETURN(MATCH_NOMATCH);        {
1290          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1291          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1292          ecode += 1 + LINK_SIZE;
1293          flags = 0;
1294          goto TAIL_RECURSE;
1295          }
1296        /* Control never gets here */
1297    
1298      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1299    
# Line 1135  for (;;) Line 1301  for (;;)
1301      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1302      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1303        {        {
1304        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1305              (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1306          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1307        ecode++;        ecode++;
1308        break;        break;
# Line 1156  for (;;) Line 1323  for (;;)
1323      ecode++;      ecode++;
1324      break;      break;
1325    
1326        /* Reset the start of match point */
1327    
1328        case OP_SET_SOM:
1329        mstart = eptr;
1330        ecode++;
1331        break;
1332    
1333      /* Assert before internal newline if multiline, or before a terminating      /* Assert before internal newline if multiline, or before a terminating
1334      newline unless endonly is set, else end of subject unless noteol is set. */      newline unless endonly is set, else end of subject unless noteol is set. */
1335    
# Line 1163  for (;;) Line 1337  for (;;)
1337      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1338        {        {
1339        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1340          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1341        else        else
1342          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1343        ecode++;        ecode++;
# Line 1174  for (;;) Line 1348  for (;;)
1348        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
1349        if (!md->endonly)        if (!md->endonly)
1350          {          {
1351          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1352             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1353            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1354          ecode++;          ecode++;
1355          break;          break;
1356          }          }
1357        }        }
1358      /* ... else fall through */      /* ... else fall through for endonly */
1359    
1360      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1361    
# Line 1193  for (;;) Line 1367  for (;;)
1367      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1368    
1369      case OP_EODN:      case OP_EODN:
1370      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1371         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1372          RRETURN(MATCH_NOMATCH);
1373      ecode++;      ecode++;
1374      break;      break;
1375    
# Line 1247  for (;;) Line 1422  for (;;)
1422      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1423    
1424      case OP_ANY:      case OP_ANY:
1425      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if ((ims & PCRE_DOTALL) == 0)
1426        RRETURN(MATCH_NOMATCH);        {
1427          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1428          }
1429      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
 #ifdef SUPPORT_UTF8  
1430      if (utf8)      if (utf8)
1431        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 #endif  
1432      ecode++;      ecode++;
1433      break;      break;
1434    
# Line 1343  for (;;) Line 1518  for (;;)
1518      ecode++;      ecode++;
1519      break;      break;
1520    
1521        case OP_ANYNL:
1522        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1523        GETCHARINCTEST(c, eptr);
1524        switch(c)
1525          {
1526          default: RRETURN(MATCH_NOMATCH);
1527          case 0x000d:
1528          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1529          break;
1530          case 0x000a:
1531          case 0x000b:
1532          case 0x000c:
1533          case 0x0085:
1534          case 0x2028:
1535          case 0x2029:
1536          break;
1537          }
1538        ecode++;
1539        break;
1540    
1541        case OP_NOT_HSPACE:
1542        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1543        GETCHARINCTEST(c, eptr);
1544        switch(c)
1545          {
1546          default: break;
1547          case 0x09:      /* HT */
1548          case 0x20:      /* SPACE */
1549          case 0xa0:      /* NBSP */
1550          case 0x1680:    /* OGHAM SPACE MARK */
1551          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1552          case 0x2000:    /* EN QUAD */
1553          case 0x2001:    /* EM QUAD */
1554          case 0x2002:    /* EN SPACE */
1555          case 0x2003:    /* EM SPACE */
1556          case 0x2004:    /* THREE-PER-EM SPACE */
1557          case 0x2005:    /* FOUR-PER-EM SPACE */
1558          case 0x2006:    /* SIX-PER-EM SPACE */
1559          case 0x2007:    /* FIGURE SPACE */
1560          case 0x2008:    /* PUNCTUATION SPACE */
1561          case 0x2009:    /* THIN SPACE */
1562          case 0x200A:    /* HAIR SPACE */
1563          case 0x202f:    /* NARROW NO-BREAK SPACE */
1564          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1565          case 0x3000:    /* IDEOGRAPHIC SPACE */
1566          RRETURN(MATCH_NOMATCH);
1567          }
1568        ecode++;
1569        break;
1570    
1571        case OP_HSPACE:
1572        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1573        GETCHARINCTEST(c, eptr);
1574        switch(c)
1575          {
1576          default: RRETURN(MATCH_NOMATCH);
1577          case 0x09:      /* HT */
1578          case 0x20:      /* SPACE */
1579          case 0xa0:      /* NBSP */
1580          case 0x1680:    /* OGHAM SPACE MARK */
1581          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1582          case 0x2000:    /* EN QUAD */
1583          case 0x2001:    /* EM QUAD */
1584          case 0x2002:    /* EN SPACE */
1585          case 0x2003:    /* EM SPACE */
1586          case 0x2004:    /* THREE-PER-EM SPACE */
1587          case 0x2005:    /* FOUR-PER-EM SPACE */
1588          case 0x2006:    /* SIX-PER-EM SPACE */
1589          case 0x2007:    /* FIGURE SPACE */
1590          case 0x2008:    /* PUNCTUATION SPACE */
1591          case 0x2009:    /* THIN SPACE */
1592          case 0x200A:    /* HAIR SPACE */
1593          case 0x202f:    /* NARROW NO-BREAK SPACE */
1594          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1595          case 0x3000:    /* IDEOGRAPHIC SPACE */
1596          break;
1597          }
1598        ecode++;
1599        break;
1600    
1601        case OP_NOT_VSPACE:
1602        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1603        GETCHARINCTEST(c, eptr);
1604        switch(c)
1605          {
1606          default: break;
1607          case 0x0a:      /* LF */
1608          case 0x0b:      /* VT */
1609          case 0x0c:      /* FF */
1610          case 0x0d:      /* CR */
1611          case 0x85:      /* NEL */
1612          case 0x2028:    /* LINE SEPARATOR */
1613          case 0x2029:    /* PARAGRAPH SEPARATOR */
1614          RRETURN(MATCH_NOMATCH);
1615          }
1616        ecode++;
1617        break;
1618    
1619        case OP_VSPACE:
1620        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1621        GETCHARINCTEST(c, eptr);
1622        switch(c)
1623          {
1624          default: RRETURN(MATCH_NOMATCH);
1625          case 0x0a:      /* LF */
1626          case 0x0b:      /* VT */
1627          case 0x0c:      /* FF */
1628          case 0x0d:      /* CR */
1629          case 0x85:      /* NEL */
1630          case 0x2028:    /* LINE SEPARATOR */
1631          case 0x2029:    /* PARAGRAPH SEPARATOR */
1632          break;
1633          }
1634        ecode++;
1635        break;
1636    
1637  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1638      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1639      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1352  for (;;) Line 1643  for (;;)
1643      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1644      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1645        {        {
1646        int chartype, rqdtype;        int chartype, script;
1647        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = _pcre_ucp_findchar(c, &chartype, &othercase);  
   
       rqdtype = *(++ecode);  
       ecode++;  
1648    
1649        if (rqdtype >= 128)        switch(ecode[1])
1650          {          {
1651          if ((rqdtype - 128 != category) == (op == OP_PROP))          case PT_ANY:
1652            if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1653            break;
1654    
1655            case PT_LAMP:
1656            if ((chartype == ucp_Lu ||
1657                 chartype == ucp_Ll ||
1658                 chartype == ucp_Lt) == (op == OP_NOTPROP))
1659            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1660          }           break;
1661        else  
1662          {          case PT_GC:
1663          if ((rqdtype != chartype) == (op == OP_PROP))          if ((ecode[2] != category) == (op == OP_PROP))
1664              RRETURN(MATCH_NOMATCH);
1665            break;
1666    
1667            case PT_PC:
1668            if ((ecode[2] != chartype) == (op == OP_PROP))
1669              RRETURN(MATCH_NOMATCH);
1670            break;
1671    
1672            case PT_SC:
1673            if ((ecode[2] != script) == (op == OP_PROP))
1674            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1675            break;
1676    
1677            default:
1678            RRETURN(PCRE_ERROR_INTERNAL);
1679          }          }
1680    
1681          ecode += 3;
1682        }        }
1683      break;      break;
1684    
# Line 1379  for (;;) Line 1689  for (;;)
1689      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1690      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1691        {        {
1692        int chartype;        int chartype, script;
1693        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = _pcre_ucp_findchar(c, &chartype, &othercase);  
1694        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1695        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1696          {          {
# Line 1390  for (;;) Line 1699  for (;;)
1699            {            {
1700            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1701            }            }
1702          category = _pcre_ucp_findchar(c, &chartype, &othercase);          category = _pcre_ucp_findprop(c, &chartype, &script);
1703          if (category != ucp_M) break;          if (category != ucp_M) break;
1704          eptr += len;          eptr += len;
1705          }          }
# Line 1480  for (;;) Line 1789  for (;;)
1789          {          {
1790          for (fi = min;; fi++)          for (fi = min;; fi++)
1791            {            {
1792            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1793            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1794            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1795              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 1501  for (;;) Line 1810  for (;;)
1810            }            }
1811          while (eptr >= pp)          while (eptr >= pp)
1812            {            {
1813            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1814            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1815            eptr -= length;            eptr -= length;
1816            }            }
# Line 1606  for (;;) Line 1915  for (;;)
1915            {            {
1916            for (fi = min;; fi++)            for (fi = min;; fi++)
1917              {              {
1918              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1919              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1920              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1921              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 1626  for (;;) Line 1935  for (;;)
1935            {            {
1936            for (fi = min;; fi++)            for (fi = min;; fi++)
1937              {              {
1938              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1939              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1940              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1941              c = *eptr++;              c = *eptr++;
# Line 1663  for (;;) Line 1972  for (;;)
1972              }              }
1973            for (;;)            for (;;)
1974              {              {
1975              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1976              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1977              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
1978              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1682  for (;;) Line 1991  for (;;)
1991              }              }
1992            while (eptr >= pp)            while (eptr >= pp)
1993              {              {
1994              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
             eptr--;  
1995              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1996                eptr--;
1997              }              }
1998            }            }
1999    
# Line 1753  for (;;) Line 2062  for (;;)
2062          {          {
2063          for (fi = min;; fi++)          for (fi = min;; fi++)
2064            {            {
2065            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2066            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2067            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2068            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
# Line 1777  for (;;) Line 2086  for (;;)
2086            }            }
2087          for(;;)          for(;;)
2088            {            {
2089            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2090            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2091            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2092            BACKCHAR(eptr)            BACKCHAR(eptr);
2093            }            }
2094          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2095          }          }
# Line 1836  for (;;) Line 2145  for (;;)
2145    
2146        else        else
2147          {          {
2148          int dc;          unsigned int dc;
2149          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2150          ecode += length;          ecode += length;
2151    
2152          /* If we have Unicode property support, we can use it to test the other          /* If we have Unicode property support, we can use it to test the other
2153          case of the character, if there is one. The result of _pcre_ucp_findchar() is          case of the character, if there is one. */
         < 0 if the char isn't found, and othercase is returned as zero if there  
         isn't one. */  
2154    
2155          if (fc != dc)          if (fc != dc)
2156            {            {
2157  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2158            int chartype;            if (dc != _pcre_ucp_othercase(fc))
           int othercase;  
           if (_pcre_ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)  
2159  #endif  #endif
2160              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2161            }            }
# Line 1867  for (;;) Line 2172  for (;;)
2172        }        }
2173      break;      break;
2174    
2175      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2176    
2177      case OP_EXACT:      case OP_EXACT:
2178      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2179      ecode += 3;      ecode += 3;
2180      goto REPEATCHAR;      goto REPEATCHAR;
2181    
2182        case OP_POSUPTO:
2183        possessive = TRUE;
2184        /* Fall through */
2185    
2186      case OP_UPTO:      case OP_UPTO:
2187      case OP_MINUPTO:      case OP_MINUPTO:
2188      min = 0;      min = 0;
# Line 1882  for (;;) Line 2191  for (;;)
2191      ecode += 3;      ecode += 3;
2192      goto REPEATCHAR;      goto REPEATCHAR;
2193    
2194        case OP_POSSTAR:
2195        possessive = TRUE;
2196        min = 0;
2197        max = INT_MAX;
2198        ecode++;
2199        goto REPEATCHAR;
2200    
2201        case OP_POSPLUS:
2202        possessive = TRUE;
2203        min = 1;
2204        max = INT_MAX;
2205        ecode++;
2206        goto REPEATCHAR;
2207    
2208        case OP_POSQUERY:
2209        possessive = TRUE;
2210        min = 0;
2211        max = 1;
2212        ecode++;
2213        goto REPEATCHAR;
2214    
2215      case OP_STAR:      case OP_STAR:
2216      case OP_MINSTAR:      case OP_MINSTAR:
2217      case OP_PLUS:      case OP_PLUS:
# Line 1913  for (;;) Line 2243  for (;;)
2243    
2244        if (length > 1)        if (length > 1)
2245          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2246  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2247          int othercase;          unsigned int othercase;
         int chartype;  
2248          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2249               _pcre_ucp_findchar(fc, &chartype, &othercase) >= 0 &&              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
              othercase > 0)  
2250            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2251            else oclength = 0;
2252  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2253    
2254          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2255            {            {
2256            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (memcmp(eptr, charptr, length) == 0) eptr += length;
2257    #ifdef SUPPORT_UCP
2258            /* Need braces because of following else */            /* Need braces because of following else */
2259            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2260            else            else
# Line 1935  for (;;) Line 2262  for (;;)
2262              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2263              eptr += oclength;              eptr += oclength;
2264              }              }
2265    #else   /* without SUPPORT_UCP */
2266              else { RRETURN(MATCH_NOMATCH); }
2267    #endif  /* SUPPORT_UCP */
2268            }            }
2269    
2270          if (min == max) continue;          if (min == max) continue;
# Line 1943  for (;;) Line 2273  for (;;)
2273            {            {
2274            for (fi = min;; fi++)            for (fi = min;; fi++)
2275              {              {
2276              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2277              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2278              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2279              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2280    #ifdef SUPPORT_UCP
2281              /* Need braces because of following else */              /* Need braces because of following else */
2282              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2283              else              else
# Line 1954  for (;;) Line 2285  for (;;)
2285                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2286                eptr += oclength;                eptr += oclength;
2287                }                }
2288    #else   /* without SUPPORT_UCP */
2289                else { RRETURN (MATCH_NOMATCH); }
2290    #endif  /* SUPPORT_UCP */
2291              }              }
2292            /* Control never gets here */            /* Control never gets here */
2293            }            }
2294          else  
2295            else  /* Maximize */
2296            {            {
2297            pp = eptr;            pp = eptr;
2298            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2299              {              {
2300              if (eptr > md->end_subject - length) break;              if (eptr > md->end_subject - length) break;
2301              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2302    #ifdef SUPPORT_UCP
2303              else if (oclength == 0) break;              else if (oclength == 0) break;
2304              else              else
2305                {                {
2306                if (memcmp(eptr, occhars, oclength) != 0) break;                if (memcmp(eptr, occhars, oclength) != 0) break;
2307                eptr += oclength;                eptr += oclength;
2308                }                }
2309    #else   /* without SUPPORT_UCP */
2310                else break;
2311    #endif  /* SUPPORT_UCP */
2312              }              }
2313            while (eptr >= pp)  
2314             {            if (possessive) continue;
2315             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            for(;;)
2316               {
2317               RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2318             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2319               if (eptr == pp) RRETURN(MATCH_NOMATCH);
2320    #ifdef SUPPORT_UCP
2321               eptr--;
2322               BACKCHAR(eptr);
2323    #else   /* without SUPPORT_UCP */
2324             eptr -= length;             eptr -= length;
2325    #endif  /* SUPPORT_UCP */
2326             }             }
           RRETURN(MATCH_NOMATCH);  
2327            }            }
2328          /* Control never gets here */          /* Control never gets here */
2329          }          }
# Line 2017  for (;;) Line 2363  for (;;)
2363          {          {
2364          for (fi = min;; fi++)          for (fi = min;; fi++)
2365            {            {
2366            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2367            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2368            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
2369                fc != md->lcc[*eptr++])                fc != md->lcc[*eptr++])
# Line 2025  for (;;) Line 2371  for (;;)
2371            }            }
2372          /* Control never gets here */          /* Control never gets here */
2373          }          }
2374        else        else  /* Maximize */
2375          {          {
2376          pp = eptr;          pp = eptr;
2377          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2033  for (;;) Line 2379  for (;;)
2379            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2380            eptr++;            eptr++;
2381            }            }
2382            if (possessive) continue;
2383          while (eptr >= pp)          while (eptr >= pp)
2384            {            {
2385            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2386            eptr--;            eptr--;
2387            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2388            }            }
# Line 2054  for (;;) Line 2401  for (;;)
2401          {          {
2402          for (fi = min;; fi++)          for (fi = min;; fi++)
2403            {            {
2404            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2405            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2406            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2407              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2408            }            }
2409          /* Control never gets here */          /* Control never gets here */
2410          }          }
2411        else        else  /* Maximize */
2412          {          {
2413          pp = eptr;          pp = eptr;
2414          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2069  for (;;) Line 2416  for (;;)
2416            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2417            eptr++;            eptr++;
2418            }            }
2419            if (possessive) continue;
2420          while (eptr >= pp)          while (eptr >= pp)
2421            {            {
2422            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2423            eptr--;            eptr--;
2424            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2425            }            }
# Line 2121  for (;;) Line 2469  for (;;)
2469      ecode += 3;      ecode += 3;
2470      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2471    
2472        case OP_NOTPOSSTAR:
2473        possessive = TRUE;
2474        min = 0;
2475        max = INT_MAX;
2476        ecode++;
2477        goto REPEATNOTCHAR;
2478    
2479        case OP_NOTPOSPLUS:
2480        possessive = TRUE;
2481        min = 1;
2482        max = INT_MAX;
2483        ecode++;
2484        goto REPEATNOTCHAR;
2485    
2486        case OP_NOTPOSQUERY:
2487        possessive = TRUE;
2488        min = 0;
2489        max = 1;
2490        ecode++;
2491        goto REPEATNOTCHAR;
2492    
2493        case OP_NOTPOSUPTO:
2494        possessive = TRUE;
2495        min = 0;
2496        max = GET2(ecode, 1);
2497        ecode += 3;
2498        goto REPEATNOTCHAR;
2499    
2500      case OP_NOTSTAR:      case OP_NOTSTAR:
2501      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2502      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2160  for (;;) Line 2536  for (;;)
2536        /* UTF-8 mode */        /* UTF-8 mode */
2537        if (utf8)        if (utf8)
2538          {          {
2539          register int d;          register unsigned int d;
2540          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2541            {            {
2542            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2185  for (;;) Line 2561  for (;;)
2561          /* UTF-8 mode */          /* UTF-8 mode */
2562          if (utf8)          if (utf8)
2563            {            {
2564            register int d;            register unsigned int d;
2565            for (fi = min;; fi++)            for (fi = min;; fi++)
2566              {              {
2567              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2568              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2569              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2570              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
# Line 2202  for (;;) Line 2578  for (;;)
2578            {            {
2579            for (fi = min;; fi++)            for (fi = min;; fi++)
2580              {              {
2581              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2582              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2583              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2584                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2221  for (;;) Line 2597  for (;;)
2597          /* UTF-8 mode */          /* UTF-8 mode */
2598          if (utf8)          if (utf8)
2599            {            {
2600            register int d;            register unsigned int d;
2601            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2602              {              {
2603              int len = 1;              int len = 1;
# Line 2231  for (;;) Line 2607  for (;;)
2607              if (fc == d) break;              if (fc == d) break;
2608              eptr += len;              eptr += len;
2609              }              }
2610            for(;;)          if (possessive) continue;
2611            for(;;)
2612              {              {
2613              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2614              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2615              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2616              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2248  for (;;) Line 2625  for (;;)
2625              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2626              eptr++;              eptr++;
2627              }              }
2628              if (possessive) continue;
2629            while (eptr >= pp)            while (eptr >= pp)
2630              {              {
2631              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2632              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2633              eptr--;              eptr--;
2634              }              }
# Line 2269  for (;;) Line 2647  for (;;)
2647        /* UTF-8 mode */        /* UTF-8 mode */
2648        if (utf8)        if (utf8)
2649          {          {
2650          register int d;          register unsigned int d;
2651          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2652            {            {
2653            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2292  for (;;) Line 2670  for (;;)
2670          /* UTF-8 mode */          /* UTF-8 mode */
2671          if (utf8)          if (utf8)
2672            {            {
2673            register int d;            register unsigned int d;
2674            for (fi = min;; fi++)            for (fi = min;; fi++)
2675              {              {
2676              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2677              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2678              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2679              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fi >= max || eptr >= md->end_subject || fc == d)
# Line 2308  for (;;) Line 2686  for (;;)
2686            {            {
2687            for (fi = min;; fi++)            for (fi = min;; fi++)
2688              {              {
2689              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2690              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2691              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2692                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2327  for (;;) Line 2705  for (;;)
2705          /* UTF-8 mode */          /* UTF-8 mode */
2706          if (utf8)          if (utf8)
2707            {            {
2708            register int d;            register unsigned int d;
2709            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2710              {              {
2711              int len = 1;              int len = 1;
# Line 2336  for (;;) Line 2714  for (;;)
2714              if (fc == d) break;              if (fc == d) break;
2715              eptr += len;              eptr += len;
2716              }              }
2717              if (possessive) continue;
2718            for(;;)            for(;;)
2719              {              {
2720              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2721              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2722              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2723              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2353  for (;;) Line 2732  for (;;)
2732              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2733              eptr++;              eptr++;
2734              }              }
2735              if (possessive) continue;
2736            while (eptr >= pp)            while (eptr >= pp)
2737              {              {
2738              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2739              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2740              eptr--;              eptr--;
2741              }              }
# Line 2384  for (;;) Line 2764  for (;;)
2764      ecode += 3;      ecode += 3;
2765      goto REPEATTYPE;      goto REPEATTYPE;
2766    
2767        case OP_TYPEPOSSTAR:
2768        possessive = TRUE;
2769        min = 0;
2770        max = INT_MAX;
2771        ecode++;
2772        goto REPEATTYPE;
2773    
2774        case OP_TYPEPOSPLUS:
2775        possessive = TRUE;
2776        min = 1;
2777        max = INT_MAX;
2778        ecode++;
2779        goto REPEATTYPE;
2780    
2781        case OP_TYPEPOSQUERY:
2782        possessive = TRUE;
2783        min = 0;
2784        max = 1;
2785        ecode++;
2786        goto REPEATTYPE;
2787    
2788        case OP_TYPEPOSUPTO:
2789        possessive = TRUE;
2790        min = 0;
2791        max = GET2(ecode, 1);
2792        ecode += 3;
2793        goto REPEATTYPE;
2794    
2795      case OP_TYPESTAR:      case OP_TYPESTAR:
2796      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
2797      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2408  for (;;) Line 2816  for (;;)
2816        {        {
2817        prop_fail_result = ctype == OP_NOTPROP;        prop_fail_result = ctype == OP_NOTPROP;
2818        prop_type = *ecode++;        prop_type = *ecode++;
2819        if (prop_type >= 128)        prop_value = *ecode++;
         {  
         prop_test_against = prop_type - 128;  
         prop_test_variable = &prop_category;  
         }  
       else  
         {  
         prop_test_against = prop_type;  
         prop_test_variable = &prop_chartype;  
         }  
2820        }        }
2821      else prop_type = -1;      else prop_type = -1;
2822  #endif  #endif
# Line 2434  for (;;) Line 2833  for (;;)
2833      if (min > 0)      if (min > 0)
2834        {        {
2835  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2836        if (prop_type > 0)        if (prop_type >= 0)
2837          {          {
2838          for (i = 1; i <= min; i++)          switch(prop_type)
2839            {            {
2840            GETCHARINC(c, eptr);            case PT_ANY:
2841            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2842            if ((*prop_test_variable == prop_test_against) == prop_fail_result)            for (i = 1; i <= min; i++)
2843              RRETURN(MATCH_NOMATCH);              {
2844                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2845                GETCHARINCTEST(c, eptr);
2846                }
2847              break;
2848    
2849              case PT_LAMP:
2850              for (i = 1; i <= min; i++)
2851                {
2852                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2853                GETCHARINCTEST(c, eptr);
2854                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2855                if ((prop_chartype == ucp_Lu ||
2856                     prop_chartype == ucp_Ll ||
2857                     prop_chartype == ucp_Lt) == prop_fail_result)
2858                  RRETURN(MATCH_NOMATCH);
2859                }
2860              break;
2861    
2862              case PT_GC:
2863              for (i = 1; i <= min; i++)
2864                {
2865                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2866                GETCHARINCTEST(c, eptr);
2867                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2868                if ((prop_category == prop_value) == prop_fail_result)
2869                  RRETURN(MATCH_NOMATCH);
2870                }
2871              break;
2872    
2873              case PT_PC:
2874              for (i = 1; i <= min; i++)
2875                {
2876                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2877                GETCHARINCTEST(c, eptr);
2878                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2879                if ((prop_chartype == prop_value) == prop_fail_result)
2880                  RRETURN(MATCH_NOMATCH);
2881                }
2882              break;
2883    
2884              case PT_SC:
2885              for (i = 1; i <= min; i++)
2886                {
2887                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2888                GETCHARINCTEST(c, eptr);
2889                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2890                if ((prop_script == prop_value) == prop_fail_result)
2891                  RRETURN(MATCH_NOMATCH);
2892                }
2893              break;
2894    
2895              default:
2896              RRETURN(PCRE_ERROR_INTERNAL);
2897            }            }
2898          }          }
2899    
# Line 2453  for (;;) Line 2905  for (;;)
2905          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2906            {            {
2907            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2908            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2909            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2910            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2911              {              {
# Line 2462  for (;;) Line 2914  for (;;)
2914                {                {
2915                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2916                }                }
2917              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2918              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2919              eptr += len;              eptr += len;
2920              }              }
# Line 2481  for (;;) Line 2933  for (;;)
2933          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2934            {            {
2935            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
2936               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2937              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2938              eptr++;
2939            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2940            }            }
2941          break;          break;
# Line 2491  for (;;) Line 2944  for (;;)
2944          eptr += min;          eptr += min;
2945          break;          break;
2946    
2947            case OP_ANYNL:
2948            for (i = 1; i <= min; i++)
2949              {
2950              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2951              GETCHARINC(c, eptr);
2952              switch(c)
2953                {
2954                default: RRETURN(MATCH_NOMATCH);
2955                case 0x000d:
2956                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2957                break;
2958                case 0x000a:
2959                case 0x000b:
2960                case 0x000c:
2961                case 0x0085:
2962                case 0x2028:
2963                case 0x2029:
2964                break;
2965                }
2966              }
2967            break;
2968    
2969            case OP_NOT_HSPACE:
2970            for (i = 1; i <= min; i++)
2971              {
2972              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2973              GETCHARINC(c, eptr);
2974              switch(c)
2975                {
2976                default: break;
2977                case 0x09:      /* HT */
2978                case 0x20:      /* SPACE */
2979                case 0xa0:      /* NBSP */
2980                case 0x1680:    /* OGHAM SPACE MARK */
2981                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2982                case 0x2000:    /* EN QUAD */
2983                case 0x2001:    /* EM QUAD */
2984                case 0x2002:    /* EN SPACE */
2985                case 0x2003:    /* EM SPACE */
2986                case 0x2004:    /* THREE-PER-EM SPACE */
2987                case 0x2005:    /* FOUR-PER-EM SPACE */
2988                case 0x2006:    /* SIX-PER-EM SPACE */
2989                case 0x2007:    /* FIGURE SPACE */
2990                case 0x2008:    /* PUNCTUATION SPACE */
2991                case 0x2009:    /* THIN SPACE */
2992                case 0x200A:    /* HAIR SPACE */
2993                case 0x202f:    /* NARROW NO-BREAK SPACE */
2994                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2995                case 0x3000:    /* IDEOGRAPHIC SPACE */
2996                RRETURN(MATCH_NOMATCH);
2997                }
2998              }
2999            break;
3000    
3001            case OP_HSPACE:
3002            for (i = 1; i <= min; i++)
3003              {
3004              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3005              GETCHARINC(c, eptr);
3006              switch(c)
3007                {
3008                default: RRETURN(MATCH_NOMATCH);
3009                case 0x09:      /* HT */
3010                case 0x20:      /* SPACE */
3011                case 0xa0:      /* NBSP */
3012                case 0x1680:    /* OGHAM SPACE MARK */
3013                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3014                case 0x2000:    /* EN QUAD */
3015                case 0x2001:    /* EM QUAD */
3016                case 0x2002:    /* EN SPACE */
3017                case 0x2003:    /* EM SPACE */
3018                case 0x2004:    /* THREE-PER-EM SPACE */
3019                case 0x2005:    /* FOUR-PER-EM SPACE */
3020                case 0x2006:    /* SIX-PER-EM SPACE */
3021                case 0x2007:    /* FIGURE SPACE */
3022                case 0x2008:    /* PUNCTUATION SPACE */
3023                case 0x2009:    /* THIN SPACE */
3024                case 0x200A:    /* HAIR SPACE */
3025                case 0x202f:    /* NARROW NO-BREAK SPACE */
3026                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3027                case 0x3000:    /* IDEOGRAPHIC SPACE */
3028                break;
3029                }
3030              }
3031            break;
3032    
3033            case OP_NOT_VSPACE:
3034            for (i = 1; i <= min; i++)
3035              {
3036              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3037              GETCHARINC(c, eptr);
3038              switch(c)
3039                {
3040                default: break;
3041                case 0x0a:      /* LF */
3042                case 0x0b:      /* VT */
3043                case 0x0c:      /* FF */
3044                case 0x0d:      /* CR */
3045                case 0x85:      /* NEL */
3046                case 0x2028:    /* LINE SEPARATOR */
3047                case 0x2029:    /* PARAGRAPH SEPARATOR */
3048                RRETURN(MATCH_NOMATCH);
3049                }
3050              }
3051            break;
3052    
3053            case OP_VSPACE:
3054            for (i = 1; i <= min; i++)
3055              {
3056              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3057              GETCHARINC(c, eptr);
3058              switch(c)
3059                {
3060                default: RRETURN(MATCH_NOMATCH);
3061                case 0x0a:      /* LF */
3062                case 0x0b:      /* VT */
3063                case 0x0c:      /* FF */
3064                case 0x0d:      /* CR */
3065                case 0x85:      /* NEL */
3066                case 0x2028:    /* LINE SEPARATOR */
3067                case 0x2029:    /* PARAGRAPH SEPARATOR */
3068                break;
3069                }
3070              }
3071            break;
3072    
3073          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3074          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3075            {            {
# Line 2559  for (;;) Line 3138  for (;;)
3138  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
3139    
3140        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
3141        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
3142          number of bytes present, as this was tested above. */
3143    
3144        switch(ctype)        switch(ctype)
3145          {          {
# Line 2567  for (;;) Line 3147  for (;;)
3147          if ((ims & PCRE_DOTALL) == 0)          if ((ims & PCRE_DOTALL) == 0)
3148            {            {
3149            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3150              if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);              {
3151                if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3152                eptr++;
3153                }
3154            }            }
3155          else eptr += min;          else eptr += min;
3156          break;          break;
# Line 2576  for (;;) Line 3159  for (;;)
3159          eptr += min;          eptr += min;
3160          break;          break;
3161    
3162            /* Because of the CRLF case, we can't assume the minimum number of
3163            bytes are present in this case. */
3164    
3165            case OP_ANYNL:
3166            for (i = 1; i <= min; i++)
3167              {
3168              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3169              switch(*eptr++)
3170                {
3171                default: RRETURN(MATCH_NOMATCH);
3172                case 0x000d:
3173                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3174                break;
3175                case 0x000a:
3176                case 0x000b:
3177                case 0x000c:
3178                case 0x0085:
3179                break;
3180                }
3181              }
3182            break;
3183    
3184            case OP_NOT_HSPACE:
3185            for (i = 1; i <= min; i++)
3186              {
3187              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3188              switch(*eptr++)
3189                {
3190                default: break;
3191                case 0x09:      /* HT */
3192                case 0x20:      /* SPACE */
3193                case 0xa0:      /* NBSP */
3194                RRETURN(MATCH_NOMATCH);
3195                }
3196              }
3197            break;
3198    
3199            case OP_HSPACE:
3200            for (i = 1; i <= min; i++)
3201              {
3202              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3203              switch(*eptr++)
3204                {
3205                default: RRETURN(MATCH_NOMATCH);
3206                case 0x09:      /* HT */
3207                case 0x20:      /* SPACE */
3208                case 0xa0:      /* NBSP */
3209                break;
3210                }
3211              }
3212            break;
3213    
3214            case OP_NOT_VSPACE:
3215            for (i = 1; i <= min; i++)
3216              {
3217              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3218              switch(*eptr++)
3219                {
3220                default: break;
3221                case 0x0a:      /* LF */
3222                case 0x0b:      /* VT */
3223                case 0x0c:      /* FF */
3224                case 0x0d:      /* CR */
3225                case 0x85:      /* NEL */
3226                RRETURN(MATCH_NOMATCH);
3227                }
3228              }
3229            break;
3230    
3231            case OP_VSPACE:
3232            for (i = 1; i <= min; i++)
3233              {
3234              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3235              switch(*eptr++)
3236                {
3237                default: RRETURN(MATCH_NOMATCH);
3238                case 0x0a:      /* LF */
3239                case 0x0b:      /* VT */
3240                case 0x0c:      /* FF */
3241                case 0x0d:      /* CR */
3242                case 0x85:      /* NEL */
3243                break;
3244                }
3245              }
3246            break;
3247    
3248          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3249          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3250            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2624  for (;;) Line 3293  for (;;)
3293      if (minimize)      if (minimize)
3294        {        {
3295  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3296        if (prop_type > 0)        if (prop_type >= 0)
3297          {          {
3298          for (fi = min;; fi++)          switch(prop_type)
3299            {            {
3300            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            case PT_ANY:
3301            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for (fi = min;; fi++)
3302            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              {
3303            GETCHARINC(c, eptr);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3304            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3305            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3306              RRETURN(MATCH_NOMATCH);              GETCHARINC(c, eptr);
3307                if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3308                }
3309              /* Control never gets here */
3310    
3311              case PT_LAMP:
3312              for (fi = min;; fi++)
3313                {
3314                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3315                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3316                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3317                GETCHARINC(c, eptr);
3318                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3319                if ((prop_chartype == ucp_Lu ||
3320                     prop_chartype == ucp_Ll ||
3321                     prop_chartype == ucp_Lt) == prop_fail_result)
3322                  RRETURN(MATCH_NOMATCH);
3323                }
3324              /* Control never gets here */
3325    
3326              case PT_GC:
3327              for (fi = min;; fi++)
3328                {
3329                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3330                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3331                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3332                GETCHARINC(c, eptr);
3333                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3334                if ((prop_category == prop_value) == prop_fail_result)
3335                  RRETURN(MATCH_NOMATCH);
3336                }
3337              /* Control never gets here */
3338    
3339              case PT_PC:
3340              for (fi = min;; fi++)
3341                {
3342                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3343                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3344                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3345                GETCHARINC(c, eptr);
3346                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3347                if ((prop_chartype == prop_value) == prop_fail_result)
3348                  RRETURN(MATCH_NOMATCH);
3349                }
3350              /* Control never gets here */
3351    
3352              case PT_SC:
3353              for (fi = min;; fi++)
3354                {
3355                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3356                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3357                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3358                GETCHARINC(c, eptr);
3359                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3360                if ((prop_script == prop_value) == prop_fail_result)
3361                  RRETURN(MATCH_NOMATCH);
3362                }
3363              /* Control never gets here */
3364    
3365              default:
3366              RRETURN(PCRE_ERROR_INTERNAL);
3367            }            }
3368          }          }
3369    
# Line 2645  for (;;) Line 3374  for (;;)
3374          {          {
3375          for (fi = min;; fi++)          for (fi = min;; fi++)
3376            {            {
3377            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3378            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3379            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3380            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3381            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3382            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3383            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3384              {              {
# Line 2658  for (;;) Line 3387  for (;;)
3387                {                {
3388                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3389                }                }
3390              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3391              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3392              eptr += len;              eptr += len;
3393              }              }
# Line 2674  for (;;) Line 3403  for (;;)
3403          {          {
3404          for (fi = min;; fi++)          for (fi = min;; fi++)
3405            {            {
3406            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3407            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3408            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3409                   (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3410                    IS_NEWLINE(eptr)))
3411                RRETURN(MATCH_NOMATCH);
3412    
3413            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3414            switch(ctype)            switch(ctype)
3415              {              {
3416              case OP_ANY:              case OP_ANY:        /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3417              break;              break;
3418    
3419              case OP_ANYBYTE:              case OP_ANYBYTE:
3420              break;              break;
3421    
3422                case OP_ANYNL:
3423                switch(c)
3424                  {
3425                  default: RRETURN(MATCH_NOMATCH);
3426                  case 0x000d:
3427                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3428                  break;
3429                  case 0x000a:
3430                  case 0x000b:
3431                  case 0x000c:
3432                  case 0x0085:
3433                  case 0x2028:
3434                  case 0x2029:
3435                  break;
3436                  }
3437                break;
3438    
3439                case OP_NOT_HSPACE:
3440                switch(c)
3441                  {
3442                  default: break;
3443                  case 0x09:      /* HT */
3444                  case 0x20:      /* SPACE */
3445                  case 0xa0:      /* NBSP */
3446                  case 0x1680:    /* OGHAM SPACE MARK */
3447                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3448                  case 0x2000:    /* EN QUAD */
3449                  case 0x2001:    /* EM QUAD */
3450                  case 0x2002:    /* EN SPACE */
3451                  case 0x2003:    /* EM SPACE */
3452                  case 0x2004:    /* THREE-PER-EM SPACE */
3453                  case 0x2005:    /* FOUR-PER-EM SPACE */
3454                  case 0x2006:    /* SIX-PER-EM SPACE */
3455                  case 0x2007:    /* FIGURE SPACE */
3456                  case 0x2008:    /* PUNCTUATION SPACE */
3457                  case 0x2009:    /* THIN SPACE */
3458                  case 0x200A:    /* HAIR SPACE */
3459                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3460                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3461                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3462                  RRETURN(MATCH_NOMATCH);
3463                  }
3464                break;
3465    
3466                case OP_HSPACE:
3467                switch(c)
3468                  {
3469                  default: RRETURN(MATCH_NOMATCH);
3470                  case 0x09:      /* HT */
3471                  case 0x20:      /* SPACE */
3472                  case 0xa0:      /* NBSP */
3473                  case 0x1680:    /* OGHAM SPACE MARK */
3474                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3475                  case 0x2000:    /* EN QUAD */
3476                  case 0x2001:    /* EM QUAD */
3477                  case 0x2002:    /* EN SPACE */
3478                  case 0x2003:    /* EM SPACE */
3479                  case 0x2004:    /* THREE-PER-EM SPACE */
3480                  case 0x2005:    /* FOUR-PER-EM SPACE */
3481                  case 0x2006:    /* SIX-PER-EM SPACE */
3482                  case 0x2007:    /* FIGURE SPACE */
3483                  case 0x2008:    /* PUNCTUATION SPACE */
3484                  case 0x2009:    /* THIN SPACE */
3485                  case 0x200A:    /* HAIR SPACE */
3486                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3487                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3488                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3489                  break;
3490                  }
3491                break;
3492    
3493                case OP_NOT_VSPACE:
3494                switch(c)
3495                  {
3496                  default: break;
3497                  case 0x0a:      /* LF */
3498                  case 0x0b:      /* VT */
3499                  case 0x0c:      /* FF */
3500                  case 0x0d:      /* CR */
3501                  case 0x85:      /* NEL */
3502                  case 0x2028:    /* LINE SEPARATOR */
3503                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3504                  RRETURN(MATCH_NOMATCH);
3505                  }
3506                break;
3507    
3508                case OP_VSPACE:
3509                switch(c)
3510                  {
3511                  default: RRETURN(MATCH_NOMATCH);
3512                  case 0x0a:      /* LF */
3513                  case 0x0b:      /* VT */
3514                  case 0x0c:      /* FF */
3515                  case 0x0d:      /* CR */
3516                  case 0x85:      /* NEL */
3517                  case 0x2028:    /* LINE SEPARATOR */
3518                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3519                  break;
3520                  }
3521                break;
3522    
3523              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3524              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3525                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2729  for (;;) Line 3561  for (;;)
3561          {          {
3562          for (fi = min;; fi++)          for (fi = min;; fi++)
3563            {            {
3564            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3565            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3566            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3567                   ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3568                RRETURN(MATCH_NOMATCH);
3569    
3570            c = *eptr++;            c = *eptr++;
3571            switch(ctype)            switch(ctype)
3572              {              {
3573              case OP_ANY:              case OP_ANY:   /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3574              break;              break;
3575    
3576              case OP_ANYBYTE:              case OP_ANYBYTE:
3577              break;              break;
3578    
3579                case OP_ANYNL:
3580                switch(c)
3581                  {
3582                  default: RRETURN(MATCH_NOMATCH);
3583                  case 0x000d:
3584                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3585                  break;
3586                  case 0x000a:
3587                  case 0x000b:
3588                  case 0x000c:
3589                  case 0x0085:
3590                  break;
3591                  }
3592                break;
3593    
3594                case OP_NOT_HSPACE:
3595                switch(c)
3596                  {
3597                  default: break;
3598                  case 0x09:      /* HT */
3599                  case 0x20:      /* SPACE */
3600                  case 0xa0:      /* NBSP */
3601                  RRETURN(MATCH_NOMATCH);
3602                  }
3603                break;
3604    
3605                case OP_HSPACE:
3606                switch(c)
3607                  {
3608                  default: RRETURN(MATCH_NOMATCH);
3609                  case 0x09:      /* HT */
3610                  case 0x20:      /* SPACE */
3611                  case 0xa0:      /* NBSP */
3612                  break;
3613                  }
3614                break;
3615    
3616                case OP_NOT_VSPACE:
3617                switch(c)
3618                  {
3619                  default: break;
3620                  case 0x0a:      /* LF */
3621                  case 0x0b:      /* VT */
3622                  case 0x0c:      /* FF */
3623                  case 0x0d:      /* CR */
3624                  case 0x85:      /* NEL */
3625                  RRETURN(MATCH_NOMATCH);
3626                  }
3627                break;
3628    
3629                case OP_VSPACE:
3630                switch(c)
3631                  {
3632                  default: RRETURN(MATCH_NOMATCH);
3633                  case 0x0a:      /* LF */
3634                  case 0x0b:      /* VT */
3635                  case 0x0c:      /* FF */
3636                  case 0x0d:      /* CR */
3637                  case 0x85:      /* NEL */
3638                  break;
3639                  }
3640                break;
3641    
3642              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3643              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3644              break;              break;
# Line 2774  for (;;) Line 3671  for (;;)
3671        /* Control never gets here */        /* Control never gets here */
3672        }        }
3673    
3674      /* If maximizing it is worth using inline code for speed, doing the type      /* If maximizing, it is worth using inline code for speed, doing the type
3675      test once at the start (i.e. keep it out of the loop). Again, keep the      test once at the start (i.e. keep it out of the loop). Again, keep the
3676      UTF-8 and UCP stuff separate. */      UTF-8 and UCP stuff separate. */
3677    
# Line 2783  for (;;) Line 3680  for (;;)
3680        pp = eptr;  /* Remember where we started */        pp = eptr;  /* Remember where we started */
3681    
3682  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3683        if (prop_type > 0)        if (prop_type >= 0)
3684          {          {
3685          for (i = min; i < max; i++)          switch(prop_type)
3686            {            {
3687            int len = 1;            case PT_ANY:
3688            if (eptr >= md->end_subject) break;            for (i = min; i < max; i++)
3689            GETCHARLEN(c, eptr, len);              {
3690            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              int len = 1;
3691            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (eptr >= md->end_subject) break;
3692              break;              GETCHARLEN(c, eptr, len);
3693            eptr+= len;              if (prop_fail_result) break;
3694                eptr+= len;
3695                }
3696              break;
3697    
3698              case PT_LAMP:
3699              for (i = min; i < max; i++)
3700                {
3701                int len = 1;
3702                if (eptr >= md->end_subject) break;
3703                GETCHARLEN(c, eptr, len);
3704                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3705                if ((prop_chartype == ucp_Lu ||
3706                     prop_chartype == ucp_Ll ||
3707                     prop_chartype == ucp_Lt) == prop_fail_result)
3708                  break;
3709                eptr+= len;
3710                }
3711              break;
3712    
3713              case PT_GC:
3714              for (i = min; i < max; i++)
3715                {
3716                int len = 1;
3717                if (eptr >= md->end_subject) break;
3718                GETCHARLEN(c, eptr, len);
3719                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3720                if ((prop_category == prop_value) == prop_fail_result)
3721                  break;
3722                eptr+= len;
3723                }
3724              break;
3725    
3726              case PT_PC:
3727              for (i = min; i < max; i++)
3728                {
3729                int len = 1;
3730                if (eptr >= md->end_subject) break;
3731                GETCHARLEN(c, eptr, len);
3732                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3733                if ((prop_chartype == prop_value) == prop_fail_result)
3734                  break;
3735                eptr+= len;
3736                }
3737              break;
3738    
3739              case PT_SC:
3740              for (i = min; i < max; i++)
3741                {
3742                int len = 1;
3743                if (eptr >= md->end_subject) break;
3744                GETCHARLEN(c, eptr, len);
3745                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3746                if ((prop_script == prop_value) == prop_fail_result)
3747                  break;
3748                eptr+= len;
3749                }
3750              break;
3751            }            }
3752    
3753          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3754    
3755            if (possessive) continue;
3756          for(;;)          for(;;)
3757            {            {
3758            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3759            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3760            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3761            BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
3762            }            }
3763          }          }
3764    
# Line 2816  for (;;) Line 3771  for (;;)
3771            {            {
3772            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3773            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3774            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3775            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3776            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3777              {              {
# Line 2825  for (;;) Line 3780  for (;;)
3780                {                {
3781                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3782                }                }
3783              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3784              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3785              eptr += len;              eptr += len;
3786              }              }
# Line 2833  for (;;) Line 3788  for (;;)
3788    
3789          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3790    
3791            if (possessive) continue;
3792          for(;;)          for(;;)
3793            {            {
3794            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3795            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3796            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3797            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
3798              {              {
3799              int len = 1;              int len = 1;
             BACKCHAR(eptr);  
3800              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr; else
3801                {                {
3802                  BACKCHAR(eptr);
3803                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3804                }                }
3805              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3806              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3807              eptr--;              eptr--;
3808              }              }
# Line 2864  for (;;) Line 3820  for (;;)
3820          switch(ctype)          switch(ctype)
3821            {            {
3822            case OP_ANY:            case OP_ANY:
   
           /* Special code is required for UTF8, but when the maximum is unlimited  
           we don't need it, so we repeat the non-UTF8 code. This is probably  
           worth it, because .* is quite a common idiom. */  
   
3823            if (max < INT_MAX)            if (max < INT_MAX)
3824              {              {
3825              if ((ims & PCRE_DOTALL) == 0)              if ((ims & PCRE_DOTALL) == 0)
3826                {                {
3827                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3828                  {                  {
3829                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3830                  eptr++;                  eptr++;
3831                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3832                  }                  }
# Line 2884  for (;;) Line 3835  for (;;)
3835                {                {
3836                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3837                  {                  {
3838                    if (eptr >= md->end_subject) break;
3839                  eptr++;                  eptr++;
3840                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3841                  }                  }
# Line 2898  for (;;) Line 3850  for (;;)
3850                {                {
3851                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3852                  {                  {
3853                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3854                  eptr++;                  eptr++;
3855                    while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3856                  }                  }
               break;  
3857                }                }
3858              else              else
3859                {                {
3860                c = max - min;                eptr = md->end_subject;
               if (c > md->end_subject - eptr) c = md->end_subject - eptr;  
               eptr += c;  
3861                }                }
3862              }              }
3863            break;            break;
# Line 2916  for (;;) Line 3866  for (;;)
3866    
3867            case OP_ANYBYTE:            case OP_ANYBYTE:
3868            c = max - min;            c = max - min;
3869            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3870                c = md->end_subject - eptr;
3871            eptr += c;            eptr += c;
3872            break;            break;
3873    
3874              case OP_ANYNL:
3875              for (i = min; i < max; i++)
3876                {
3877                int len = 1;
3878                if (eptr >= md->end_subject) break;
3879                GETCHARLEN(c, eptr, len);
3880                if (c == 0x000d)
3881                  {
3882                  if (++eptr >= md->end_subject) break;
3883                  if (*eptr == 0x000a) eptr++;
3884                  }
3885                else
3886                  {
3887                  if (c != 0x000a && c != 0x000b && c != 0x000c &&
3888                      c != 0x0085 && c != 0x2028 && c != 0x2029)
3889                    break;
3890                  eptr += len;
3891                  }
3892                }
3893              break;
3894    
3895              case OP_NOT_HSPACE:
3896              case OP_HSPACE:
3897              for (i = min; i < max; i++)
3898                {
3899                BOOL gotspace;
3900                int len = 1;
3901                if (eptr >= md->end_subject) break;
3902                GETCHARLEN(c, eptr, len);
3903                switch(c)
3904                  {
3905                  default: gotspace = FALSE; break;
3906                  case 0x09:      /* HT */
3907                  case 0x20:      /* SPACE */
3908                  case 0xa0:      /* NBSP */
3909                  case 0x1680:    /* OGHAM SPACE MARK */
3910                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3911                  case 0x2000:    /* EN QUAD */
3912                  case 0x2001:    /* EM QUAD */
3913                  case 0x2002:    /* EN SPACE */
3914                  case 0x2003:    /* EM SPACE */
3915                  case 0x2004:    /* THREE-PER-EM SPACE */
3916                  case 0x2005:    /* FOUR-PER-EM SPACE */
3917                  case 0x2006:    /* SIX-PER-EM SPACE */
3918                  case 0x2007:    /* FIGURE SPACE */
3919                  case 0x2008:    /* PUNCTUATION SPACE */
3920                  case 0x2009:    /* THIN SPACE */
3921                  case 0x200A:    /* HAIR SPACE */
3922                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3923                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3924                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3925                  gotspace = TRUE;
3926                  break;
3927                  }
3928                if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3929                eptr += len;
3930                }
3931              break;
3932    
3933              case OP_NOT_VSPACE:
3934              case OP_VSPACE:
3935              for (i = min; i < max; i++)
3936                {
3937                BOOL gotspace;
3938                int len = 1;
3939                if (eptr >= md->end_subject) break;
3940                GETCHARLEN(c, eptr, len);
3941                switch(c)
3942                  {
3943                  default: gotspace = FALSE; break;
3944                  case 0x0a:      /* LF */
3945                  case 0x0b:      /* VT */
3946                  case 0x0c:      /* FF */
3947                  case 0x0d:      /* CR */
3948                  case 0x85:      /* NEL */
3949                  case 0x2028:    /* LINE SEPARATOR */
3950                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3951                  gotspace = TRUE;
3952                  break;
3953                  }
3954                if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3955                eptr += len;
3956                }
3957              break;
3958    
3959            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3960            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3961              {              {
# Line 2992  for (;;) Line 4028  for (;;)
4028    
4029          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4030    
4031            if (possessive) continue;
4032          for(;;)          for(;;)
4033            {            {
4034            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
4035            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4036            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
4037            BACKCHAR(eptr);            BACKCHAR(eptr);
4038            }            }
4039          }          }
4040        else        else
4041  #endif  #endif  /* SUPPORT_UTF8 */
4042    
4043        /* Not UTF-8 mode */        /* Not UTF-8 mode */
4044          {          {
# Line 3012  for (;;) Line 4049  for (;;)
4049              {              {
4050              for (i = min; i < max; i++)              for (i = min; i < max; i++)
4051                {                {
4052                if (eptr >= md->end_subject || *eptr == NEWLINE) break;                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4053                eptr++;                eptr++;
4054                }                }
4055              break;              break;
# Line 3021  for (;;) Line 4058  for (;;)
4058    
4059            case OP_ANYBYTE:            case OP_ANYBYTE:
4060            c = max - min;            c = max - min;
4061            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
4062                c = md->end_subject - eptr;
4063            eptr += c;            eptr += c;
4064            break;            break;
4065    
4066              case OP_ANYNL:
4067              for (i = min; i < max; i++)
4068                {
4069                if (eptr >= md->end_subject) break;
4070                c = *eptr;
4071                if (c == 0x000d)
4072                  {
4073                  if (++eptr >= md->end_subject) break;
4074                  if (*eptr == 0x000a) eptr++;
4075                  }
4076                else
4077                  {
4078                  if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
4079                    break;
4080                  eptr++;
4081                  }
4082                }
4083              break;
4084    
4085              case OP_NOT_HSPACE:
4086              for (i = min; i < max; i++)
4087                {
4088                if (eptr >= md->end_subject) break;
4089                c = *eptr;
4090                if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4091                eptr++;
4092                }
4093              break;
4094    
4095              case OP_HSPACE:
4096              for (i = min; i < max; i++)
4097                {
4098                if (eptr >= md->end_subject) break;
4099                c = *eptr;
4100                if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4101                eptr++;
4102                }
4103              break;
4104    
4105              case OP_NOT_VSPACE:
4106              for (i = min; i < max; i++)
4107                {
4108                if (eptr >= md->end_subject) break;
4109                c = *eptr;
4110                if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4111                  break;
4112                eptr++;
4113                }
4114              break;
4115    
4116              case OP_VSPACE:
4117              for (i = min; i < max; i++)
4118                {
4119                if (eptr >= md->end_subject) break;
4120                c = *eptr;
4121                if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4122                  break;
4123                eptr++;
4124                }
4125              break;
4126    
4127            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
4128            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4129              {              {
# Line 3085  for (;;) Line 4184  for (;;)
4184    
4185          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4186    
4187            if (possessive) continue;
4188          while (eptr >= pp)          while (eptr >= pp)
4189            {            {
4190            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4191            eptr--;            eptr--;
4192            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4193            }            }
# Line 3099  for (;;) Line 4199  for (;;)
4199        }        }
4200      /* Control never gets here */      /* Control never gets here */
4201    
4202      /* There's been some horrible disaster. Since all codes > OP_BRA are      /* There's been some horrible disaster. Arrival here can only mean there is
4203      for capturing brackets, and there shouldn't be any gaps between 0 and      something seriously wrong in the code above or the OP_xxx definitions. */
     OP_BRA, arrival here can only mean there is something seriously wrong  
     in the code above or the OP_xxx definitions. */  
4204    
4205      default:      default:
4206      DPRINTF(("Unknown opcode %d\n", *ecode));      DPRINTF(("Unknown opcode %d\n", *ecode));
4207      RRETURN(PCRE_ERROR_UNKNOWN_NODE);      RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
4208      }      }
4209    
4210    /* Do not stick any code in here without much thought; it is assumed    /* Do not stick any code in here without much thought; it is assumed
# Line 3115  for (;;) Line 4213  for (;;)
4213    
4214    }             /* End of main loop */    }             /* End of main loop */
4215  /* Control never reaches here */  /* Control never reaches here */
4216    
4217    
4218    /* When compiling to use the heap rather than the stack for recursive calls to
4219    match(), the RRETURN() macro jumps here. The number that is saved in
4220    frame->Xwhere indicates which label we actually want to return to. */
4221    
4222    #ifdef NO_RECURSE
4223    #define LBL(val) case val: goto L_RM##val;
4224    HEAP_RETURN:
4225    switch (frame->Xwhere)
4226      {
4227      LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4228      LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
4229      LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
4230      LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
4231      LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
4232      LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
4233      default:
4234      DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4235      return PCRE_ERROR_INTERNAL;
4236      }
4237    #undef LBL
4238    #endif  /* NO_RECURSE */
4239  }  }
4240    
4241    
# Line 3127  Undefine all the macros that were define Line 4248  Undefine all the macros that were define
4248  #ifdef NO_RECURSE  #ifdef NO_RECURSE
4249  #undef eptr  #undef eptr
4250  #undef ecode  #undef ecode
4251    #undef mstart
4252  #undef offset_top  #undef offset_top
4253  #undef ims  #undef ims
4254  #undef eptrb  #undef eptrb
# Line 3144  Undefine all the macros that were define Line 4266  Undefine all the macros that were define
4266    
4267  #undef cur_is_word  #undef cur_is_word
4268  #undef condition  #undef condition
 #undef minimize  
4269  #undef prev_is_word  #undef prev_is_word
4270    
4271  #undef original_ims  #undef original_ims
# Line 3200  Returns:          > 0 => success; value Line 4321  Returns:          > 0 => success; value
4321                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4322  */  */
4323    
4324  PCRE_EXPORT int  PCRE_EXP_DEFN int
4325  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4326    const char *subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4327    int offsetcount)    int offsetcount)
4328  {  {
4329  int rc, resetcount, ocount;  int rc, resetcount, ocount;
4330  int first_byte = -1;  int first_byte = -1;
4331  int req_byte = -1;  int req_byte = -1;
4332  int req_byte2 = -1;  int req_byte2 = -1;
4333  unsigned long int ims = 0;  int newline;
4334    unsigned long int ims;
4335  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
4336  BOOL anchored;  BOOL anchored;
4337  BOOL startline;  BOOL startline;
4338  BOOL firstline;  BOOL firstline;
4339  BOOL first_byte_caseless = FALSE;  BOOL first_byte_caseless = FALSE;
4340  BOOL req_byte_caseless = FALSE;  BOOL req_byte_caseless = FALSE;
4341    BOOL utf8;
4342  match_data match_block;  match_data match_block;
4343    match_data *md = &match_block;
4344  const uschar *tables;  const uschar *tables;
4345  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4346  const uschar *start_match = (const uschar *)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
4347  const uschar *end_subject;  USPTR end_subject;
4348  const uschar *req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
4349    
4350  pcre_study_data internal_study;  pcre_study_data internal_study;
4351  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3241  if (offsetcount < 0) return PCRE_ERROR_B Line 4365  if (offsetcount < 0) return PCRE_ERROR_B
4365  the default values. */  the default values. */
4366    
4367  study = NULL;  study = NULL;
4368  match_block.match_limit = MATCH_LIMIT;  md->match_limit = MATCH_LIMIT;
4369  match_block.callout_data = NULL;  md->match_limit_recursion = MATCH_LIMIT_RECURSION;
4370    md->callout_data = NULL;
4371    
4372  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
4373    
# Line 3254  if (extra_data != NULL) Line 4379  if (extra_data != NULL)
4379    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
4380      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
4381    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
4382      match_block.match_limit = extra_data->match_limit;      md->match_limit = extra_data->match_limit;
4383      if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
4384        md->match_limit_recursion = extra_data->match_limit_recursion;
4385    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
4386      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
4387    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
4388    }    }
4389    
# Line 3286  firstline = (re->options & PCRE_FIRSTLIN Line 4413  firstline = (re->options & PCRE_FIRSTLIN
4413    
4414  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
4415    
4416  match_block.start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const uschar *)external_re + re->name_table_offset +
4417    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
4418    
4419  match_block.start_subject = (const uschar *)subject;  md->start_subject = (USPTR)subject;
4420  match_block.start_offset = start_offset;  md->start_offset = start_offset;
4421  match_block.end_subject = match_block.start_subject + length;  md->end_subject = md->start_subject + length;
4422  end_subject = match_block.end_subject;  end_subject = md->end_subject;
4423    
4424  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4425  match_block.utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4426    
4427  match_block.notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4428  match_block.noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
4429  match_block.notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
4430  match_block.partial = (options & PCRE_PARTIAL) != 0;  md->partial = (options & PCRE_PARTIAL) != 0;
4431  match_block.hitend = FALSE;  md->hitend = FALSE;
4432    
4433    md->recursive = NULL;                   /* No recursion at top level */
4434    
4435    md->lcc = tables + lcc_offset;
4436    md->ctypes = tables + ctypes_offset;
4437    
4438  match_block.recursive = NULL;                   /* No recursion at top level */  /* Handle different types of newline. The three bits give eight cases. If
4439    nothing is set at run time, whatever was used at compile time applies. */
4440    
4441  match_block.lcc = tables + lcc_offset;  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
4442  match_block.ctypes = tables + ctypes_offset;         PCRE_NEWLINE_BITS)
4443      {
4444      case 0: newline = NEWLINE; break;   /* Compile-time default */
4445      case PCRE_NEWLINE_CR: newline = '\r'; break;
4446      case PCRE_NEWLINE_LF: newline = '\n'; break;
4447      case PCRE_NEWLINE_CR+
4448           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4449      case PCRE_NEWLINE_ANY: newline = -1; break;
4450      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4451      default: return PCRE_ERROR_BADNEWLINE;
4452      }
4453    
4454    if (newline == -2)
4455      {
4456      md->nltype = NLTYPE_ANYCRLF;
4457      }
4458    else if (newline < 0)
4459      {
4460      md->nltype = NLTYPE_ANY;
4461      }
4462    else
4463      {
4464      md->nltype = NLTYPE_FIXED;
4465      if (newline > 255)
4466        {
4467        md->nllen = 2;
4468        md->nl[0] = (newline >> 8) & 255;
4469        md->nl[1] = newline & 255;
4470        }
4471      else
4472        {
4473        md->nllen = 1;
4474        md->nl[0] = newline;
4475        }
4476      }
4477    
4478  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
4479  moment. */  moment. */
4480    
4481  if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
4482    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
4483    
4484  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
4485  back the character offset. */  back the character offset. */
4486    
4487  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4488  if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4489    {    {
4490    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
4491      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
# Line 3350  ocount = offsetcount - (offsetcount % 3) Line 4517  ocount = offsetcount - (offsetcount % 3)
4517  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
4518    {    {
4519    ocount = re->top_backref * 3 + 3;    ocount = re->top_backref * 3 + 3;
4520    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4521    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4522    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
4523    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
4524    }    }
4525  else match_block.offset_vector = offsets;  else md->offset_vector = offsets;
4526    
4527  match_block.offset_end = ocount;  md->offset_end = ocount;
4528  match_block.offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
4529  match_block.offset_overflow = FALSE;  md->offset_overflow = FALSE;
4530  match_block.capture_last = -1;  md->capture_last = -1;
4531    
4532  /* Compute the minimum number of offsets that we need to reset each time. Doing  /* Compute the minimum number of offsets that we need to reset each time. Doing
4533  this makes a huge difference to execution time when there aren't many brackets  this makes a huge difference to execution time when there aren't many brackets
# Line 3373  if (resetcount > offsetcount) resetcount Line 4540  if (resetcount > offsetcount) resetcount
4540  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
4541  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. */
4542    
4543  if (match_block.offset_vector != NULL)  if (md->offset_vector != NULL)
4544    {    {
4545    register int *iptr = match_block.offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
4546    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - resetcount/2 + 1;
4547    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
4548    }    }
# Line 3392  if (!anchored) Line 4559  if (!anchored)
4559      {      {
4560      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
4561      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
4562        first_byte = match_block.lcc[first_byte];        first_byte = md->lcc[first_byte];
4563      }      }
4564    else    else
4565      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 3410  if ((re->options & PCRE_REQCHSET) != 0) Line 4577  if ((re->options & PCRE_REQCHSET) != 0)
4577    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
4578    }    }
4579    
4580    
4581    /* ==========================================================================*/
4582    
4583  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
4584  the loop runs just once. */  the loop runs just once. */
4585    
4586  do  for(;;)
4587    {    {
4588    const uschar *save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
4589      USPTR new_start_match;
4590    
4591    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
4592    
4593    if (match_block.offset_vector != NULL)    if (md->offset_vector != NULL)
4594      {      {
4595      register int *iptr = match_block.offset_vector;      register int *iptr = md->offset_vector;
4596      register int *iend = iptr + resetcount;      register int *iend = iptr + resetcount;
4597      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
4598      }      }
4599    
4600    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* Advance to a unique first char if possible. If firstline is TRUE, the
4601    start of the match is constrained to the first line of a multiline string.    start of the match is constrained to the first line of a multiline string.
4602    Implement this by temporarily adjusting end_subject so that we stop scanning    That is, the match must be before or at the first newline. Implement this by
4603    at a newline. If the match fails at the newline, later code breaks this loop.    temporarily adjusting end_subject so that we stop scanning at a newline. If
4604    */    the match fails at the newline, later code breaks this loop. */
4605    
4606    if (firstline)    if (firstline)
4607      {      {
4608      const uschar *t = start_match;      USPTR t = start_match;
4609      while (t < save_end_subject && *t != '\n') t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4610      end_subject = t;      end_subject = t;
4611      }      }
4612    
# Line 3445  do Line 4616  do
4616      {      {
4617      if (first_byte_caseless)      if (first_byte_caseless)
4618        while (start_match < end_subject &&        while (start_match < end_subject &&
4619               match_block.lcc[*start_match] != first_byte)               md->lcc[*start_match] != first_byte)
4620          start_match++;          start_match++;
4621      else      else
4622        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4623          start_match++;          start_match++;
4624      }      }
4625    
4626    /* Or to just after \n for a multiline match if possible */    /* Or to just after a linebreak for a multiline match if possible */
4627    
4628    else if (startline)    else if (startline)
4629      {      {
4630      if (start_match > match_block.start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4631        {        {
4632        while (start_match < end_subject && start_match[-1] != NEWLINE)        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4633            start_match++;
4634    
4635          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4636          and we are now at a LF, advance the match position by one more character.
4637          */
4638    
4639          if (start_match[-1] == '\r' &&
4640               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4641               start_match < end_subject &&
4642               *start_match == '\n')
4643          start_match++;          start_match++;
4644        }        }
4645      }      }
# Line 3480  do Line 4661  do
4661    
4662  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4663    printf(">>>> Match against: ");    printf(">>>> Match against: ");
4664    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, md);
4665    printf("\n");    printf("\n");
4666  #endif  #endif
4667    
# Line 3494  do Line 4675  do
4675    
4676    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end can
4677    take a long time, and give bad performance on quite ordinary patterns. This    take a long time, and give bad performance on quite ordinary patterns. This
4678    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4679    don't do this when the string is sufficiently long.    string... so we don't do this when the string is sufficiently long.
4680    
4681    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested.
4682    */    */
4683    
4684    if (req_byte >= 0 &&    if (req_byte >= 0 &&
4685        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
4686        !match_block.partial)        !md->partial)
4687      {      {
4688      register const uschar *p = start_match + ((first_byte >= 0)? 1 : 0);      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4689    
4690      /* We don't need to repeat the search if we haven't yet reached the      /* We don't need to repeat the search if we haven't yet reached the
4691      place we found it at last time. */      place we found it at last time. */
# Line 3527  do Line 4708  do
4708            }            }
4709          }          }
4710    
4711        /* If we can't find the required character, break the matching loop */        /* If we can't find the required character, break the matching loop,
4712          forcing a match failure. */
4713    
4714        if (p >= end_subject) break;        if (p >= end_subject)
4715            {
4716            rc = MATCH_NOMATCH;
4717            break;
4718            }
4719    
4720        /* If we have found the required character, save the point where we        /* If we have found the required character, save the point where we
4721        found it, so that we don't search again next time round the loop if        found it, so that we don't search again next time round the loop if
# Line 3539  do Line 4725  do
4725        }        }
4726      }      }
4727    
4728    /* When a match occurs, substrings will be set for all internal extractions;    /* OK, we can now run the match. */
4729    we just need to set up the whole thing as substring 0 before returning. If  
4730    there were too many extractions, set the return code to zero. In the case    md->start_match_ptr = start_match;
4731    where we had to get some local store to hold offsets for backreferences, copy    md->match_call_count = 0;
4732    those back references that we can. In this case there need not be overflow    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
   if certain parts of the pattern were not used. */  
   
   match_block.start_match = start_match;  
   match_block.match_call_count = 0;  
   
   rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,  
     match_isgroup);  
   
   /* When the result is no match, if the subject's first character was a  
   newline and the PCRE_FIRSTLINE option is set, break (which will return  
   PCRE_ERROR_NOMATCH). The option requests that a match occur before the first  
   newline in the subject. Otherwise, advance the pointer to the next character  
   and continue - but the continuation will actually happen only when the  
   pattern is not anchored. */  
4733    
4734    if (rc == MATCH_NOMATCH)    switch(rc)
4735      {      {
4736      if (firstline && *start_match == NEWLINE) break;      /* NOMATCH and PRUNE advance by one character. THEN at this level acts
4737      start_match++;      exactly like PRUNE. */
4738    
4739        case MATCH_NOMATCH:
4740        case MATCH_PRUNE:
4741        case MATCH_THEN:
4742        new_start_match = start_match + 1;
4743  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4744      if (match_block.utf8)      if (utf8)
4745        while(start_match < end_subject && (*start_match & 0xc0) == 0x80)        while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
4746          start_match++;          new_start_match++;
4747  #endif  #endif
4748      continue;      break;
     }  
4749    
4750    if (rc != MATCH_MATCH)      /* SKIP passes back the next starting point explicitly. */
4751      {  
4752      DPRINTF((">>>> error: returning %d\n", rc));      case MATCH_SKIP:
4753      return rc;      new_start_match = md->start_match_ptr;
4754        break;
4755    
4756        /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
4757    
4758        case MATCH_COMMIT:
4759        rc = MATCH_NOMATCH;
4760        goto ENDLOOP;
4761    
4762        /* Any other return is some kind of error. */
4763    
4764        default:
4765        goto ENDLOOP;
4766      }      }
4767    
4768    /* We have a match! Copy the offset information from temporary store if    /* Control reaches here for the various types of "no match at this point"
4769    necessary */    result. Reset the code to MATCH_NOMATCH for subsequent checking. */
4770    
4771      rc = MATCH_NOMATCH;
4772    
4773      /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4774      newline in the subject (though it may continue over the newline). Therefore,
4775      if we have just failed to match, starting at a newline, do not continue. */
4776    
4777      if (firstline && IS_NEWLINE(start_match)) break;
4778    
4779      /* Advance to new matching position */
4780    
4781      start_match = new_start_match;
4782    
4783      /* Break the loop if the pattern is anchored or if we have passed the end of
4784      the subject. */
4785    
4786      if (anchored || start_match > end_subject) break;
4787    
4788      /* If we have just passed a CR and the newline option is CRLF or ANY or
4789      ANYCRLF, and we are now at a LF, advance the match position by one more
4790      character. */
4791    
4792      if (start_match[-1] == '\r' &&
4793           (md->nltype == NLTYPE_ANY ||
4794            md->nltype == NLTYPE_ANYCRLF ||
4795            md->nllen == 2) &&
4796           start_match < end_subject &&
4797           *start_match == '\n')
4798        start_match++;
4799    
4800      }   /* End of for(;;) "bumpalong" loop */
4801    
4802    /* ==========================================================================*/
4803    
4804    /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4805    conditions is true:
4806    
4807    (1) The pattern is anchored or the match was failed by (*COMMIT);
4808    
4809    (2) We are past the end of the subject;
4810    
4811    (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4812        this option requests that a match occur at or before the first newline in
4813        the subject.
4814    
4815    When we have a match and the offset vector is big enough to deal with any
4816    backreferences, captured substring offsets will already be set up. In the case
4817    where we had to get some local store to hold offsets for backreference
4818    processing, copy those that we can. In this case there need not be overflow if
4819    certain parts of the pattern were not used, even though there are more
4820    capturing parentheses than vector slots. */
4821    
4822    ENDLOOP:
4823    
4824    if (rc == MATCH_MATCH)
4825      {
4826    if (using_temporary_offsets)    if (using_temporary_offsets)
4827      {      {
4828      if (offsetcount >= 4)      if (offsetcount >= 4)
4829        {        {
4830        memcpy(offsets + 2, match_block.offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
4831          (offsetcount - 2) * sizeof(int));          (offsetcount - 2) * sizeof(int));
4832        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
4833        }        }