/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC revision 200 by ph10, Wed Aug 1 09:10:40 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include <config.h>
47    #endif
48    
49    #define NLBLOCK md             /* Block containing newline information */
50    #define PSSTART start_subject  /* Field containing processed string start */
51    #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    /* Undefine some potentially clashing cpp symbols */
56    
57  /* Structure for building a chain of data that actually lives on the  #undef min
58  stack, for holding the values of the subject pointer at the start of each  #undef max
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   const uschar *epb_saved_eptr;  
 } eptrblock;  
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 101  Returns:     nothing Line 101  Returns:     nothing
101  static void  static void
102  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
103  {  {
104  int c;  unsigned int c;
105  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
106  while (length-- > 0)  while (length-- > 0)
107    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 128  Returns:      TRUE if matched Line 128  Returns:      TRUE if matched
128  */  */
129    
130  static BOOL  static BOOL
131  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
132    unsigned long int ims)    unsigned long int ims)
133  {  {
134  const uschar *p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
135    
136  #ifdef DEBUG  #ifdef DEBUG
137  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 169  return TRUE; Line 169  return TRUE;
169  ****************************************************************************  ****************************************************************************
170                     RECURSION IN THE match() FUNCTION                     RECURSION IN THE match() FUNCTION
171    
172  The match() function is highly recursive. Some regular expressions can cause  The match() function is highly recursive, though not every recursive call
173  it to recurse thousands of times. I was writing for Unix, so I just let it  increases the recursive depth. Nevertheless, some regular expressions can cause
174  call itself recursively. This uses the stack for saving everything that has  it to recurse to a great depth. I was writing for Unix, so I just let it call
175  to be saved for a recursive call. On Unix, the stack can be large, and this  itself recursively. This uses the stack for saving everything that has to be
176  works fine.  saved for a recursive call. On Unix, the stack can be large, and this works
177    fine.
178  It turns out that on non-Unix systems there are problems with programs that  
179  use a lot of stack. (This despite the fact that every last chip has oodles  It turns out that on some non-Unix-like systems there are problems with
180  of memory these days, and techniques for extending the stack have been known  programs that use a lot of stack. (This despite the fact that every last chip
181  for decades.) So....  has oodles of memory these days, and techniques for extending the stack have
182    been known for decades.) So....
183    
184  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
185  calls by keeping local variables that need to be preserved in blocks of memory  calls by keeping local variables that need to be preserved in blocks of memory
186  obtained from malloc instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
187  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
188  always used to.  always used to.
189    
190    The original heap-recursive code used longjmp(). However, it seems that this
191    can be very slow on some operating systems. Following a suggestion from Stan
192    Switzer, the use of longjmp() has been abolished, at the cost of having to
193    provide a unique number for each call to RMATCH. There is no way of generating
194    a sequence of numbers at compile time in C. I have given them names, to make
195    them stand out more clearly.
196    
197    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
198    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
199    tests. Furthermore, not using longjmp() means that local dynamic variables
200    don't have indeterminate values; this has meant that the frame size can be
201    reduced because the result can be "passed back" by straight setting of the
202    variable instead of being passed in the frame.
203  ****************************************************************************  ****************************************************************************
204  ***************************************************************************/  ***************************************************************************/
205    
206    
207  /* These versions of the macros use the stack, as normal */  /* Numbers for RMATCH calls */
208    
209    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
210           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
211           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
212           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
213           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50 };
214    
215    
216    /* These versions of the macros use the stack, as normal. There are debugging
217    versions and production versions. Note that the "rw" argument of RMATCH isn't
218    actually used in this definition. */
219    
220  #ifndef NO_RECURSE  #ifndef NO_RECURSE
221  #define REGISTER register  #define REGISTER register
222  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)  
223    #ifdef DEBUG
224    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
225      { \
226      printf("match() called in line %d\n", __LINE__); \
227      rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
228      printf("to line %d\n", __LINE__); \
229      }
230    #define RRETURN(ra) \
231      { \
232      printf("match() returned %d from line %d ", ra, __LINE__); \
233      return ra; \
234      }
235    #else
236    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
237      rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
238  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
239    #endif
240    
241  #else  #else
242    
243    
244  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
245  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
246  match(), which never changes. */  argument of match(), which never changes. */
247    
248  #define REGISTER  #define REGISTER
249    
250  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
251    {\    {\
252    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
253    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
254      {\    newframe->Xeptr = ra;\
255      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
256      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
257      newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
258      newframe->Xims = re;\    newframe->Xims = re;\
259      newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
260      newframe->Xflags = rg;\    newframe->Xflags = rg;\
261      newframe->Xprevframe = frame;\    newframe->Xrdepth = frame->Xrdepth + 1;\
262      frame = newframe;\    newframe->Xprevframe = frame;\
263      DPRINTF(("restarting from line %d\n", __LINE__));\    frame = newframe;\
264      goto HEAP_RECURSE;\    DPRINTF(("restarting from line %d\n", __LINE__));\
265      }\    goto HEAP_RECURSE;\
266    else\    L_##rw:\
267      {\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
268    }    }
269    
270  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 235  match(), which never changes. */ Line 274  match(), which never changes. */
274    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
275    if (frame != NULL)\    if (frame != NULL)\
276      {\      {\
277      frame->Xresult = ra;\      rrc = ra;\
278      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
279      }\      }\
280    return ra;\    return ra;\
281    }    }
# Line 252  typedef struct heapframe { Line 290  typedef struct heapframe {
290    
291    const uschar *Xeptr;    const uschar *Xeptr;
292    const uschar *Xecode;    const uschar *Xecode;
293      const uschar *Xmstart;
294    int Xoffset_top;    int Xoffset_top;
295    long int Xims;    long int Xims;
296    eptrblock *Xeptrb;    eptrblock *Xeptrb;
297    int Xflags;    int Xflags;
298      unsigned int Xrdepth;
299    
300    /* Function local variables */    /* Function local variables */
301    
# Line 271  typedef struct heapframe { Line 311  typedef struct heapframe {
311    
312    BOOL Xcur_is_word;    BOOL Xcur_is_word;
313    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
314    BOOL Xprev_is_word;    BOOL Xprev_is_word;
315    
316    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
317    
318  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
319    int Xprop_type;    int Xprop_type;
320      int Xprop_value;
321    int Xprop_fail_result;    int Xprop_fail_result;
322    int Xprop_category;    int Xprop_category;
323    int Xprop_chartype;    int Xprop_chartype;
324    int Xprop_othercase;    int Xprop_script;
325    int Xprop_test_against;    int Xoclength;
326    int *Xprop_test_variable;    uschar Xocchars[8];
327  #endif  #endif
328    
329    int Xctype;    int Xctype;
330    int Xfc;    unsigned int Xfc;
331    int Xfi;    int Xfi;
332    int Xlength;    int Xlength;
333    int Xmax;    int Xmax;
# Line 301  typedef struct heapframe { Line 341  typedef struct heapframe {
341    
342    eptrblock Xnewptrb;    eptrblock Xnewptrb;
343    
344    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
345    
346    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
347    
348  } heapframe;  } heapframe;
349    
# Line 320  typedef struct heapframe { Line 359  typedef struct heapframe {
359  *         Match from current position            *  *         Match from current position            *
360  *************************************************/  *************************************************/
361    
362  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
363  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
364  same response.  same response.
365    
# Line 333  performance. Tests using gcc on a SPARC Line 369  performance. Tests using gcc on a SPARC
369  made performance worse.  made performance worse.
370    
371  Arguments:  Arguments:
372     eptr        pointer in subject     eptr        pointer to current character in subject
373     ecode       position in code     ecode       pointer to current position in compiled code
374       mstart      pointer to the current match start position (can be modified
375                     by encountering \K)
376     offset_top  current top pointer     offset_top  current top pointer
377     md          pointer to "static" info for the match     md          pointer to "static" info for the match
378     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 342  Arguments: Line 380  Arguments:
380                   brackets - for testing for empty matches                   brackets - for testing for empty matches
381     flags       can contain     flags       can contain
382                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
383                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
384                       group that can match an empty string
385       rdepth      the recursion depth
386    
387  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
388                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
389                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
390                   (e.g. stopped by recursion limit)                   (e.g. stopped by repeated call or recursion limit)
391  */  */
392    
393  static int  static int
394  match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
395    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
396    int flags)    int flags, unsigned int rdepth)
397  {  {
398  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
399  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
400  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
401    
402    register int  rrc;         /* Returns from recursive calls */
403    register int  i;           /* Used for loops not involving calls to RMATCH() */
404    register unsigned int c;   /* Character values not kept over RMATCH() calls */
405    register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
406    
407  register int  rrc;    /* Returns from recursive calls */  BOOL minimize, possessive; /* Quantifier options */
 register int  i;      /* Used for loops not involving calls to RMATCH() */  
 register int  c;      /* Character values not kept over RMATCH() calls */  
 register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  
408    
409  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
410  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 377  frame->Xprevframe = NULL;            /* Line 419  frame->Xprevframe = NULL;            /*
419    
420  frame->Xeptr = eptr;  frame->Xeptr = eptr;
421  frame->Xecode = ecode;  frame->Xecode = ecode;
422    frame->Xmstart = mstart;
423  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
424  frame->Xims = ims;  frame->Xims = ims;
425  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
426  frame->Xflags = flags;  frame->Xflags = flags;
427    frame->Xrdepth = rdepth;
428    
429  /* This is where control jumps back to to effect "recursion" */  /* This is where control jumps back to to effect "recursion" */
430    
# Line 390  HEAP_RECURSE: Line 434  HEAP_RECURSE:
434    
435  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
436  #define ecode              frame->Xecode  #define ecode              frame->Xecode
437    #define mstart             frame->Xmstart
438  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
439  #define ims                frame->Xims  #define ims                frame->Xims
440  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
441  #define flags              frame->Xflags  #define flags              frame->Xflags
442    #define rdepth             frame->Xrdepth
443    
444  /* Ditto for the local variables */  /* Ditto for the local variables */
445    
# Line 411  HEAP_RECURSE: Line 457  HEAP_RECURSE:
457    
458  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
459  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
460  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
461    
462  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
463    
464  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
465  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
466    #define prop_value         frame->Xprop_value
467  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
468  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
469  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
470  #define prop_othercase     frame->Xprop_othercase  #define prop_script        frame->Xprop_script
471  #define prop_test_against  frame->Xprop_test_against  #define oclength           frame->Xoclength
472  #define prop_test_variable frame->Xprop_test_variable  #define occhars            frame->Xocchars
473  #endif  #endif
474    
475  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 447  HEAP_RECURSE: Line 493  HEAP_RECURSE:
493  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
494  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
495    
496  #else  #else         /* NO_RECURSE not defined */
497  #define fi i  #define fi i
498  #define fc c  #define fc c
499    
500    
501  #ifdef SUPPORT_UTF8                /* Many of these variables are used ony */  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
502  const uschar *charptr;             /* small blocks of the code. My normal  */  const uschar *charptr;             /* in small blocks of the code. My normal */
503  #endif                             /* style of coding would have declared  */  #endif                             /* style of coding would have declared    */
504  const uschar *callpat;             /* them within each of those blocks.    */  const uschar *callpat;             /* them within each of those blocks.      */
505  const uschar *data;                /* However, in order to accommodate the */  const uschar *data;                /* However, in order to accommodate the   */
506  const uschar *next;                /* version of this code that uses an    */  const uschar *next;                /* version of this code that uses an      */
507  const uschar *pp;                  /* external "stack" implemented on the  */  USPTR         pp;                  /* external "stack" implemented on the    */
508  const uschar *prev;                /* heap, it is easier to declare them   */  const uschar *prev;                /* heap, it is easier to declare them all */
509  const uschar *saved_eptr;          /* all here, so the declarations can    */  USPTR         saved_eptr;          /* here, so the declarations can be cut   */
510                                     /* be cut out in a block. The only      */                                     /* out in a block. The only declarations  */
511  recursion_info new_recursive;      /* declarations within blocks below are */  recursion_info new_recursive;      /* within blocks below are for variables  */
512                                     /* for variables that do not have to    */                                     /* that do not have to be preserved over  */
513  BOOL cur_is_word;                  /* be preserved over a recursive call   */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
514  BOOL condition;                    /* to RMATCH().                         */  BOOL condition;
 BOOL minimize;  
515  BOOL prev_is_word;  BOOL prev_is_word;
516    
517  unsigned long int original_ims;  unsigned long int original_ims;
518    
519  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
520  int prop_type;  int prop_type;
521    int prop_value;
522  int prop_fail_result;  int prop_fail_result;
523  int prop_category;  int prop_category;
524  int prop_chartype;  int prop_chartype;
525  int prop_othercase;  int prop_script;
526  int prop_test_against;  int oclength;
527  int *prop_test_variable;  uschar occhars[8];
528  #endif  #endif
529    
530  int ctype;  int ctype;
# Line 493  int save_offset1, save_offset2, save_off Line 539  int save_offset1, save_offset2, save_off
539  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
540    
541  eptrblock newptrb;  eptrblock newptrb;
542  #endif  #endif     /* NO_RECURSE */
543    
544  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
545  variables. */  variables. */
546    
547  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
548    prop_value = 0;
549  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_against = 0;  
 prop_test_variable = NULL;  
550  #endif  #endif
551    
552  /* OK, now we can get on with the real code of the function. Recursion is  
553  specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,  /* This label is used for tail recursion, which is used in a few cases even
554  these just turn into a recursive call to match() and a "return", respectively.  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
555  However, RMATCH isn't like a function call because it's quite a complicated  used. Thanks to Ian Taylor for noticing this possibility and sending the
556  macro. It has to be used in one particular way. This shouldn't, however, impact  original patch. */
557  performance when true recursion is being used. */  
558    TAIL_RECURSE:
559    
560    /* OK, now we can get on with the real code of the function. Recursive calls
561    are specified by the macro RMATCH and RRETURN is used to return. When
562    NO_RECURSE is *not* defined, these just turn into a recursive call to match()
563    and a "return", respectively (possibly with some debugging if DEBUG is
564    defined). However, RMATCH isn't like a function call because it's quite a
565    complicated macro. It has to be used in one particular way. This shouldn't,
566    however, impact performance when true recursion is being used. */
567    
568    #ifdef SUPPORT_UTF8
569    utf8 = md->utf8;       /* Local copy of the flag */
570    #else
571    utf8 = FALSE;
572    #endif
573    
574    /* First check that we haven't called match() too many times, or that we
575    haven't exceeded the recursive call limit. */
576    
577  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
578    if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
579    
580  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
 utf8 = md->utf8;       /* Local copy of the flag */  
581    
582  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
583  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
584  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
585  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
586    When match() is called in other circumstances, don't add to the chain. The
587    match_cbegroup flag must NOT be used with tail recursion, because the memory
588    block that is used is on the stack, so a new one may be required for each
589    match(). */
590    
591  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
592    {    {
   newptrb.epb_prev = eptrb;  
593    newptrb.epb_saved_eptr = eptr;    newptrb.epb_saved_eptr = eptr;
594      newptrb.epb_prev = eptrb;
595    eptrb = &newptrb;    eptrb = &newptrb;
596    }    }
597    
598  /* Now start processing the operations. */  /* Now start processing the opcodes. */
599    
600  for (;;)  for (;;)
601    {    {
602      minimize = possessive = FALSE;
603    op = *ecode;    op = *ecode;
   minimize = FALSE;  
604    
605    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
606    matching at least one subject character. */    matching at least one subject character. */
607    
608    if (md->partial &&    if (md->partial &&
609        eptr >= md->end_subject &&        eptr >= md->end_subject &&
610        eptr > md->start_match)        eptr > mstart)
611      md->hitend = TRUE;      md->hitend = TRUE;
612    
613    /* Opening capturing bracket. If there is space in the offset vector, save    switch(op)
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
614      {      {
615      number = op - OP_BRA;      /* Handle a capturing bracket. If there is space in the offset vector, save
616        the current subject position in the working slot at the top of the vector.
617      /* For extended extraction brackets (large number), we have to fish out the      We mustn't change the current values of the data slot, because they may be
618      number from a dummy opcode at the start. */      set from a previous iteration of this group, and be referred to by a
619        reference inside the group.
620      if (number > EXTRACT_BASIC_MAX)  
621        number = GET2(ecode, 2+LINK_SIZE);      If the bracket fails to match, we need to restore this value and also the
622        values of the final offsets, in case they were set by a previous iteration
623        of the same bracket.
624    
625        If there isn't enough space in the offset vector, treat this as if it were
626        a non-capturing bracket. Don't worry about setting the flag for the error
627        case here; that is handled in the code for KET. */
628    
629        case OP_CBRA:
630        case OP_SCBRA:
631        number = GET2(ecode, 1+LINK_SIZE);
632      offset = number << 1;      offset = number << 1;
633    
634  #ifdef DEBUG  #ifdef DEBUG
635      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
636        printf("subject=");
637      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
638      printf("\n");      printf("\n");
639  #endif  #endif
# Line 584  for (;;) Line 648  for (;;)
648        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
649        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
650    
651          flags = (op == OP_SCBRA)? match_cbegroup : 0;
652        do        do
653          {          {
654          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
655            match_isgroup);            ims, eptrb, flags, RM1);
656          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
657          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
658          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
# Line 603  for (;;) Line 668  for (;;)
668        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
669        }        }
670    
671      /* Insufficient room for saving captured contents */      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
672        as a non-capturing bracket. */
673    
674      else op = OP_BRA;      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
675      }      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
676    
677    /* Other types of node can be handled by a switch */      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
678    
679    switch(op)      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
680      {      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
681      case OP_BRA:     /* Non-capturing bracket: optimized */  
682      DPRINTF(("start bracket 0\n"));      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
683      do      final alternative within the brackets, we would return the result of a
684        recursive call to match() whatever happened. We can reduce stack usage by
685        turning this into a tail recursion, except in the case when match_cbegroup
686        is set.*/
687    
688        case OP_BRA:
689        case OP_SBRA:
690        DPRINTF(("start non-capturing bracket\n"));
691        flags = (op >= OP_SBRA)? match_cbegroup : 0;
692        for (;;)
693        {        {
694        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
695          match_isgroup);          {
696            if (flags == 0)    /* Not a possibly empty group */
697              {
698              ecode += _pcre_OP_lengths[*ecode];
699              DPRINTF(("bracket 0 tail recursion\n"));
700              goto TAIL_RECURSE;
701              }
702    
703            /* Possibly empty group; can't use tail recursion. */
704    
705            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
706              eptrb, flags, RM48);
707            RRETURN(rrc);
708            }
709    
710          /* For non-final alternatives, continue the loop for a NOMATCH result;
711          otherwise return. */
712    
713          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
714            eptrb, flags, RM2);
715        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
716        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
717        }        }
718      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
719    
720      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
721      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
722      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
723      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
724        obeyed, we can use tail recursion to avoid using another stack frame. */
725    
726      case OP_COND:      case OP_COND:
727      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
728        if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
729          {
730          offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
731          condition = md->recursive != NULL &&
732            (offset == RREF_ANY || offset == md->recursive->group_num);
733          ecode += condition? 3 : GET(ecode, 1);
734          }
735    
736        else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
737        {        {
738        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
739        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
740          (md->recursive != NULL) :        ecode += condition? 3 : GET(ecode, 1);
741          (offset < offset_top && md->offset_vector[offset] >= 0);        }
742        RMATCH(rrc, eptr, ecode + (condition?  
743          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
744          offset_top, md, ims, eptrb, match_isgroup);        {
745        RRETURN(rrc);        condition = FALSE;
746          ecode += GET(ecode, 1);
747        }        }
748    
749      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
750      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
751        assertion. */
752    
753      else      else
754        {        {
755        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
756            match_condassert | match_isgroup);            match_condassert, RM3);
757        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
758          {          {
759          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
760            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
761          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
762          }          }
763        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH)
764          {          {
765          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
766          }          }
767        else ecode += GET(ecode, 1);        else
768        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
769          match_isgroup);          condition = FALSE;
770        RRETURN(rrc);          ecode += GET(ecode, 1);
771            }
772        }        }
     /* Control never reaches here */  
773    
774      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
775      encountered. */      we can use tail recursion to avoid using another stack frame, except when
776        match_cbegroup is required for an unlimited repeat of a possibly empty
777        group. If the second alternative doesn't exist, we can just plough on. */
778    
779      case OP_CREF:      if (condition || *ecode == OP_ALT)
780      case OP_BRANUMBER:        {
781      ecode += 3;        ecode += 1 + LINK_SIZE;
782          if (op == OP_SCOND)        /* Possibly empty group */
783            {
784            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
785            RRETURN(rrc);
786            }
787          else                       /* Group must match something */
788            {
789            flags = 0;
790            goto TAIL_RECURSE;
791            }
792          }
793        else                         /* Condition false & no 2nd alternative */
794          {
795          ecode += 1 + LINK_SIZE;
796          }
797      break;      break;
798    
799      /* End of the pattern. If we are in a recursion, we should restore the  
800      offsets appropriately and continue from after the call. */      /* End of the pattern. If we are in a top-level recursion, we should
801        restore the offsets appropriately and continue from after the call. */
802    
803      case OP_END:      case OP_END:
804      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
805        {        {
806        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
807        DPRINTF(("Hit the end in a (?0) recursion\n"));        DPRINTF(("End of pattern in a (?0) recursion\n"));
808        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
809        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
810          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
811        md->start_match = rec->save_start;        mstart = rec->save_start;
812        ims = original_ims;        ims = original_ims;
813        ecode = rec->after_call;        ecode = rec->after_call;
814        break;        break;
# Line 694  for (;;) Line 817  for (;;)
817      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
818      string - backtracking will then try other alternatives, if any. */      string - backtracking will then try other alternatives, if any. */
819    
820      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
821      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
822      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
823        md->start_match_ptr = mstart;  /* and the start (\K can modify) */
824      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
825    
826      /* Change option settings */      /* Change option settings */
# Line 717  for (;;) Line 841  for (;;)
841      case OP_ASSERTBACK:      case OP_ASSERTBACK:
842      do      do
843        {        {
844        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
845          match_isgroup);          RM4);
846        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
847        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
848        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 744  for (;;) Line 868  for (;;)
868      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
869      do      do
870        {        {
871        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
872          match_isgroup);          RM5);
873        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
874        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
875        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 766  for (;;) Line 890  for (;;)
890  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
891      if (utf8)      if (utf8)
892        {        {
893        c = GET(ecode,1);        i = GET(ecode, 1);
894        for (i = 0; i < c; i++)        while (i-- > 0)
895          {          {
896          eptr--;          eptr--;
897          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
# Line 780  for (;;) Line 904  for (;;)
904      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
905    
906        {        {
907        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
908        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
909        }        }
910    
# Line 800  for (;;) Line 924  for (;;)
924        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 1;   /* Version 1 of the callout block */
925        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
926        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
927        cb.subject          = (const char *)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
928        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
929        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
930        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
931        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
932        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
# Line 837  for (;;) Line 961  for (;;)
961      case OP_RECURSE:      case OP_RECURSE:
962        {        {
963        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
964        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
965            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
966    
967        /* Add to "recursing stack" */        /* Add to "recursing stack" */
968    
# Line 869  for (;;) Line 988  for (;;)
988    
989        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
990              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
991        new_recursive.save_start = md->start_match;        new_recursive.save_start = mstart;
992        md->start_match = eptr;        mstart = eptr;
993    
994        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
995        restore the offset and recursion data. */        restore the offset and recursion data. */
996    
997        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
998          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
999        do        do
1000          {          {
1001          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1002              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
1003          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
1004            {            {
1005              DPRINTF(("Recursion matched\n"));
1006            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1007            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1008              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1009            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
1010            }            }
1011          else if (rrc != MATCH_NOMATCH) RRETURN(rrc);          else if (rrc != MATCH_NOMATCH)
1012              {
1013              DPRINTF(("Recursion gave error %d\n", rrc));
1014              RRETURN(rrc);
1015              }
1016    
1017          md->recursive = &new_recursive;          md->recursive = &new_recursive;
1018          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
# Line 912  for (;;) Line 1037  for (;;)
1037      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer. */
1038    
1039      case OP_ONCE:      case OP_ONCE:
1040        {      prev = ecode;
1041        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1042    
1043        do      do
1044          {        {
1045          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1046            eptrb, match_isgroup);        if (rrc == MATCH_MATCH) break;
1047          if (rrc == MATCH_MATCH) break;        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1048          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += GET(ecode,1);
1049          ecode += GET(ecode,1);        }
1050          }      while (*ecode == OP_ALT);
       while (*ecode == OP_ALT);  
1051    
1052        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1053    
1054        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1055    
1056        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1057        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1058    
1059        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1060    
1061        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1062        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1063    
1064        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1065        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1066        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1067        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1068        course of events. */      course of events. */
1069    
1070        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1071          {        {
1072          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1073          break;        break;
1074          }        }
1075    
1076        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1077        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1078        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1079        opcode. */      any options that changed within the bracket before re-running it, so
1080        check the next opcode. */
1081    
1082        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1083          {        {
1084          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1085          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1086          }        }
1087    
1088        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1089          {        {
1090          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1091          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1092          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1093          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = 0;
1094          }        goto TAIL_RECURSE;
1095        else  /* OP_KETRMAX */        }
1096          {      else  /* OP_KETRMAX */
1097          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        {
1098          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1099          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1100          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += 1 + LINK_SIZE;
1101          }        flags = 0;
1102          goto TAIL_RECURSE;
1103        }        }
1104      RRETURN(MATCH_NOMATCH);      /* Control never gets here */
1105    
1106      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1107      bracketed group and go to there. */      bracketed group and go to there. */
# Line 994  for (;;) Line 1119  for (;;)
1119      case OP_BRAZERO:      case OP_BRAZERO:
1120        {        {
1121        next = ecode+1;        next = ecode+1;
1122        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1123        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1124        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1125        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1126        }        }
1127      break;      break;
1128    
1129      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1130        {        {
1131        next = ecode+1;        next = ecode+1;
1132        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1133        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1134        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1135        ecode++;        ecode++;
1136        }        }
1137      break;      break;
1138    
1139      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. */
     an assertion "group", stop matching and return MATCH_MATCH, but record the  
     current high water mark for use by positive assertions. Do this also  
     for the "once" (not-backup up) groups. */  
1140    
1141      case OP_KET:      case OP_KET:
1142      case OP_KETRMIN:      case OP_KETRMIN:
1143      case OP_KETRMAX:      case OP_KETRMAX:
1144        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
1145    
1146        /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1147        infinite repeats of empty string matches, retrieve the subject start from
1148        the chain. Otherwise, set it NULL. */
1149    
1150        eptrb = eptrb->epb_prev;      if (*prev >= OP_SBRA)
1151          {
1152        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1153            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1154            *prev == OP_ONCE)        }
1155          {      else saved_eptr = NULL;
         md->end_match_ptr = eptr;      /* For ONCE */  
         md->end_offset_top = offset_top;  
         RRETURN(MATCH_MATCH);  
         }  
1156    
1157        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group, stop matching and return
1158        group number back at the start and if necessary complete handling an      MATCH_MATCH, but record the current high water mark for use by positive
1159        extraction by setting the offsets and bumping the high water mark. */      assertions. Do this also for the "once" (atomic) groups. */
1160    
1161        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1162          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1163          number = *prev - OP_BRA;          *prev == OP_ONCE)
1164          {
1165          md->end_match_ptr = eptr;      /* For ONCE */
1166          md->end_offset_top = offset_top;
1167          RRETURN(MATCH_MATCH);
1168          }
1169    
1170          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1171          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1172        bumping the high water mark. Note that whole-pattern recursion is coded as
1173        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1174        when the OP_END is reached. Other recursion is handled here. */
1175    
1176          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1177          offset = number << 1;        {
1178          number = GET2(prev, 1+LINK_SIZE);
1179          offset = number << 1;
1180    
1181  #ifdef DEBUG  #ifdef DEBUG
1182          printf("end bracket %d", number);        printf("end bracket %d", number);
1183          printf("\n");        printf("\n");
1184  #endif  #endif
1185    
1186          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1187          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1188          into group 0, so it won't be picked up here. Instead, we catch it when          {
1189          the OP_END is reached. */          md->offset_vector[offset] =
1190              md->offset_vector[md->offset_end - number];
1191          if (number > 0)          md->offset_vector[offset+1] = eptr - md->start_subject;
1192            {          if (offset_top <= offset) offset_top = offset + 2;
1193            md->capture_last = number;          }
1194            if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
1195              {        /* Handle a recursively called group. Restore the offsets
1196              md->offset_vector[offset] =        appropriately and continue from after the call. */
1197                md->offset_vector[md->offset_end - number];  
1198              md->offset_vector[offset+1] = eptr - md->start_subject;        if (md->recursive != NULL && md->recursive->group_num == number)
1199              if (offset_top <= offset) offset_top = offset + 2;          {
1200              }          recursion_info *rec = md->recursive;
1201            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1202            /* Handle a recursively called group. Restore the offsets          md->recursive = rec->prevrec;
1203            appropriately and continue from after the call. */          mstart = rec->save_start;
1204            memcpy(md->offset_vector, rec->offset_save,
1205            if (md->recursive != NULL && md->recursive->group_num == number)            rec->saved_max * sizeof(int));
1206              {          ecode = rec->after_call;
1207              recursion_info *rec = md->recursive;          ims = original_ims;
1208              DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          break;
             md->recursive = rec->prevrec;  
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
1209          }          }
1210          }
1211    
1212        /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1213        the group. */      flags, in case they got changed during the group. */
1214    
1215        ims = original_ims;      ims = original_ims;
1216        DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
1217    
1218        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1219        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1220        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1221        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1222        course of events. */      course of events. */
1223    
1224        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1225          {        {
1226          ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1227          break;        break;
1228          }        }
1229    
1230        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1231        preceding bracket, in the appropriate order. */      preceding bracket, in the appropriate order. In the second case, we can use
1232        tail recursion to avoid using another stack frame, unless we have an
1233        unlimited repeat of a group that can match an empty string. */
1234    
1235        if (*ecode == OP_KETRMIN)      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1236          {  
1237          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);      if (*ecode == OP_KETRMIN)
1238          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        {
1239          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1240          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1241          }        if (flags != 0)    /* Could match an empty string */
       else  /* OP_KETRMAX */  
1242          {          {
1243          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1244          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          RRETURN(rrc);
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
1245          }          }
1246          ecode = prev;
1247          goto TAIL_RECURSE;
1248        }        }
1249        else  /* OP_KETRMAX */
1250      RRETURN(MATCH_NOMATCH);        {
1251          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1252          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1253          ecode += 1 + LINK_SIZE;
1254          flags = 0;
1255          goto TAIL_RECURSE;
1256          }
1257        /* Control never gets here */
1258    
1259      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1260    
# Line 1135  for (;;) Line 1262  for (;;)
1262      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1263      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1264        {        {
1265        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1266              (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1267          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1268        ecode++;        ecode++;
1269        break;        break;
# Line 1156  for (;;) Line 1284  for (;;)
1284      ecode++;      ecode++;
1285      break;      break;
1286    
1287        /* Reset the start of match point */
1288    
1289        case OP_SET_SOM:
1290        mstart = eptr;
1291        ecode++;
1292        break;
1293    
1294      /* Assert before internal newline if multiline, or before a terminating      /* Assert before internal newline if multiline, or before a terminating
1295      newline unless endonly is set, else end of subject unless noteol is set. */      newline unless endonly is set, else end of subject unless noteol is set. */
1296    
# Line 1163  for (;;) Line 1298  for (;;)
1298      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1299        {        {
1300        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1301          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1302        else        else
1303          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1304        ecode++;        ecode++;
# Line 1174  for (;;) Line 1309  for (;;)
1309        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
1310        if (!md->endonly)        if (!md->endonly)
1311          {          {
1312          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1313             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1314            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1315          ecode++;          ecode++;
1316          break;          break;
1317          }          }
1318        }        }
1319      /* ... else fall through */      /* ... else fall through for endonly */
1320    
1321      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1322    
# Line 1193  for (;;) Line 1328  for (;;)
1328      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1329    
1330      case OP_EODN:      case OP_EODN:
1331      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1332         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1333          RRETURN(MATCH_NOMATCH);
1334      ecode++;      ecode++;
1335      break;      break;
1336    
# Line 1247  for (;;) Line 1383  for (;;)
1383      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1384    
1385      case OP_ANY:      case OP_ANY:
1386      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if ((ims & PCRE_DOTALL) == 0)
1387        RRETURN(MATCH_NOMATCH);        {
1388          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1389          }
1390      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
 #ifdef SUPPORT_UTF8  
1391      if (utf8)      if (utf8)
1392        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 #endif  
1393      ecode++;      ecode++;
1394      break;      break;
1395    
# Line 1343  for (;;) Line 1479  for (;;)
1479      ecode++;      ecode++;
1480      break;      break;
1481    
1482        case OP_ANYNL:
1483        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1484        GETCHARINCTEST(c, eptr);
1485        switch(c)
1486          {
1487          default: RRETURN(MATCH_NOMATCH);
1488          case 0x000d:
1489          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1490          break;
1491          case 0x000a:
1492          case 0x000b:
1493          case 0x000c:
1494          case 0x0085:
1495          case 0x2028:
1496          case 0x2029:
1497          break;
1498          }
1499        ecode++;
1500        break;
1501    
1502        case OP_NOT_HSPACE:
1503        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1504        GETCHARINCTEST(c, eptr);
1505        switch(c)
1506          {
1507          default: break;
1508          case 0x09:      /* HT */
1509          case 0x20:      /* SPACE */
1510          case 0xa0:      /* NBSP */
1511          case 0x1680:    /* OGHAM SPACE MARK */
1512          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1513          case 0x2000:    /* EN QUAD */
1514          case 0x2001:    /* EM QUAD */
1515          case 0x2002:    /* EN SPACE */
1516          case 0x2003:    /* EM SPACE */
1517          case 0x2004:    /* THREE-PER-EM SPACE */
1518          case 0x2005:    /* FOUR-PER-EM SPACE */
1519          case 0x2006:    /* SIX-PER-EM SPACE */
1520          case 0x2007:    /* FIGURE SPACE */
1521          case 0x2008:    /* PUNCTUATION SPACE */
1522          case 0x2009:    /* THIN SPACE */
1523          case 0x200A:    /* HAIR SPACE */
1524          case 0x202f:    /* NARROW NO-BREAK SPACE */
1525          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1526          case 0x3000:    /* IDEOGRAPHIC SPACE */
1527          RRETURN(MATCH_NOMATCH);
1528          }
1529        ecode++;
1530        break;
1531    
1532        case OP_HSPACE:
1533        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1534        GETCHARINCTEST(c, eptr);
1535        switch(c)
1536          {
1537          default: RRETURN(MATCH_NOMATCH);
1538          case 0x09:      /* HT */
1539          case 0x20:      /* SPACE */
1540          case 0xa0:      /* NBSP */
1541          case 0x1680:    /* OGHAM SPACE MARK */
1542          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1543          case 0x2000:    /* EN QUAD */
1544          case 0x2001:    /* EM QUAD */
1545          case 0x2002:    /* EN SPACE */
1546          case 0x2003:    /* EM SPACE */
1547          case 0x2004:    /* THREE-PER-EM SPACE */
1548          case 0x2005:    /* FOUR-PER-EM SPACE */
1549          case 0x2006:    /* SIX-PER-EM SPACE */
1550          case 0x2007:    /* FIGURE SPACE */
1551          case 0x2008:    /* PUNCTUATION SPACE */
1552          case 0x2009:    /* THIN SPACE */
1553          case 0x200A:    /* HAIR SPACE */
1554          case 0x202f:    /* NARROW NO-BREAK SPACE */
1555          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1556          case 0x3000:    /* IDEOGRAPHIC SPACE */
1557          break;
1558          }
1559        ecode++;
1560        break;
1561    
1562        case OP_NOT_VSPACE:
1563        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1564        GETCHARINCTEST(c, eptr);
1565        switch(c)
1566          {
1567          default: break;
1568          case 0x0a:      /* LF */
1569          case 0x0b:      /* VT */
1570          case 0x0c:      /* FF */
1571          case 0x0d:      /* CR */
1572          case 0x85:      /* NEL */
1573          case 0x2028:    /* LINE SEPARATOR */
1574          case 0x2029:    /* PARAGRAPH SEPARATOR */
1575          RRETURN(MATCH_NOMATCH);
1576          }
1577        ecode++;
1578        break;
1579    
1580        case OP_VSPACE:
1581        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1582        GETCHARINCTEST(c, eptr);
1583        switch(c)
1584          {
1585          default: RRETURN(MATCH_NOMATCH);
1586          case 0x0a:      /* LF */
1587          case 0x0b:      /* VT */
1588          case 0x0c:      /* FF */
1589          case 0x0d:      /* CR */
1590          case 0x85:      /* NEL */
1591          case 0x2028:    /* LINE SEPARATOR */
1592          case 0x2029:    /* PARAGRAPH SEPARATOR */
1593          break;
1594          }
1595        ecode++;
1596        break;
1597    
1598  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1599      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1600      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1352  for (;;) Line 1604  for (;;)
1604      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1605      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1606        {        {
1607        int chartype, rqdtype;        int chartype, script;
1608        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = _pcre_ucp_findchar(c, &chartype, &othercase);  
   
       rqdtype = *(++ecode);  
       ecode++;  
1609    
1610        if (rqdtype >= 128)        switch(ecode[1])
1611          {          {
1612          if ((rqdtype - 128 != category) == (op == OP_PROP))          case PT_ANY:
1613            if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1614            break;
1615    
1616            case PT_LAMP:
1617            if ((chartype == ucp_Lu ||
1618                 chartype == ucp_Ll ||
1619                 chartype == ucp_Lt) == (op == OP_NOTPROP))
1620            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1621          }           break;
1622        else  
1623          {          case PT_GC:
1624          if ((rqdtype != chartype) == (op == OP_PROP))          if ((ecode[2] != category) == (op == OP_PROP))
1625              RRETURN(MATCH_NOMATCH);
1626            break;
1627    
1628            case PT_PC:
1629            if ((ecode[2] != chartype) == (op == OP_PROP))
1630            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1631            break;
1632    
1633            case PT_SC:
1634            if ((ecode[2] != script) == (op == OP_PROP))
1635              RRETURN(MATCH_NOMATCH);
1636            break;
1637    
1638            default:
1639            RRETURN(PCRE_ERROR_INTERNAL);
1640          }          }
1641    
1642          ecode += 3;
1643        }        }
1644      break;      break;
1645    
# Line 1379  for (;;) Line 1650  for (;;)
1650      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1651      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1652        {        {
1653        int chartype;        int chartype, script;
1654        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = _pcre_ucp_findchar(c, &chartype, &othercase);  
1655        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1656        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1657          {          {
# Line 1390  for (;;) Line 1660  for (;;)
1660            {            {
1661            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1662            }            }
1663          category = _pcre_ucp_findchar(c, &chartype, &othercase);          category = _pcre_ucp_findprop(c, &chartype, &script);
1664          if (category != ucp_M) break;          if (category != ucp_M) break;
1665          eptr += len;          eptr += len;
1666          }          }
# Line 1480  for (;;) Line 1750  for (;;)
1750          {          {
1751          for (fi = min;; fi++)          for (fi = min;; fi++)
1752            {            {
1753            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1754            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1755            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1756              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 1501  for (;;) Line 1771  for (;;)
1771            }            }
1772          while (eptr >= pp)          while (eptr >= pp)
1773            {            {
1774            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1775            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1776            eptr -= length;            eptr -= length;
1777            }            }
# Line 1606  for (;;) Line 1876  for (;;)
1876            {            {
1877            for (fi = min;; fi++)            for (fi = min;; fi++)
1878              {              {
1879              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1880              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1881              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1882              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 1626  for (;;) Line 1896  for (;;)
1896            {            {
1897            for (fi = min;; fi++)            for (fi = min;; fi++)
1898              {              {
1899              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1900              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1901              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1902              c = *eptr++;              c = *eptr++;
# Line 1663  for (;;) Line 1933  for (;;)
1933              }              }
1934            for (;;)            for (;;)
1935              {              {
1936              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1937              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1938              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
1939              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1682  for (;;) Line 1952  for (;;)
1952              }              }
1953            while (eptr >= pp)            while (eptr >= pp)
1954              {              {
1955              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
             eptr--;  
1956              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1957                eptr--;
1958              }              }
1959            }            }
1960    
# Line 1753  for (;;) Line 2023  for (;;)
2023          {          {
2024          for (fi = min;; fi++)          for (fi = min;; fi++)
2025            {            {
2026            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2027            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2028            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2029            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
# Line 1777  for (;;) Line 2047  for (;;)
2047            }            }
2048          for(;;)          for(;;)
2049            {            {
2050            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2051            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2052            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2053            BACKCHAR(eptr)            BACKCHAR(eptr)
# Line 1836  for (;;) Line 2106  for (;;)
2106    
2107        else        else
2108          {          {
2109          int dc;          unsigned int dc;
2110          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
2111          ecode += length;          ecode += length;
2112    
2113          /* If we have Unicode property support, we can use it to test the other          /* If we have Unicode property support, we can use it to test the other
2114          case of the character, if there is one. The result of _pcre_ucp_findchar() is          case of the character, if there is one. */
         < 0 if the char isn't found, and othercase is returned as zero if there  
         isn't one. */  
2115    
2116          if (fc != dc)          if (fc != dc)
2117            {            {
2118  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2119            int chartype;            if (dc != _pcre_ucp_othercase(fc))
           int othercase;  
           if (_pcre_ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)  
2120  #endif  #endif
2121              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2122            }            }
# Line 1867  for (;;) Line 2133  for (;;)
2133        }        }
2134      break;      break;
2135    
2136      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2137    
2138      case OP_EXACT:      case OP_EXACT:
2139      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2140      ecode += 3;      ecode += 3;
2141      goto REPEATCHAR;      goto REPEATCHAR;
2142    
2143        case OP_POSUPTO:
2144        possessive = TRUE;
2145        /* Fall through */
2146    
2147      case OP_UPTO:      case OP_UPTO:
2148      case OP_MINUPTO:      case OP_MINUPTO:
2149      min = 0;      min = 0;
# Line 1882  for (;;) Line 2152  for (;;)
2152      ecode += 3;      ecode += 3;
2153      goto REPEATCHAR;      goto REPEATCHAR;
2154    
2155        case OP_POSSTAR:
2156        possessive = TRUE;
2157        min = 0;
2158        max = INT_MAX;
2159        ecode++;
2160        goto REPEATCHAR;
2161    
2162        case OP_POSPLUS:
2163        possessive = TRUE;
2164        min = 1;
2165        max = INT_MAX;
2166        ecode++;
2167        goto REPEATCHAR;
2168    
2169        case OP_POSQUERY:
2170        possessive = TRUE;
2171        min = 0;
2172        max = 1;
2173        ecode++;
2174        goto REPEATCHAR;
2175    
2176      case OP_STAR:      case OP_STAR:
2177      case OP_MINSTAR:      case OP_MINSTAR:
2178      case OP_PLUS:      case OP_PLUS:
# Line 1913  for (;;) Line 2204  for (;;)
2204    
2205        if (length > 1)        if (length > 1)
2206          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2207  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2208          int othercase;          unsigned int othercase;
         int chartype;  
2209          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2210               _pcre_ucp_findchar(fc, &chartype, &othercase) >= 0 &&              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
              othercase > 0)  
2211            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2212            else oclength = 0;
2213  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2214    
2215          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2216            {            {
2217            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (memcmp(eptr, charptr, length) == 0) eptr += length;
2218    #ifdef SUPPORT_UCP
2219            /* Need braces because of following else */            /* Need braces because of following else */
2220            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2221            else            else
# Line 1935  for (;;) Line 2223  for (;;)
2223              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2224              eptr += oclength;              eptr += oclength;
2225              }              }
2226    #else   /* without SUPPORT_UCP */
2227              else { RRETURN(MATCH_NOMATCH); }
2228    #endif  /* SUPPORT_UCP */
2229            }            }
2230    
2231          if (min == max) continue;          if (min == max) continue;
# Line 1943  for (;;) Line 2234  for (;;)
2234            {            {
2235            for (fi = min;; fi++)            for (fi = min;; fi++)
2236              {              {
2237              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2238              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2239              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2240              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2241    #ifdef SUPPORT_UCP
2242              /* Need braces because of following else */              /* Need braces because of following else */
2243              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2244              else              else
# Line 1954  for (;;) Line 2246  for (;;)
2246                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2247                eptr += oclength;                eptr += oclength;
2248                }                }
2249    #else   /* without SUPPORT_UCP */
2250                else { RRETURN (MATCH_NOMATCH); }
2251    #endif  /* SUPPORT_UCP */
2252              }              }
2253            /* Control never gets here */            /* Control never gets here */
2254            }            }
2255          else  
2256            else  /* Maximize */
2257            {            {
2258            pp = eptr;            pp = eptr;
2259            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2260              {              {
2261              if (eptr > md->end_subject - length) break;              if (eptr > md->end_subject - length) break;
2262              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2263    #ifdef SUPPORT_UCP
2264              else if (oclength == 0) break;              else if (oclength == 0) break;
2265              else              else
2266                {                {
2267                if (memcmp(eptr, occhars, oclength) != 0) break;                if (memcmp(eptr, occhars, oclength) != 0) break;
2268                eptr += oclength;                eptr += oclength;
2269                }                }
2270    #else   /* without SUPPORT_UCP */
2271                else break;
2272    #endif  /* SUPPORT_UCP */
2273              }              }
2274            while (eptr >= pp)  
2275              if (possessive) continue;
2276              for(;;)
2277             {             {
2278             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2279             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2280               if (eptr == pp) RRETURN(MATCH_NOMATCH);
2281    #ifdef SUPPORT_UCP
2282               eptr--;
2283               BACKCHAR(eptr);
2284    #else   /* without SUPPORT_UCP */
2285             eptr -= length;             eptr -= length;
2286    #endif  /* SUPPORT_UCP */
2287             }             }
           RRETURN(MATCH_NOMATCH);  
2288            }            }
2289          /* Control never gets here */          /* Control never gets here */
2290          }          }
# Line 2017  for (;;) Line 2324  for (;;)
2324          {          {
2325          for (fi = min;; fi++)          for (fi = min;; fi++)
2326            {            {
2327            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2328            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2329            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
2330                fc != md->lcc[*eptr++])                fc != md->lcc[*eptr++])
# Line 2025  for (;;) Line 2332  for (;;)
2332            }            }
2333          /* Control never gets here */          /* Control never gets here */
2334          }          }
2335        else        else  /* Maximize */
2336          {          {
2337          pp = eptr;          pp = eptr;
2338          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2033  for (;;) Line 2340  for (;;)
2340            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2341            eptr++;            eptr++;
2342            }            }
2343            if (possessive) continue;
2344          while (eptr >= pp)          while (eptr >= pp)
2345            {            {
2346            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2347            eptr--;            eptr--;
2348            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2349            }            }
# Line 2054  for (;;) Line 2362  for (;;)
2362          {          {
2363          for (fi = min;; fi++)          for (fi = min;; fi++)
2364            {            {
2365            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2366            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2367            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2368              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2369            }            }
2370          /* Control never gets here */          /* Control never gets here */
2371          }          }
2372        else        else  /* Maximize */
2373          {          {
2374          pp = eptr;          pp = eptr;
2375          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2069  for (;;) Line 2377  for (;;)
2377            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2378            eptr++;            eptr++;
2379            }            }
2380            if (possessive) continue;
2381          while (eptr >= pp)          while (eptr >= pp)
2382            {            {
2383            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2384            eptr--;            eptr--;
2385            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2386            }            }
# Line 2121  for (;;) Line 2430  for (;;)
2430      ecode += 3;      ecode += 3;
2431      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2432    
2433        case OP_NOTPOSSTAR:
2434        possessive = TRUE;
2435        min = 0;
2436        max = INT_MAX;
2437        ecode++;
2438        goto REPEATNOTCHAR;
2439    
2440        case OP_NOTPOSPLUS:
2441        possessive = TRUE;
2442        min = 1;
2443        max = INT_MAX;
2444        ecode++;
2445        goto REPEATNOTCHAR;
2446    
2447        case OP_NOTPOSQUERY:
2448        possessive = TRUE;
2449        min = 0;
2450        max = 1;
2451        ecode++;
2452        goto REPEATNOTCHAR;
2453    
2454        case OP_NOTPOSUPTO:
2455        possessive = TRUE;
2456        min = 0;
2457        max = GET2(ecode, 1);
2458        ecode += 3;
2459        goto REPEATNOTCHAR;
2460    
2461      case OP_NOTSTAR:      case OP_NOTSTAR:
2462      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2463      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2160  for (;;) Line 2497  for (;;)
2497        /* UTF-8 mode */        /* UTF-8 mode */
2498        if (utf8)        if (utf8)
2499          {          {
2500          register int d;          register unsigned int d;
2501          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2502            {            {
2503            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2185  for (;;) Line 2522  for (;;)
2522          /* UTF-8 mode */          /* UTF-8 mode */
2523          if (utf8)          if (utf8)
2524            {            {
2525            register int d;            register unsigned int d;
2526            for (fi = min;; fi++)            for (fi = min;; fi++)
2527              {              {
2528              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2529              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2530              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2531              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
# Line 2202  for (;;) Line 2539  for (;;)
2539            {            {
2540            for (fi = min;; fi++)            for (fi = min;; fi++)
2541              {              {
2542              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2543              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2544              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2545                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2221  for (;;) Line 2558  for (;;)
2558          /* UTF-8 mode */          /* UTF-8 mode */
2559          if (utf8)          if (utf8)
2560            {            {
2561            register int d;            register unsigned int d;
2562            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2563              {              {
2564              int len = 1;              int len = 1;
# Line 2231  for (;;) Line 2568  for (;;)
2568              if (fc == d) break;              if (fc == d) break;
2569              eptr += len;              eptr += len;
2570              }              }
2571            for(;;)          if (possessive) continue;
2572            for(;;)
2573              {              {
2574              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2575              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2576              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2577              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2248  for (;;) Line 2586  for (;;)
2586              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2587              eptr++;              eptr++;
2588              }              }
2589              if (possessive) continue;
2590            while (eptr >= pp)            while (eptr >= pp)
2591              {              {
2592              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2593              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2594              eptr--;              eptr--;
2595              }              }
# Line 2269  for (;;) Line 2608  for (;;)
2608        /* UTF-8 mode */        /* UTF-8 mode */
2609        if (utf8)        if (utf8)
2610          {          {
2611          register int d;          register unsigned int d;
2612          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2613            {            {
2614            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2292  for (;;) Line 2631  for (;;)
2631          /* UTF-8 mode */          /* UTF-8 mode */
2632          if (utf8)          if (utf8)
2633            {            {
2634            register int d;            register unsigned int d;
2635            for (fi = min;; fi++)            for (fi = min;; fi++)
2636              {              {
2637              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2638              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2639              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2640              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fi >= max || eptr >= md->end_subject || fc == d)
# Line 2308  for (;;) Line 2647  for (;;)
2647            {            {
2648            for (fi = min;; fi++)            for (fi = min;; fi++)
2649              {              {
2650              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2651              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2652              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2653                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2327  for (;;) Line 2666  for (;;)
2666          /* UTF-8 mode */          /* UTF-8 mode */
2667          if (utf8)          if (utf8)
2668            {            {
2669            register int d;            register unsigned int d;
2670            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2671              {              {
2672              int len = 1;              int len = 1;
# Line 2336  for (;;) Line 2675  for (;;)
2675              if (fc == d) break;              if (fc == d) break;
2676              eptr += len;              eptr += len;
2677              }              }
2678              if (possessive) continue;
2679            for(;;)            for(;;)
2680              {              {
2681              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2682              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2683              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2684              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2353  for (;;) Line 2693  for (;;)
2693              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2694              eptr++;              eptr++;
2695              }              }
2696              if (possessive) continue;
2697            while (eptr >= pp)            while (eptr >= pp)
2698              {              {
2699              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2700              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2701              eptr--;              eptr--;
2702              }              }
# Line 2384  for (;;) Line 2725  for (;;)
2725      ecode += 3;      ecode += 3;
2726      goto REPEATTYPE;      goto REPEATTYPE;
2727    
2728        case OP_TYPEPOSSTAR:
2729        possessive = TRUE;
2730        min = 0;
2731        max = INT_MAX;
2732        ecode++;
2733        goto REPEATTYPE;
2734    
2735        case OP_TYPEPOSPLUS:
2736        possessive = TRUE;
2737        min = 1;
2738        max = INT_MAX;
2739        ecode++;
2740        goto REPEATTYPE;
2741    
2742        case OP_TYPEPOSQUERY:
2743        possessive = TRUE;
2744        min = 0;
2745        max = 1;
2746        ecode++;
2747        goto REPEATTYPE;
2748    
2749        case OP_TYPEPOSUPTO:
2750        possessive = TRUE;
2751        min = 0;
2752        max = GET2(ecode, 1);
2753        ecode += 3;
2754        goto REPEATTYPE;
2755    
2756      case OP_TYPESTAR:      case OP_TYPESTAR:
2757      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
2758      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2408  for (;;) Line 2777  for (;;)
2777        {        {
2778        prop_fail_result = ctype == OP_NOTPROP;        prop_fail_result = ctype == OP_NOTPROP;
2779        prop_type = *ecode++;        prop_type = *ecode++;
2780        if (prop_type >= 128)        prop_value = *ecode++;
         {  
         prop_test_against = prop_type - 128;  
         prop_test_variable = &prop_category;  
         }  
       else  
         {  
         prop_test_against = prop_type;  
         prop_test_variable = &prop_chartype;  
         }  
2781        }        }
2782      else prop_type = -1;      else prop_type = -1;
2783  #endif  #endif
# Line 2434  for (;;) Line 2794  for (;;)
2794      if (min > 0)      if (min > 0)
2795        {        {
2796  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2797        if (prop_type > 0)        if (prop_type >= 0)
2798          {          {
2799          for (i = 1; i <= min; i++)          switch(prop_type)
2800            {            {
2801            GETCHARINC(c, eptr);            case PT_ANY:
2802            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2803            if ((*prop_test_variable == prop_test_against) == prop_fail_result)            for (i = 1; i <= min; i++)
2804              RRETURN(MATCH_NOMATCH);              {
2805                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2806                GETCHARINCTEST(c, eptr);
2807                }
2808              break;
2809    
2810              case PT_LAMP:
2811              for (i = 1; i <= min; i++)
2812                {
2813                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2814                GETCHARINCTEST(c, eptr);
2815                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2816                if ((prop_chartype == ucp_Lu ||
2817                     prop_chartype == ucp_Ll ||
2818                     prop_chartype == ucp_Lt) == prop_fail_result)
2819                  RRETURN(MATCH_NOMATCH);
2820                }
2821              break;
2822    
2823              case PT_GC:
2824              for (i = 1; i <= min; i++)
2825                {
2826                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2827                GETCHARINCTEST(c, eptr);
2828                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2829                if ((prop_category == prop_value) == prop_fail_result)
2830                  RRETURN(MATCH_NOMATCH);
2831                }
2832              break;
2833    
2834              case PT_PC:
2835              for (i = 1; i <= min; i++)
2836                {
2837                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2838                GETCHARINCTEST(c, eptr);
2839                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2840                if ((prop_chartype == prop_value) == prop_fail_result)
2841                  RRETURN(MATCH_NOMATCH);
2842                }
2843              break;
2844    
2845              case PT_SC:
2846              for (i = 1; i <= min; i++)
2847                {
2848                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2849                GETCHARINCTEST(c, eptr);
2850                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2851                if ((prop_script == prop_value) == prop_fail_result)
2852                  RRETURN(MATCH_NOMATCH);
2853                }
2854              break;
2855    
2856              default:
2857              RRETURN(PCRE_ERROR_INTERNAL);
2858            }            }
2859          }          }
2860    
# Line 2453  for (;;) Line 2866  for (;;)
2866          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2867            {            {
2868            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2869            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2870            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2871            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2872              {              {
# Line 2462  for (;;) Line 2875  for (;;)
2875                {                {
2876                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2877                }                }
2878              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2879              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2880              eptr += len;              eptr += len;
2881              }              }
# Line 2481  for (;;) Line 2894  for (;;)
2894          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2895            {            {
2896            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
2897               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2898              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2899              eptr++;
2900            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2901            }            }
2902          break;          break;
# Line 2491  for (;;) Line 2905  for (;;)
2905          eptr += min;          eptr += min;
2906          break;          break;
2907    
2908            case OP_ANYNL:
2909            for (i = 1; i <= min; i++)
2910              {
2911              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2912              GETCHARINC(c, eptr);
2913              switch(c)
2914                {
2915                default: RRETURN(MATCH_NOMATCH);
2916                case 0x000d:
2917                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2918                break;
2919                case 0x000a:
2920                case 0x000b:
2921                case 0x000c:
2922                case 0x0085:
2923                case 0x2028:
2924                case 0x2029:
2925                break;
2926                }
2927              }
2928            break;
2929    
2930            case OP_NOT_HSPACE:
2931            for (i = 1; i <= min; i++)
2932              {
2933              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2934              GETCHARINC(c, eptr);
2935              switch(c)
2936                {
2937                default: break;
2938                case 0x09:      /* HT */
2939                case 0x20:      /* SPACE */
2940                case 0xa0:      /* NBSP */
2941                case 0x1680:    /* OGHAM SPACE MARK */
2942                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2943                case 0x2000:    /* EN QUAD */
2944                case 0x2001:    /* EM QUAD */
2945                case 0x2002:    /* EN SPACE */
2946                case 0x2003:    /* EM SPACE */
2947                case 0x2004:    /* THREE-PER-EM SPACE */
2948                case 0x2005:    /* FOUR-PER-EM SPACE */
2949                case 0x2006:    /* SIX-PER-EM SPACE */
2950                case 0x2007:    /* FIGURE SPACE */
2951                case 0x2008:    /* PUNCTUATION SPACE */
2952                case 0x2009:    /* THIN SPACE */
2953                case 0x200A:    /* HAIR SPACE */
2954                case 0x202f:    /* NARROW NO-BREAK SPACE */
2955                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2956                case 0x3000:    /* IDEOGRAPHIC SPACE */
2957                RRETURN(MATCH_NOMATCH);
2958                }
2959              }
2960            break;
2961    
2962            case OP_HSPACE:
2963            for (i = 1; i <= min; i++)
2964              {
2965              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2966              GETCHARINC(c, eptr);
2967              switch(c)
2968                {
2969                default: RRETURN(MATCH_NOMATCH);
2970                case 0x09:      /* HT */
2971                case 0x20:      /* SPACE */
2972                case 0xa0:      /* NBSP */
2973                case 0x1680:    /* OGHAM SPACE MARK */
2974                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2975                case 0x2000:    /* EN QUAD */
2976                case 0x2001:    /* EM QUAD */
2977                case 0x2002:    /* EN SPACE */
2978                case 0x2003:    /* EM SPACE */
2979                case 0x2004:    /* THREE-PER-EM SPACE */
2980                case 0x2005:    /* FOUR-PER-EM SPACE */
2981                case 0x2006:    /* SIX-PER-EM SPACE */
2982                case 0x2007:    /* FIGURE SPACE */
2983                case 0x2008:    /* PUNCTUATION SPACE */
2984                case 0x2009:    /* THIN SPACE */
2985                case 0x200A:    /* HAIR SPACE */
2986                case 0x202f:    /* NARROW NO-BREAK SPACE */
2987                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2988                case 0x3000:    /* IDEOGRAPHIC SPACE */
2989                break;
2990                }
2991              }
2992            break;
2993    
2994            case OP_NOT_VSPACE:
2995            for (i = 1; i <= min; i++)
2996              {
2997              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2998              GETCHARINC(c, eptr);
2999              switch(c)
3000                {
3001                default: break;
3002                case 0x0a:      /* LF */
3003                case 0x0b:      /* VT */
3004                case 0x0c:      /* FF */
3005                case 0x0d:      /* CR */
3006                case 0x85:      /* NEL */
3007                case 0x2028:    /* LINE SEPARATOR */
3008                case 0x2029:    /* PARAGRAPH SEPARATOR */
3009                RRETURN(MATCH_NOMATCH);
3010                }
3011              }
3012            break;
3013    
3014            case OP_VSPACE:
3015            for (i = 1; i <= min; i++)
3016              {
3017              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3018              GETCHARINC(c, eptr);
3019              switch(c)
3020                {
3021                default: RRETURN(MATCH_NOMATCH);
3022                case 0x0a:      /* LF */
3023                case 0x0b:      /* VT */
3024                case 0x0c:      /* FF */
3025                case 0x0d:      /* CR */
3026                case 0x85:      /* NEL */
3027                case 0x2028:    /* LINE SEPARATOR */
3028                case 0x2029:    /* PARAGRAPH SEPARATOR */
3029                break;
3030                }
3031              }
3032            break;
3033    
3034          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3035          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3036            {            {
# Line 2559  for (;;) Line 3099  for (;;)
3099  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
3100    
3101        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
3102        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
3103          number of bytes present, as this was tested above. */
3104    
3105        switch(ctype)        switch(ctype)
3106          {          {
# Line 2567  for (;;) Line 3108  for (;;)
3108          if ((ims & PCRE_DOTALL) == 0)          if ((ims & PCRE_DOTALL) == 0)
3109            {            {
3110            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3111              if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);              {
3112                if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3113                eptr++;
3114                }
3115            }            }
3116          else eptr += min;          else eptr += min;
3117          break;          break;
# Line 2576  for (;;) Line 3120  for (;;)
3120          eptr += min;          eptr += min;
3121          break;          break;
3122    
3123            /* Because of the CRLF case, we can't assume the minimum number of
3124            bytes are present in this case. */
3125    
3126            case OP_ANYNL:
3127            for (i = 1; i <= min; i++)
3128              {
3129              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3130              switch(*eptr++)
3131                {
3132                default: RRETURN(MATCH_NOMATCH);
3133                case 0x000d:
3134                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3135                break;
3136                case 0x000a:
3137                case 0x000b:
3138                case 0x000c:
3139                case 0x0085:
3140                break;
3141                }
3142              }
3143            break;
3144    
3145            case OP_NOT_HSPACE:
3146            for (i = 1; i <= min; i++)
3147              {
3148              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3149              switch(*eptr++)
3150                {
3151                default: break;
3152                case 0x09:      /* HT */
3153                case 0x20:      /* SPACE */
3154                case 0xa0:      /* NBSP */
3155                RRETURN(MATCH_NOMATCH);
3156                }
3157              }
3158            break;
3159    
3160            case OP_HSPACE:
3161            for (i = 1; i <= min; i++)
3162              {
3163              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3164              switch(*eptr++)
3165                {
3166                default: RRETURN(MATCH_NOMATCH);
3167                case 0x09:      /* HT */
3168                case 0x20:      /* SPACE */
3169                case 0xa0:      /* NBSP */
3170                break;
3171                }
3172              }
3173            break;
3174    
3175            case OP_NOT_VSPACE:
3176            for (i = 1; i <= min; i++)
3177              {
3178              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3179              switch(*eptr++)
3180                {
3181                default: break;
3182                case 0x0a:      /* LF */
3183                case 0x0b:      /* VT */
3184                case 0x0c:      /* FF */
3185                case 0x0d:      /* CR */
3186                case 0x85:      /* NEL */
3187                RRETURN(MATCH_NOMATCH);
3188                }
3189              }
3190            break;
3191    
3192            case OP_VSPACE:
3193            for (i = 1; i <= min; i++)
3194              {
3195              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3196              switch(*eptr++)
3197                {
3198                default: RRETURN(MATCH_NOMATCH);
3199                case 0x0a:      /* LF */
3200                case 0x0b:      /* VT */
3201                case 0x0c:      /* FF */
3202                case 0x0d:      /* CR */
3203                case 0x85:      /* NEL */
3204                break;
3205                }
3206              }
3207            break;
3208    
3209          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3210          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3211            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2624  for (;;) Line 3254  for (;;)
3254      if (minimize)      if (minimize)
3255        {        {
3256  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3257        if (prop_type > 0)        if (prop_type >= 0)
3258          {          {
3259          for (fi = min;; fi++)          switch(prop_type)
3260            {            {
3261            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            case PT_ANY:
3262            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for (fi = min;; fi++)
3263            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              {
3264            GETCHARINC(c, eptr);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3265            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3266            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3267              RRETURN(MATCH_NOMATCH);              GETCHARINC(c, eptr);
3268                if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3269                }
3270              /* Control never gets here */
3271    
3272              case PT_LAMP:
3273              for (fi = min;; fi++)
3274                {
3275                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3276                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3277                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3278                GETCHARINC(c, eptr);
3279                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3280                if ((prop_chartype == ucp_Lu ||
3281                     prop_chartype == ucp_Ll ||
3282                     prop_chartype == ucp_Lt) == prop_fail_result)
3283                  RRETURN(MATCH_NOMATCH);
3284                }
3285              /* Control never gets here */
3286    
3287              case PT_GC:
3288              for (fi = min;; fi++)
3289                {
3290                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3291                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3292                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3293                GETCHARINC(c, eptr);
3294                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3295                if ((prop_category == prop_value) == prop_fail_result)
3296                  RRETURN(MATCH_NOMATCH);
3297                }
3298              /* Control never gets here */
3299    
3300              case PT_PC:
3301              for (fi = min;; fi++)
3302                {
3303                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3304                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3305                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3306                GETCHARINC(c, eptr);
3307                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3308                if ((prop_chartype == prop_value) == prop_fail_result)
3309                  RRETURN(MATCH_NOMATCH);
3310                }
3311              /* Control never gets here */
3312    
3313              case PT_SC:
3314              for (fi = min;; fi++)
3315                {
3316                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3317                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3318                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3319                GETCHARINC(c, eptr);
3320                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3321                if ((prop_script == prop_value) == prop_fail_result)
3322                  RRETURN(MATCH_NOMATCH);
3323                }
3324              /* Control never gets here */
3325    
3326              default:
3327              RRETURN(PCRE_ERROR_INTERNAL);
3328            }            }
3329          }          }
3330    
# Line 2645  for (;;) Line 3335  for (;;)
3335          {          {
3336          for (fi = min;; fi++)          for (fi = min;; fi++)
3337            {            {
3338            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3339            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3340            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3341            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3342            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3343            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3344            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3345              {              {
# Line 2658  for (;;) Line 3348  for (;;)
3348                {                {
3349                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3350                }                }
3351              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3352              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3353              eptr += len;              eptr += len;
3354              }              }
# Line 2674  for (;;) Line 3364  for (;;)
3364          {          {
3365          for (fi = min;; fi++)          for (fi = min;; fi++)
3366            {            {
3367            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3368            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3369            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3370                   (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3371                    IS_NEWLINE(eptr)))
3372                RRETURN(MATCH_NOMATCH);
3373    
3374            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3375            switch(ctype)            switch(ctype)
3376              {              {
3377              case OP_ANY:              case OP_ANY:        /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3378              break;              break;
3379    
3380              case OP_ANYBYTE:              case OP_ANYBYTE:
3381              break;              break;
3382    
3383                case OP_ANYNL:
3384                switch(c)
3385                  {
3386                  default: RRETURN(MATCH_NOMATCH);
3387                  case 0x000d:
3388                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3389                  break;
3390                  case 0x000a:
3391                  case 0x000b:
3392                  case 0x000c:
3393                  case 0x0085:
3394                  case 0x2028:
3395                  case 0x2029:
3396                  break;
3397                  }
3398                break;
3399    
3400                case OP_NOT_HSPACE:
3401                switch(c)
3402                  {
3403                  default: break;
3404                  case 0x09:      /* HT */
3405                  case 0x20:      /* SPACE */
3406                  case 0xa0:      /* NBSP */
3407                  case 0x1680:    /* OGHAM SPACE MARK */
3408                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3409                  case 0x2000:    /* EN QUAD */
3410                  case 0x2001:    /* EM QUAD */
3411                  case 0x2002:    /* EN SPACE */
3412                  case 0x2003:    /* EM SPACE */
3413                  case 0x2004:    /* THREE-PER-EM SPACE */
3414                  case 0x2005:    /* FOUR-PER-EM SPACE */
3415                  case 0x2006:    /* SIX-PER-EM SPACE */
3416                  case 0x2007:    /* FIGURE SPACE */
3417                  case 0x2008:    /* PUNCTUATION SPACE */
3418                  case 0x2009:    /* THIN SPACE */
3419                  case 0x200A:    /* HAIR SPACE */
3420                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3421                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3422                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3423                  RRETURN(MATCH_NOMATCH);
3424                  }
3425                break;
3426    
3427                case OP_HSPACE:
3428                switch(c)
3429                  {
3430                  default: RRETURN(MATCH_NOMATCH);
3431                  case 0x09:      /* HT */
3432                  case 0x20:      /* SPACE */
3433                  case 0xa0:      /* NBSP */
3434                  case 0x1680:    /* OGHAM SPACE MARK */
3435                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3436                  case 0x2000:    /* EN QUAD */
3437                  case 0x2001:    /* EM QUAD */
3438                  case 0x2002:    /* EN SPACE */
3439                  case 0x2003:    /* EM SPACE */
3440                  case 0x2004:    /* THREE-PER-EM SPACE */
3441                  case 0x2005:    /* FOUR-PER-EM SPACE */
3442                  case 0x2006:    /* SIX-PER-EM SPACE */
3443                  case 0x2007:    /* FIGURE SPACE */
3444                  case 0x2008:    /* PUNCTUATION SPACE */
3445                  case 0x2009:    /* THIN SPACE */
3446                  case 0x200A:    /* HAIR SPACE */
3447                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3448                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3449                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3450                  break;
3451                  }
3452                break;
3453    
3454                case OP_NOT_VSPACE:
3455                switch(c)
3456                  {
3457                  default: break;
3458                  case 0x0a:      /* LF */
3459                  case 0x0b:      /* VT */
3460                  case 0x0c:      /* FF */
3461                  case 0x0d:      /* CR */
3462                  case 0x85:      /* NEL */
3463                  case 0x2028:    /* LINE SEPARATOR */
3464                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3465                  RRETURN(MATCH_NOMATCH);
3466                  }
3467                break;
3468    
3469                case OP_VSPACE:
3470                switch(c)
3471                  {
3472                  default: RRETURN(MATCH_NOMATCH);
3473                  case 0x0a:      /* LF */
3474                  case 0x0b:      /* VT */
3475                  case 0x0c:      /* FF */
3476                  case 0x0d:      /* CR */
3477                  case 0x85:      /* NEL */
3478                  case 0x2028:    /* LINE SEPARATOR */
3479                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3480                  break;
3481                  }
3482                break;
3483    
3484              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3485              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3486                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2729  for (;;) Line 3522  for (;;)
3522          {          {
3523          for (fi = min;; fi++)          for (fi = min;; fi++)
3524            {            {
3525            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3526            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3527            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3528                   ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3529                RRETURN(MATCH_NOMATCH);
3530    
3531            c = *eptr++;            c = *eptr++;
3532            switch(ctype)            switch(ctype)
3533              {              {
3534              case OP_ANY:              case OP_ANY:   /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3535              break;              break;
3536    
3537              case OP_ANYBYTE:              case OP_ANYBYTE:
3538              break;              break;
3539    
3540                case OP_ANYNL:
3541                switch(c)
3542                  {
3543                  default: RRETURN(MATCH_NOMATCH);
3544                  case 0x000d:
3545                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3546                  break;
3547                  case 0x000a:
3548                  case 0x000b:
3549                  case 0x000c:
3550                  case 0x0085:
3551                  break;
3552                  }
3553                break;
3554    
3555                case OP_NOT_HSPACE:
3556                switch(c)
3557                  {
3558                  default: break;
3559                  case 0x09:      /* HT */
3560                  case 0x20:      /* SPACE */
3561                  case 0xa0:      /* NBSP */
3562                  RRETURN(MATCH_NOMATCH);
3563                  }
3564                break;
3565    
3566                case OP_HSPACE:
3567                switch(c)
3568                  {
3569                  default: RRETURN(MATCH_NOMATCH);
3570                  case 0x09:      /* HT */
3571                  case 0x20:      /* SPACE */
3572                  case 0xa0:      /* NBSP */
3573                  break;
3574                  }
3575                break;
3576    
3577                case OP_NOT_VSPACE:
3578                switch(c)
3579                  {
3580                  default: break;
3581                  case 0x0a:      /* LF */
3582                  case 0x0b:      /* VT */
3583                  case 0x0c:      /* FF */
3584                  case 0x0d:      /* CR */
3585                  case 0x85:      /* NEL */
3586                  RRETURN(MATCH_NOMATCH);
3587                  }
3588                break;
3589    
3590                case OP_VSPACE:
3591                switch(c)
3592                  {
3593                  default: RRETURN(MATCH_NOMATCH);
3594                  case 0x0a:      /* LF */
3595                  case 0x0b:      /* VT */
3596                  case 0x0c:      /* FF */
3597                  case 0x0d:      /* CR */
3598                  case 0x85:      /* NEL */
3599                  break;
3600                  }
3601                break;
3602    
3603              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3604              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3605              break;              break;
# Line 2774  for (;;) Line 3632  for (;;)
3632        /* Control never gets here */        /* Control never gets here */
3633        }        }
3634    
3635      /* If maximizing it is worth using inline code for speed, doing the type      /* If maximizing, it is worth using inline code for speed, doing the type
3636      test once at the start (i.e. keep it out of the loop). Again, keep the      test once at the start (i.e. keep it out of the loop). Again, keep the
3637      UTF-8 and UCP stuff separate. */      UTF-8 and UCP stuff separate. */
3638    
# Line 2783  for (;;) Line 3641  for (;;)
3641        pp = eptr;  /* Remember where we started */        pp = eptr;  /* Remember where we started */
3642    
3643  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3644        if (prop_type > 0)        if (prop_type >= 0)
3645          {          {
3646          for (i = min; i < max; i++)          switch(prop_type)
3647            {            {
3648            int len = 1;            case PT_ANY:
3649            if (eptr >= md->end_subject) break;            for (i = min; i < max; i++)
3650            GETCHARLEN(c, eptr, len);              {
3651            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              int len = 1;
3652            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (eptr >= md->end_subject) break;
3653              break;              GETCHARLEN(c, eptr, len);
3654            eptr+= len;              if (prop_fail_result) break;
3655                eptr+= len;
3656                }
3657              break;
3658    
3659              case PT_LAMP:
3660              for (i = min; i < max; i++)
3661                {
3662                int len = 1;
3663                if (eptr >= md->end_subject) break;
3664                GETCHARLEN(c, eptr, len);
3665                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3666                if ((prop_chartype == ucp_Lu ||
3667                     prop_chartype == ucp_Ll ||
3668                     prop_chartype == ucp_Lt) == prop_fail_result)
3669                  break;
3670                eptr+= len;
3671                }
3672              break;
3673    
3674              case PT_GC:
3675              for (i = min; i < max; i++)
3676                {
3677                int len = 1;
3678                if (eptr >= md->end_subject) break;
3679                GETCHARLEN(c, eptr, len);
3680                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3681                if ((prop_category == prop_value) == prop_fail_result)
3682                  break;
3683                eptr+= len;
3684                }
3685              break;
3686    
3687              case PT_PC:
3688              for (i = min; i < max; i++)
3689                {
3690                int len = 1;
3691                if (eptr >= md->end_subject) break;
3692                GETCHARLEN(c, eptr, len);
3693                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3694                if ((prop_chartype == prop_value) == prop_fail_result)
3695                  break;
3696                eptr+= len;
3697                }
3698              break;
3699    
3700              case PT_SC:
3701              for (i = min; i < max; i++)
3702                {
3703                int len = 1;
3704                if (eptr >= md->end_subject) break;
3705                GETCHARLEN(c, eptr, len);
3706                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3707                if ((prop_script == prop_value) == prop_fail_result)
3708                  break;
3709                eptr+= len;
3710                }
3711              break;
3712            }            }
3713    
3714          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3715    
3716            if (possessive) continue;
3717          for(;;)          for(;;)
3718            {            {
3719            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3720            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3721            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3722            BACKCHAR(eptr);            BACKCHAR(eptr);
# Line 2816  for (;;) Line 3732  for (;;)
3732            {            {
3733            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3734            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3735            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3736            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3737            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3738              {              {
# Line 2825  for (;;) Line 3741  for (;;)
3741                {                {
3742                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3743                }                }
3744              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3745              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3746              eptr += len;              eptr += len;
3747              }              }
# Line 2833  for (;;) Line 3749  for (;;)
3749    
3750          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3751    
3752            if (possessive) continue;
3753          for(;;)          for(;;)
3754            {            {
3755            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3756            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3757            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3758            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
# Line 2846  for (;;) Line 3763  for (;;)
3763                {                {
3764                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3765                }                }
3766              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3767              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3768              eptr--;              eptr--;
3769              }              }
# Line 2864  for (;;) Line 3781  for (;;)
3781          switch(ctype)          switch(ctype)
3782            {            {
3783            case OP_ANY:            case OP_ANY:
   
           /* Special code is required for UTF8, but when the maximum is unlimited  
           we don't need it, so we repeat the non-UTF8 code. This is probably  
           worth it, because .* is quite a common idiom. */  
   
3784            if (max < INT_MAX)            if (max < INT_MAX)
3785              {              {
3786              if ((ims & PCRE_DOTALL) == 0)              if ((ims & PCRE_DOTALL) == 0)
3787                {                {
3788                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3789                  {                  {
3790                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3791                  eptr++;                  eptr++;
3792                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3793                  }                  }
# Line 2884  for (;;) Line 3796  for (;;)
3796                {                {
3797                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3798                  {                  {
3799                    if (eptr >= md->end_subject) break;
3800                  eptr++;                  eptr++;
3801                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3802                  }                  }
# Line 2898  for (;;) Line 3811  for (;;)
3811                {                {
3812                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3813                  {                  {
3814                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3815                  eptr++;                  eptr++;
3816                    while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3817                  }                  }
               break;  
3818                }                }
3819              else              else
3820                {                {
3821                c = max - min;                eptr = md->end_subject;
               if (c > md->end_subject - eptr) c = md->end_subject - eptr;  
               eptr += c;  
3822                }                }
3823              }              }
3824            break;            break;
# Line 2916  for (;;) Line 3827  for (;;)
3827    
3828            case OP_ANYBYTE:            case OP_ANYBYTE:
3829            c = max - min;            c = max - min;
3830            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3831                c = md->end_subject - eptr;
3832            eptr += c;            eptr += c;
3833            break;            break;
3834    
3835              case OP_ANYNL:
3836              for (i = min; i < max; i++)
3837                {
3838                int len = 1;
3839                if (eptr >= md->end_subject) break;
3840                GETCHARLEN(c, eptr, len);
3841                if (c == 0x000d)
3842                  {
3843                  if (++eptr >= md->end_subject) break;
3844                  if (*eptr == 0x000a) eptr++;
3845                  }
3846                else
3847                  {
3848                  if (c != 0x000a && c != 0x000b && c != 0x000c &&
3849                      c != 0x0085 && c != 0x2028 && c != 0x2029)
3850                    break;
3851                  eptr += len;
3852                  }
3853                }
3854              break;
3855    
3856              case OP_NOT_HSPACE:
3857              case OP_HSPACE:
3858              for (i = min; i < max; i++)
3859                {
3860                BOOL gotspace;
3861                int len = 1;
3862                if (eptr >= md->end_subject) break;
3863                GETCHARLEN(c, eptr, len);
3864                switch(c)
3865                  {
3866                  default: gotspace = FALSE; break;
3867                  case 0x09:      /* HT */
3868                  case 0x20:      /* SPACE */
3869                  case 0xa0:      /* NBSP */
3870                  case 0x1680:    /* OGHAM SPACE MARK */
3871                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3872                  case 0x2000:    /* EN QUAD */
3873                  case 0x2001:    /* EM QUAD */
3874                  case 0x2002:    /* EN SPACE */
3875                  case 0x2003:    /* EM SPACE */
3876                  case 0x2004:    /* THREE-PER-EM SPACE */
3877                  case 0x2005:    /* FOUR-PER-EM SPACE */
3878                  case 0x2006:    /* SIX-PER-EM SPACE */
3879                  case 0x2007:    /* FIGURE SPACE */
3880                  case 0x2008:    /* PUNCTUATION SPACE */
3881                  case 0x2009:    /* THIN SPACE */
3882                  case 0x200A:    /* HAIR SPACE */
3883                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3884                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3885                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3886                  gotspace = TRUE;
3887                  break;
3888                  }
3889                if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3890                eptr += len;
3891                }
3892              break;
3893    
3894              case OP_NOT_VSPACE:
3895              case OP_VSPACE:
3896              for (i = min; i < max; i++)
3897                {
3898                BOOL gotspace;
3899                int len = 1;
3900                if (eptr >= md->end_subject) break;
3901                GETCHARLEN(c, eptr, len);
3902                switch(c)
3903                  {
3904                  default: gotspace = FALSE; break;
3905                  case 0x0a:      /* LF */
3906                  case 0x0b:      /* VT */
3907                  case 0x0c:      /* FF */
3908                  case 0x0d:      /* CR */
3909                  case 0x85:      /* NEL */
3910                  case 0x2028:    /* LINE SEPARATOR */
3911                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3912                  gotspace = TRUE;
3913                  break;
3914                  }
3915                if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3916                eptr += len;
3917                }
3918              break;
3919    
3920            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3921            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3922              {              {
# Line 2992  for (;;) Line 3989  for (;;)
3989    
3990          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3991    
3992            if (possessive) continue;
3993          for(;;)          for(;;)
3994            {            {
3995            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
3996            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3997            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3998            BACKCHAR(eptr);            BACKCHAR(eptr);
# Line 3012  for (;;) Line 4010  for (;;)
4010              {              {
4011              for (i = min; i < max; i++)              for (i = min; i < max; i++)
4012                {                {
4013                if (eptr >= md->end_subject || *eptr == NEWLINE) break;                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4014                eptr++;                eptr++;
4015                }                }
4016              break;              break;
# Line 3021  for (;;) Line 4019  for (;;)
4019    
4020            case OP_ANYBYTE:            case OP_ANYBYTE:
4021            c = max - min;            c = max - min;
4022            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
4023                c = md->end_subject - eptr;
4024            eptr += c;            eptr += c;
4025            break;            break;
4026    
4027              case OP_ANYNL:
4028              for (i = min; i < max; i++)
4029                {
4030                if (eptr >= md->end_subject) break;
4031                c = *eptr;
4032                if (c == 0x000d)
4033                  {
4034                  if (++eptr >= md->end_subject) break;
4035                  if (*eptr == 0x000a) eptr++;
4036                  }
4037                else
4038                  {
4039                  if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
4040                    break;
4041                  eptr++;
4042                  }
4043                }
4044              break;
4045    
4046              case OP_NOT_HSPACE:
4047              for (i = min; i < max; i++)
4048                {
4049                if (eptr >= md->end_subject) break;
4050                c = *eptr;
4051                if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4052                eptr++;
4053                }
4054              break;
4055    
4056              case OP_HSPACE:
4057              for (i = min; i < max; i++)
4058                {
4059                if (eptr >= md->end_subject) break;
4060                c = *eptr;
4061                if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4062                eptr++;
4063                }
4064              break;
4065    
4066              case OP_NOT_VSPACE:
4067              for (i = min; i < max; i++)
4068                {
4069                if (eptr >= md->end_subject) break;
4070                c = *eptr;
4071                if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4072                  break;
4073                eptr++;
4074                }
4075              break;
4076    
4077              case OP_VSPACE:
4078              for (i = min; i < max; i++)
4079                {
4080                if (eptr >= md->end_subject) break;
4081                c = *eptr;
4082                if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4083                  break;
4084                eptr++;
4085                }
4086              break;
4087    
4088            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
4089            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4090              {              {
# Line 3085  for (;;) Line 4145  for (;;)
4145    
4146          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4147    
4148            if (possessive) continue;
4149          while (eptr >= pp)          while (eptr >= pp)
4150            {            {
4151            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4152            eptr--;            eptr--;
4153            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4154            }            }
# Line 3099  for (;;) Line 4160  for (;;)
4160        }        }
4161      /* Control never gets here */      /* Control never gets here */
4162    
4163      /* There's been some horrible disaster. Since all codes > OP_BRA are      /* There's been some horrible disaster. Arrival here can only mean there is
4164      for capturing brackets, and there shouldn't be any gaps between 0 and      something seriously wrong in the code above or the OP_xxx definitions. */
     OP_BRA, arrival here can only mean there is something seriously wrong  
     in the code above or the OP_xxx definitions. */  
4165    
4166      default:      default:
4167      DPRINTF(("Unknown opcode %d\n", *ecode));      DPRINTF(("Unknown opcode %d\n", *ecode));
4168      RRETURN(PCRE_ERROR_UNKNOWN_NODE);      RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
4169      }      }
4170    
4171    /* Do not stick any code in here without much thought; it is assumed    /* Do not stick any code in here without much thought; it is assumed
# Line 3115  for (;;) Line 4174  for (;;)
4174    
4175    }             /* End of main loop */    }             /* End of main loop */
4176  /* Control never reaches here */  /* Control never reaches here */
4177    
4178    
4179    /* When compiling to use the heap rather than the stack for recursive calls to
4180    match(), the RRETURN() macro jumps here. The number that is saved in
4181    frame->Xwhere indicates which label we actually want to return to. */
4182    
4183    #ifdef NO_RECURSE
4184    #define LBL(val) case val: goto L_RM##val;
4185    HEAP_RETURN:
4186    switch (frame->Xwhere)
4187      {
4188      LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4189      LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
4190      LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
4191      LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
4192      LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
4193      LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
4194      default:
4195      DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4196      return PCRE_ERROR_INTERNAL;
4197      }
4198    #undef LBL
4199    #endif  /* NO_RECURSE */
4200  }  }
4201    
4202    
# Line 3127  Undefine all the macros that were define Line 4209  Undefine all the macros that were define
4209  #ifdef NO_RECURSE  #ifdef NO_RECURSE
4210  #undef eptr  #undef eptr
4211  #undef ecode  #undef ecode
4212    #undef mstart
4213  #undef offset_top  #undef offset_top
4214  #undef ims  #undef ims
4215  #undef eptrb  #undef eptrb
# Line 3144  Undefine all the macros that were define Line 4227  Undefine all the macros that were define
4227    
4228  #undef cur_is_word  #undef cur_is_word
4229  #undef condition  #undef condition
 #undef minimize  
4230  #undef prev_is_word  #undef prev_is_word
4231    
4232  #undef original_ims  #undef original_ims
# Line 3200  Returns:          > 0 => success; value Line 4282  Returns:          > 0 => success; value
4282                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4283  */  */
4284    
4285  PCRE_EXPORT int  PCRE_EXP_DEFN int
4286  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4287    const char *subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4288    int offsetcount)    int offsetcount)
4289  {  {
4290  int rc, resetcount, ocount;  int rc, resetcount, ocount;
4291  int first_byte = -1;  int first_byte = -1;
4292  int req_byte = -1;  int req_byte = -1;
4293  int req_byte2 = -1;  int req_byte2 = -1;
4294  unsigned long int ims = 0;  int newline;
4295    unsigned long int ims;
4296  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
4297  BOOL anchored;  BOOL anchored;
4298  BOOL startline;  BOOL startline;
4299  BOOL firstline;  BOOL firstline;
4300  BOOL first_byte_caseless = FALSE;  BOOL first_byte_caseless = FALSE;
4301  BOOL req_byte_caseless = FALSE;  BOOL req_byte_caseless = FALSE;
4302    BOOL utf8;
4303  match_data match_block;  match_data match_block;
4304    match_data *md = &match_block;
4305  const uschar *tables;  const uschar *tables;
4306  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4307  const uschar *start_match = (const uschar *)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
4308  const uschar *end_subject;  USPTR end_subject;
4309  const uschar *req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
4310    
4311  pcre_study_data internal_study;  pcre_study_data internal_study;
4312  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3241  if (offsetcount < 0) return PCRE_ERROR_B Line 4326  if (offsetcount < 0) return PCRE_ERROR_B
4326  the default values. */  the default values. */
4327    
4328  study = NULL;  study = NULL;
4329  match_block.match_limit = MATCH_LIMIT;  md->match_limit = MATCH_LIMIT;
4330  match_block.callout_data = NULL;  md->match_limit_recursion = MATCH_LIMIT_RECURSION;
4331    md->callout_data = NULL;
4332    
4333  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
4334    
# Line 3254  if (extra_data != NULL) Line 4340  if (extra_data != NULL)
4340    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
4341      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
4342    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
4343      match_block.match_limit = extra_data->match_limit;      md->match_limit = extra_data->match_limit;
4344      if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
4345        md->match_limit_recursion = extra_data->match_limit_recursion;
4346    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
4347      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
4348    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
4349    }    }
4350    
# Line 3286  firstline = (re->options & PCRE_FIRSTLIN Line 4374  firstline = (re->options & PCRE_FIRSTLIN
4374    
4375  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
4376    
4377  match_block.start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const uschar *)external_re + re->name_table_offset +
4378    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
4379    
4380  match_block.start_subject = (const uschar *)subject;  md->start_subject = (USPTR)subject;
4381  match_block.start_offset = start_offset;  md->start_offset = start_offset;
4382  match_block.end_subject = match_block.start_subject + length;  md->end_subject = md->start_subject + length;
4383  end_subject = match_block.end_subject;  end_subject = md->end_subject;
4384    
4385  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4386  match_block.utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4387    
4388  match_block.notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4389  match_block.noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
4390  match_block.notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
4391  match_block.partial = (options & PCRE_PARTIAL) != 0;  md->partial = (options & PCRE_PARTIAL) != 0;
4392  match_block.hitend = FALSE;  md->hitend = FALSE;
4393    
4394    md->recursive = NULL;                   /* No recursion at top level */
4395    
4396  match_block.recursive = NULL;                   /* No recursion at top level */  md->lcc = tables + lcc_offset;
4397    md->ctypes = tables + ctypes_offset;
4398    
4399  match_block.lcc = tables + lcc_offset;  /* Handle different types of newline. The three bits give eight cases. If
4400  match_block.ctypes = tables + ctypes_offset;  nothing is set at run time, whatever was used at compile time applies. */
4401    
4402    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
4403           PCRE_NEWLINE_BITS)
4404      {
4405      case 0: newline = NEWLINE; break;   /* Compile-time default */
4406      case PCRE_NEWLINE_CR: newline = '\r'; break;
4407      case PCRE_NEWLINE_LF: newline = '\n'; break;
4408      case PCRE_NEWLINE_CR+
4409           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4410      case PCRE_NEWLINE_ANY: newline = -1; break;
4411      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4412      default: return PCRE_ERROR_BADNEWLINE;
4413      }
4414    
4415    if (newline == -2)
4416      {
4417      md->nltype = NLTYPE_ANYCRLF;
4418      }
4419    else if (newline < 0)
4420      {
4421      md->nltype = NLTYPE_ANY;
4422      }
4423    else
4424      {
4425      md->nltype = NLTYPE_FIXED;
4426      if (newline > 255)
4427        {
4428        md->nllen = 2;
4429        md->nl[0] = (newline >> 8) & 255;
4430        md->nl[1] = newline & 255;
4431        }
4432      else
4433        {
4434        md->nllen = 1;
4435        md->nl[0] = newline;
4436        }
4437      }
4438    
4439  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
4440  moment. */  moment. */
4441    
4442  if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
4443    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
4444    
4445  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
4446  back the character offset. */  back the character offset. */
4447    
4448  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4449  if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4450    {    {
4451    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
4452      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
# Line 3350  ocount = offsetcount - (offsetcount % 3) Line 4478  ocount = offsetcount - (offsetcount % 3)
4478  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
4479    {    {
4480    ocount = re->top_backref * 3 + 3;    ocount = re->top_backref * 3 + 3;
4481    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4482    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4483    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
4484    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
4485    }    }
4486  else match_block.offset_vector = offsets;  else md->offset_vector = offsets;
4487    
4488  match_block.offset_end = ocount;  md->offset_end = ocount;
4489  match_block.offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
4490  match_block.offset_overflow = FALSE;  md->offset_overflow = FALSE;
4491  match_block.capture_last = -1;  md->capture_last = -1;
4492    
4493  /* Compute the minimum number of offsets that we need to reset each time. Doing  /* Compute the minimum number of offsets that we need to reset each time. Doing
4494  this makes a huge difference to execution time when there aren't many brackets  this makes a huge difference to execution time when there aren't many brackets
# Line 3373  if (resetcount > offsetcount) resetcount Line 4501  if (resetcount > offsetcount) resetcount
4501  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
4502  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. */
4503    
4504  if (match_block.offset_vector != NULL)  if (md->offset_vector != NULL)
4505    {    {
4506    register int *iptr = match_block.offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
4507    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - resetcount/2 + 1;
4508    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
4509    }    }
# Line 3392  if (!anchored) Line 4520  if (!anchored)
4520      {      {
4521      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
4522      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
4523        first_byte = match_block.lcc[first_byte];        first_byte = md->lcc[first_byte];
4524      }      }
4525    else    else
4526      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 3410  if ((re->options & PCRE_REQCHSET) != 0) Line 4538  if ((re->options & PCRE_REQCHSET) != 0)
4538    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
4539    }    }
4540    
4541    
4542    /* ==========================================================================*/
4543    
4544  /* Loop for handling unanchored repeated matching attempts; for anchored regexes  /* Loop for handling unanchored repeated matching attempts; for anchored regexes
4545  the loop runs just once. */  the loop runs just once. */
4546    
4547  do  for(;;)
4548    {    {
4549    const uschar *save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
4550    
4551    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
4552    
4553    if (match_block.offset_vector != NULL)    if (md->offset_vector != NULL)
4554      {      {
4555      register int *iptr = match_block.offset_vector;      register int *iptr = md->offset_vector;
4556      register int *iend = iptr + resetcount;      register int *iend = iptr + resetcount;
4557      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
4558      }      }
4559    
4560    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* Advance to a unique first char if possible. If firstline is TRUE, the
4561    start of the match is constrained to the first line of a multiline string.    start of the match is constrained to the first line of a multiline string.
4562    Implement this by temporarily adjusting end_subject so that we stop scanning    That is, the match must be before or at the first newline. Implement this by
4563    at a newline. If the match fails at the newline, later code breaks this loop.    temporarily adjusting end_subject so that we stop scanning at a newline. If
4564    */    the match fails at the newline, later code breaks this loop. */
4565    
4566    if (firstline)    if (firstline)
4567      {      {
4568      const uschar *t = start_match;      USPTR t = start_match;
4569      while (t < save_end_subject && *t != '\n') t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4570      end_subject = t;      end_subject = t;
4571      }      }
4572    
# Line 3445  do Line 4576  do
4576      {      {
4577      if (first_byte_caseless)      if (first_byte_caseless)
4578        while (start_match < end_subject &&        while (start_match < end_subject &&
4579               match_block.lcc[*start_match] != first_byte)               md->lcc[*start_match] != first_byte)
4580          start_match++;          start_match++;
4581      else      else
4582        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4583          start_match++;          start_match++;
4584      }      }
4585    
4586    /* Or to just after \n for a multiline match if possible */    /* Or to just after a linebreak for a multiline match if possible */
4587    
4588    else if (startline)    else if (startline)
4589      {      {
4590      if (start_match > match_block.start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4591        {        {
4592        while (start_match < end_subject && start_match[-1] != NEWLINE)        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4593            start_match++;
4594    
4595          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4596          and we are now at a LF, advance the match position by one more character.
4597          */
4598    
4599          if (start_match[-1] == '\r' &&
4600               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4601               start_match < end_subject &&
4602               *start_match == '\n')
4603          start_match++;          start_match++;
4604        }        }
4605      }      }
# Line 3480  do Line 4621  do
4621    
4622  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4623    printf(">>>> Match against: ");    printf(">>>> Match against: ");
4624    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, md);
4625    printf("\n");    printf("\n");
4626  #endif  #endif
4627    
# Line 3494  do Line 4635  do
4635    
4636    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end can
4637    take a long time, and give bad performance on quite ordinary patterns. This    take a long time, and give bad performance on quite ordinary patterns. This
4638    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4639    don't do this when the string is sufficiently long.    string... so we don't do this when the string is sufficiently long.
4640    
4641    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested.
4642    */    */
4643    
4644    if (req_byte >= 0 &&    if (req_byte >= 0 &&
4645        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
4646        !match_block.partial)        !md->partial)
4647      {      {
4648      register const uschar *p = start_match + ((first_byte >= 0)? 1 : 0);      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4649    
4650      /* We don't need to repeat the search if we haven't yet reached the      /* We don't need to repeat the search if we haven't yet reached the
4651      place we found it at last time. */      place we found it at last time. */
# Line 3527  do Line 4668  do
4668            }            }
4669          }          }
4670    
4671        /* If we can't find the required character, break the matching loop */        /* If we can't find the required character, break the matching loop,
4672          forcing a match failure. */
4673    
4674        if (p >= end_subject) break;        if (p >= end_subject)
4675            {
4676            rc = MATCH_NOMATCH;
4677            break;
4678            }
4679    
4680        /* If we have found the required character, save the point where we        /* If we have found the required character, save the point where we
4681        found it, so that we don't search again next time round the loop if        found it, so that we don't search again next time round the loop if
# Line 3539  do Line 4685  do
4685        }        }
4686      }      }
4687    
4688    /* When a match occurs, substrings will be set for all internal extractions;    /* OK, we can now run the match. */
   we just need to set up the whole thing as substring 0 before returning. If  
   there were too many extractions, set the return code to zero. In the case  
   where we had to get some local store to hold offsets for backreferences, copy  
   those back references that we can. In this case there need not be overflow  
   if certain parts of the pattern were not used. */  
   
   match_block.start_match = start_match;  
   match_block.match_call_count = 0;  
   
   rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,  
     match_isgroup);  
   
   /* When the result is no match, if the subject's first character was a  
   newline and the PCRE_FIRSTLINE option is set, break (which will return  
   PCRE_ERROR_NOMATCH). The option requests that a match occur before the first  
   newline in the subject. Otherwise, advance the pointer to the next character  
   and continue - but the continuation will actually happen only when the  
   pattern is not anchored. */  
4689    
4690    if (rc == MATCH_NOMATCH)    md->start_match_ptr = start_match;      /* Insurance */
4691      {    md->match_call_count = 0;
4692      if (firstline && *start_match == NEWLINE) break;    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
4693      start_match++;  
4694      /* Any return other than MATCH_NOMATCH breaks the loop. */
4695    
4696      if (rc != MATCH_NOMATCH) break;
4697    
4698      /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4699      newline in the subject (though it may continue over the newline). Therefore,
4700      if we have just failed to match, starting at a newline, do not continue. */
4701    
4702      if (firstline && IS_NEWLINE(start_match)) break;
4703    
4704      /* Advance the match position by one character. */
4705    
4706      start_match++;
4707  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4708      if (match_block.utf8)    if (utf8)
4709        while(start_match < end_subject && (*start_match & 0xc0) == 0x80)      while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4710          start_match++;        start_match++;
4711  #endif  #endif
     continue;  
     }  
4712    
4713    if (rc != MATCH_MATCH)    /* Break the loop if the pattern is anchored or if we have passed the end of
4714      {    the subject. */
     DPRINTF((">>>> error: returning %d\n", rc));  
     return rc;  
     }  
4715    
4716    /* We have a match! Copy the offset information from temporary store if    if (anchored || start_match > end_subject) break;
4717    necessary */  
4718      /* If we have just passed a CR and the newline option is CRLF or ANY or
4719      ANYCRLF, and we are now at a LF, advance the match position by one more
4720      character. */
4721    
4722      if (start_match[-1] == '\r' &&
4723           (md->nltype == NLTYPE_ANY ||
4724            md->nltype == NLTYPE_ANYCRLF ||
4725            md->nllen == 2) &&
4726           start_match < end_subject &&
4727           *start_match == '\n')
4728        start_match++;
4729    
4730      }   /* End of for(;;) "bumpalong" loop */
4731    
4732    /* ==========================================================================*/
4733    
4734    /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4735    conditions is true:
4736    
4737    (1) The pattern is anchored;
4738    
4739    (2) We are past the end of the subject;
4740    
4741    (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4742        this option requests that a match occur at or before the first newline in
4743        the subject.
4744    
4745    When we have a match and the offset vector is big enough to deal with any
4746    backreferences, captured substring offsets will already be set up. In the case
4747    where we had to get some local store to hold offsets for backreference
4748    processing, copy those that we can. In this case there need not be overflow if
4749    certain parts of the pattern were not used, even though there are more
4750    capturing parentheses than vector slots. */
4751    
4752    if (rc == MATCH_MATCH)
4753      {
4754    if (using_temporary_offsets)    if (using_temporary_offsets)
4755      {      {
4756      if (offsetcount >= 4)      if (offsetcount >= 4)
4757        {        {
4758        memcpy(offsets + 2, match_block.offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
4759          (offsetcount - 2) * sizeof(int));          (offsetcount - 2) * sizeof(int));
4760        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
4761        }        }
4762      if (match_block.end_offset_top > offsetcount)      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
       match_block.offset_overflow = TRUE;  
   
4763      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
4764      (pcre_free)(match_block.offset_vector);      (pcre_free)(md->offset_vector);
4765      }      }
4766    
4767    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;    /* Set the return code to the number of captured strings, or 0 if there are
4768      too many to fit into the vector. */
4769    
4770      rc = md->offset_overflow? 0 : md->end_offset_top/2;
4771    
4772      /* If there is space, set up the whole thing as substring 0. The value of
4773      md->start_match_ptr might be modified if \K was encountered on the successful
4774      matching path. */
4775    
4776    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4777      {      {
4778      offsets[0] = start_match - match_block.start_subject;      offsets[0] = md->start_match_ptr - md->start_subject;
4779      offsets[1] = match_block.end_match_ptr - match_block.start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
4780      }      }
4781    
4782    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
4783    return rc;    return rc;
4784    }    }
4785    
4786  /* This "while" is the end of the "do" above */  /* Control gets here if there has been an error, or if the overall match
4787    attempt has failed at all permitted starting positions. */
 while (!anchored && start_match <= end_subject);  
4788    
4789  if (using_temporary_offsets)  if (using_temporary_offsets)
4790    {    {
4791    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
4792    (pcre_free)(match_block.offset_vector);    (pcre_free)(md->offset_vector);
4793    }    }
4794    
4795  if (match_block.partial && match_block.hitend)  if (rc != MATCH_NOMATCH)
4796      {
4797      DPRINTF((">>>> error: returning %d\n", rc));
4798      return rc;
4799      }
4800    else if (md->partial && md->hitend)
4801    {    {
4802    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4803    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;

Legend:
Removed from v.85  
changed lines
  Added in v.200

  ViewVC Help
Powered by ViewVC 1.1.5