/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC revision 172 by ph10, Tue Jun 5 10:40:13 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #define NLBLOCK md             /* Block containing newline information */
46    #define PSSTART start_subject  /* Field containing processed string start */
47    #define PSEND   end_subject    /* Field containing processed string end */
48    
49  #include "pcre_internal.h"  #include "pcre_internal.h"
50    
51    /* Undefine some potentially clashing cpp symbols */
52    
53  /* Structure for building a chain of data that actually lives on the  #undef min
54  stack, for holding the values of the subject pointer at the start of each  #undef max
55  subpattern, so as to detect when an empty string has been matched by a  
56  subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
57  are on the heap, not on the stack. */  obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
58    
59  typedef struct eptrblock {  #define EPTR_WORK_SIZE (1000)
   struct eptrblock *epb_prev;  
   const uschar *epb_saved_eptr;  
 } eptrblock;  
60    
61  /* Flag bits for the match() function */  /* Flag bits for the match() function */
62    
63  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
64  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
65    #define match_tail_recursed  0x04  /* Tail recursive call */
66    
67  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
68  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 101  Returns:     nothing Line 103  Returns:     nothing
103  static void  static void
104  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
105  {  {
106  int c;  unsigned int c;
107  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
108  while (length-- > 0)  while (length-- > 0)
109    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 128  Returns:      TRUE if matched Line 130  Returns:      TRUE if matched
130  */  */
131    
132  static BOOL  static BOOL
133  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
134    unsigned long int ims)    unsigned long int ims)
135  {  {
136  const uschar *p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
137    
138  #ifdef DEBUG  #ifdef DEBUG
139  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 169  return TRUE; Line 171  return TRUE;
171  ****************************************************************************  ****************************************************************************
172                     RECURSION IN THE match() FUNCTION                     RECURSION IN THE match() FUNCTION
173    
174  The match() function is highly recursive. Some regular expressions can cause  The match() function is highly recursive, though not every recursive call
175  it to recurse thousands of times. I was writing for Unix, so I just let it  increases the recursive depth. Nevertheless, some regular expressions can cause
176  call itself recursively. This uses the stack for saving everything that has  it to recurse to a great depth. I was writing for Unix, so I just let it call
177  to be saved for a recursive call. On Unix, the stack can be large, and this  itself recursively. This uses the stack for saving everything that has to be
178  works fine.  saved for a recursive call. On Unix, the stack can be large, and this works
179    fine.
180  It turns out that on non-Unix systems there are problems with programs that  
181  use a lot of stack. (This despite the fact that every last chip has oodles  It turns out that on some non-Unix-like systems there are problems with
182  of memory these days, and techniques for extending the stack have been known  programs that use a lot of stack. (This despite the fact that every last chip
183  for decades.) So....  has oodles of memory these days, and techniques for extending the stack have
184    been known for decades.) So....
185    
186  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
187  calls by keeping local variables that need to be preserved in blocks of memory  calls by keeping local variables that need to be preserved in blocks of memory
188  obtained from malloc instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
189  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
190  always used to.  always used to.
191    
192    The original heap-recursive code used longjmp(). However, it seems that this
193    can be very slow on some operating systems. Following a suggestion from Stan
194    Switzer, the use of longjmp() has been abolished, at the cost of having to
195    provide a unique number for each call to RMATCH. There is no way of generating
196    a sequence of numbers at compile time in C. I have given them names, to make
197    them stand out more clearly.
198    
199    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
200    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
201    tests. Furthermore, not using longjmp() means that local dynamic variables
202    don't have indeterminate values; this has meant that the frame size can be
203    reduced because the result can be "passed back" by straight setting of the
204    variable instead of being passed in the frame.
205  ****************************************************************************  ****************************************************************************
206  ***************************************************************************/  ***************************************************************************/
207    
208    
209  /* These versions of the macros use the stack, as normal */  /* Numbers for RMATCH calls */
210    
211    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
212           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
213           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
214           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
215           RM41,  RM42, RM43, RM44, RM45, RM46, RM47 };
216    
217    
218    /* These versions of the macros use the stack, as normal. There are debugging
219    versions and production versions. Note that the "rw" argument of RMATCH isn't
220    actually used in this definition. */
221    
222  #ifndef NO_RECURSE  #ifndef NO_RECURSE
223  #define REGISTER register  #define REGISTER register
224  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)  
225    #ifdef DEBUG
226    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
227      { \
228      printf("match() called in line %d\n", __LINE__); \
229      rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
230      printf("to line %d\n", __LINE__); \
231      }
232    #define RRETURN(ra) \
233      { \
234      printf("match() returned %d from line %d ", ra, __LINE__); \
235      return ra; \
236      }
237    #else
238    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
239      rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
240  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
241    #endif
242    
243  #else  #else
244    
245    
246  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
247  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
248  match(), which never changes. */  argument of match(), which never changes. */
249    
250  #define REGISTER  #define REGISTER
251    
252  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
253    {\    {\
254    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
255    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
256      {\    newframe->Xeptr = ra;\
257      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
258      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
259      newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
260      newframe->Xims = re;\    newframe->Xims = re;\
261      newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
262      newframe->Xflags = rg;\    newframe->Xflags = rg;\
263      newframe->Xprevframe = frame;\    newframe->Xrdepth = frame->Xrdepth + 1;\
264      frame = newframe;\    newframe->Xprevframe = frame;\
265      DPRINTF(("restarting from line %d\n", __LINE__));\    frame = newframe;\
266      goto HEAP_RECURSE;\    DPRINTF(("restarting from line %d\n", __LINE__));\
267      }\    goto HEAP_RECURSE;\
268    else\    L_##rw:\
269      {\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
270    }    }
271    
272  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 235  match(), which never changes. */ Line 276  match(), which never changes. */
276    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
277    if (frame != NULL)\    if (frame != NULL)\
278      {\      {\
279      frame->Xresult = ra;\      rrc = ra;\
280      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
281      }\      }\
282    return ra;\    return ra;\
283    }    }
# Line 252  typedef struct heapframe { Line 292  typedef struct heapframe {
292    
293    const uschar *Xeptr;    const uschar *Xeptr;
294    const uschar *Xecode;    const uschar *Xecode;
295      const uschar *Xmstart;
296    int Xoffset_top;    int Xoffset_top;
297    long int Xims;    long int Xims;
298    eptrblock *Xeptrb;    eptrblock *Xeptrb;
299    int Xflags;    int Xflags;
300      unsigned int Xrdepth;
301    
302    /* Function local variables */    /* Function local variables */
303    
# Line 271  typedef struct heapframe { Line 313  typedef struct heapframe {
313    
314    BOOL Xcur_is_word;    BOOL Xcur_is_word;
315    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
316    BOOL Xprev_is_word;    BOOL Xprev_is_word;
317    
318    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
319    
320  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
321    int Xprop_type;    int Xprop_type;
322      int Xprop_value;
323    int Xprop_fail_result;    int Xprop_fail_result;
324    int Xprop_category;    int Xprop_category;
325    int Xprop_chartype;    int Xprop_chartype;
326    int Xprop_othercase;    int Xprop_script;
327    int Xprop_test_against;    int Xoclength;
328    int *Xprop_test_variable;    uschar Xocchars[8];
329  #endif  #endif
330    
331    int Xctype;    int Xctype;
332    int Xfc;    unsigned int Xfc;
333    int Xfi;    int Xfi;
334    int Xlength;    int Xlength;
335    int Xmax;    int Xmax;
# Line 301  typedef struct heapframe { Line 343  typedef struct heapframe {
343    
344    eptrblock Xnewptrb;    eptrblock Xnewptrb;
345    
346    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
347    
348    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
349    
350  } heapframe;  } heapframe;
351    
# Line 320  typedef struct heapframe { Line 361  typedef struct heapframe {
361  *         Match from current position            *  *         Match from current position            *
362  *************************************************/  *************************************************/
363    
364  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
365  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
366  same response.  same response.
367    
# Line 333  performance. Tests using gcc on a SPARC Line 371  performance. Tests using gcc on a SPARC
371  made performance worse.  made performance worse.
372    
373  Arguments:  Arguments:
374     eptr        pointer in subject     eptr        pointer to current character in subject
375     ecode       position in code     ecode       pointer to current position in compiled code
376       mstart      pointer to the current match start position (can be modified
377                     by encountering \K)
378     offset_top  current top pointer     offset_top  current top pointer
379     md          pointer to "static" info for the match     md          pointer to "static" info for the match
380     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 342  Arguments: Line 382  Arguments:
382                   brackets - for testing for empty matches                   brackets - for testing for empty matches
383     flags       can contain     flags       can contain
384                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
385                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
386                       group that can match an empty string
387                     match_tail_recursed - this is a tail_recursed group
388       rdepth      the recursion depth
389    
390  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
391                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
392                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
393                   (e.g. stopped by recursion limit)                   (e.g. stopped by repeated call or recursion limit)
394  */  */
395    
396  static int  static int
397  match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
398    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
399    int flags)    int flags, unsigned int rdepth)
400  {  {
401  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
402  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
403  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
404    
405    register int  rrc;         /* Returns from recursive calls */
406    register int  i;           /* Used for loops not involving calls to RMATCH() */
407    register unsigned int c;   /* Character values not kept over RMATCH() calls */
408    register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
409    
410  register int  rrc;    /* Returns from recursive calls */  BOOL minimize, possessive; /* Quantifier options */
 register int  i;      /* Used for loops not involving calls to RMATCH() */  
 register int  c;      /* Character values not kept over RMATCH() calls */  
 register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  
411    
412  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
413  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 377  frame->Xprevframe = NULL;            /* Line 422  frame->Xprevframe = NULL;            /*
422    
423  frame->Xeptr = eptr;  frame->Xeptr = eptr;
424  frame->Xecode = ecode;  frame->Xecode = ecode;
425    frame->Xmstart = mstart;
426  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
427  frame->Xims = ims;  frame->Xims = ims;
428  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
429  frame->Xflags = flags;  frame->Xflags = flags;
430    frame->Xrdepth = rdepth;
431    
432  /* This is where control jumps back to to effect "recursion" */  /* This is where control jumps back to to effect "recursion" */
433    
# Line 390  HEAP_RECURSE: Line 437  HEAP_RECURSE:
437    
438  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
439  #define ecode              frame->Xecode  #define ecode              frame->Xecode
440    #define mstart             frame->Xmstart
441  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
442  #define ims                frame->Xims  #define ims                frame->Xims
443  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
444  #define flags              frame->Xflags  #define flags              frame->Xflags
445    #define rdepth             frame->Xrdepth
446    
447  /* Ditto for the local variables */  /* Ditto for the local variables */
448    
# Line 411  HEAP_RECURSE: Line 460  HEAP_RECURSE:
460    
461  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
462  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
463  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
464    
465  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
466    
467  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
468  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
469    #define prop_value         frame->Xprop_value
470  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
471  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
472  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
473  #define prop_othercase     frame->Xprop_othercase  #define prop_script        frame->Xprop_script
474  #define prop_test_against  frame->Xprop_test_against  #define oclength           frame->Xoclength
475  #define prop_test_variable frame->Xprop_test_variable  #define occhars            frame->Xocchars
476  #endif  #endif
477    
478  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 447  HEAP_RECURSE: Line 496  HEAP_RECURSE:
496  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
497  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
498    
499  #else  #else         /* NO_RECURSE not defined */
500  #define fi i  #define fi i
501  #define fc c  #define fc c
502    
503    
504  #ifdef SUPPORT_UTF8                /* Many of these variables are used ony */  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
505  const uschar *charptr;             /* small blocks of the code. My normal  */  const uschar *charptr;             /* in small blocks of the code. My normal */
506  #endif                             /* style of coding would have declared  */  #endif                             /* style of coding would have declared    */
507  const uschar *callpat;             /* them within each of those blocks.    */  const uschar *callpat;             /* them within each of those blocks.      */
508  const uschar *data;                /* However, in order to accommodate the */  const uschar *data;                /* However, in order to accommodate the   */
509  const uschar *next;                /* version of this code that uses an    */  const uschar *next;                /* version of this code that uses an      */
510  const uschar *pp;                  /* external "stack" implemented on the  */  USPTR         pp;                  /* external "stack" implemented on the    */
511  const uschar *prev;                /* heap, it is easier to declare them   */  const uschar *prev;                /* heap, it is easier to declare them all */
512  const uschar *saved_eptr;          /* all here, so the declarations can    */  USPTR         saved_eptr;          /* here, so the declarations can be cut   */
513                                     /* be cut out in a block. The only      */                                     /* out in a block. The only declarations  */
514  recursion_info new_recursive;      /* declarations within blocks below are */  recursion_info new_recursive;      /* within blocks below are for variables  */
515                                     /* for variables that do not have to    */                                     /* that do not have to be preserved over  */
516  BOOL cur_is_word;                  /* be preserved over a recursive call   */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
517  BOOL condition;                    /* to RMATCH().                         */  BOOL condition;
 BOOL minimize;  
518  BOOL prev_is_word;  BOOL prev_is_word;
519    
520  unsigned long int original_ims;  unsigned long int original_ims;
521    
522  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
523  int prop_type;  int prop_type;
524    int prop_value;
525  int prop_fail_result;  int prop_fail_result;
526  int prop_category;  int prop_category;
527  int prop_chartype;  int prop_chartype;
528  int prop_othercase;  int prop_script;
529  int prop_test_against;  int oclength;
530  int *prop_test_variable;  uschar occhars[8];
531  #endif  #endif
532    
533  int ctype;  int ctype;
# Line 493  int save_offset1, save_offset2, save_off Line 542  int save_offset1, save_offset2, save_off
542  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
543    
544  eptrblock newptrb;  eptrblock newptrb;
545  #endif  #endif     /* NO_RECURSE */
546    
547  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
548  variables. */  variables. */
549    
550  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
551    prop_value = 0;
552  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_against = 0;  
 prop_test_variable = NULL;  
553  #endif  #endif
554    
555  /* OK, now we can get on with the real code of the function. Recursion is  
556  specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,  /* This label is used for tail recursion, which is used in a few cases even
557  these just turn into a recursive call to match() and a "return", respectively.  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
558  However, RMATCH isn't like a function call because it's quite a complicated  used. Thanks to Ian Taylor for noticing this possibility and sending the
559  macro. It has to be used in one particular way. This shouldn't, however, impact  original patch. */
560  performance when true recursion is being used. */  
561    TAIL_RECURSE:
562    
563    /* OK, now we can get on with the real code of the function. Recursive calls
564    are specified by the macro RMATCH and RRETURN is used to return. When
565    NO_RECURSE is *not* defined, these just turn into a recursive call to match()
566    and a "return", respectively (possibly with some debugging if DEBUG is
567    defined). However, RMATCH isn't like a function call because it's quite a
568    complicated macro. It has to be used in one particular way. This shouldn't,
569    however, impact performance when true recursion is being used. */
570    
571    #ifdef SUPPORT_UTF8
572    utf8 = md->utf8;       /* Local copy of the flag */
573    #else
574    utf8 = FALSE;
575    #endif
576    
577    /* First check that we haven't called match() too many times, or that we
578    haven't exceeded the recursive call limit. */
579    
580  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
581    if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
582    
583  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
 utf8 = md->utf8;       /* Local copy of the flag */  
584    
585  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
586  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
587  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
588  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
589    When match() is called in other circumstances, don't add to the chain. If this
590    is a tail recursion, use a block from the workspace, as the one on the stack is
591    already used. */
592    
593  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
594    {    {
595    newptrb.epb_prev = eptrb;    eptrblock *p;
596    newptrb.epb_saved_eptr = eptr;    if ((flags & match_tail_recursed) != 0)
597    eptrb = &newptrb;      {
598        if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
599        p = md->eptrchain + md->eptrn++;
600        }
601      else p = &newptrb;
602      p->epb_saved_eptr = eptr;
603      p->epb_prev = eptrb;
604      eptrb = p;
605    }    }
606    
607  /* Now start processing the operations. */  /* Now start processing the opcodes. */
608    
609  for (;;)  for (;;)
610    {    {
611      minimize = possessive = FALSE;
612    op = *ecode;    op = *ecode;
   minimize = FALSE;  
613    
614    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
615    matching at least one subject character. */    matching at least one subject character. */
616    
617    if (md->partial &&    if (md->partial &&
618        eptr >= md->end_subject &&        eptr >= md->end_subject &&
619        eptr > md->start_match)        eptr > mstart)
620      md->hitend = TRUE;      md->hitend = TRUE;
621    
622    /* Opening capturing bracket. If there is space in the offset vector, save    switch(op)
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
623      {      {
624      number = op - OP_BRA;      /* Handle a capturing bracket. If there is space in the offset vector, save
625        the current subject position in the working slot at the top of the vector.
626      /* For extended extraction brackets (large number), we have to fish out the      We mustn't change the current values of the data slot, because they may be
627      number from a dummy opcode at the start. */      set from a previous iteration of this group, and be referred to by a
628        reference inside the group.
629      if (number > EXTRACT_BASIC_MAX)  
630        number = GET2(ecode, 2+LINK_SIZE);      If the bracket fails to match, we need to restore this value and also the
631        values of the final offsets, in case they were set by a previous iteration
632        of the same bracket.
633    
634        If there isn't enough space in the offset vector, treat this as if it were
635        a non-capturing bracket. Don't worry about setting the flag for the error
636        case here; that is handled in the code for KET. */
637    
638        case OP_CBRA:
639        case OP_SCBRA:
640        number = GET2(ecode, 1+LINK_SIZE);
641      offset = number << 1;      offset = number << 1;
642    
643  #ifdef DEBUG  #ifdef DEBUG
644      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
645        printf("subject=");
646      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
647      printf("\n");      printf("\n");
648  #endif  #endif
# Line 584  for (;;) Line 657  for (;;)
657        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
658        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
659    
660          flags = (op == OP_SCBRA)? match_cbegroup : 0;
661        do        do
662          {          {
663          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
664            match_isgroup);            ims, eptrb, flags, RM1);
665          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
666          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
667          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
# Line 603  for (;;) Line 677  for (;;)
677        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
678        }        }
679    
680      /* Insufficient room for saving captured contents */      /* Insufficient room for saving captured contents. Treat as a non-capturing
681        bracket. */
682    
683      else op = OP_BRA;      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
     }  
684    
685    /* Other types of node can be handled by a switch */      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
686        final alternative within the brackets, we would return the result of a
687        recursive call to match() whatever happened. We can reduce stack usage by
688        turning this into a tail recursion. */
689    
690    switch(op)      case OP_BRA:
691      {      case OP_SBRA:
692      case OP_BRA:     /* Non-capturing bracket: optimized */      DPRINTF(("start non-capturing bracket\n"));
693      DPRINTF(("start bracket 0\n"));      flags = (op >= OP_SBRA)? match_cbegroup : 0;
694      do      for (;;)
695        {        {
696        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)
697          match_isgroup);          {
698            ecode += _pcre_OP_lengths[*ecode];
699            flags |= match_tail_recursed;
700            DPRINTF(("bracket 0 tail recursion\n"));
701            goto TAIL_RECURSE;
702            }
703    
704          /* For non-final alternatives, continue the loop for a NOMATCH result;
705          otherwise return. */
706    
707          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
708            eptrb, flags, RM2);
709        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
710        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
711        }        }
712      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
713    
714      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
715      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
716      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
717      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
718        obeyed, we can use tail recursion to avoid using another stack frame. */
719    
720      case OP_COND:      case OP_COND:
721      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
722        if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
723          {
724          offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
725          condition = md->recursive != NULL &&
726            (offset == RREF_ANY || offset == md->recursive->group_num);
727          ecode += condition? 3 : GET(ecode, 1);
728          }
729    
730        else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
731        {        {
732        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
733        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
734          (md->recursive != NULL) :        ecode += condition? 3 : GET(ecode, 1);
735          (offset < offset_top && md->offset_vector[offset] >= 0);        }
736        RMATCH(rrc, eptr, ecode + (condition?  
737          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
738          offset_top, md, ims, eptrb, match_isgroup);        {
739        RRETURN(rrc);        condition = FALSE;
740          ecode += GET(ecode, 1);
741        }        }
742    
743      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
744      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
745        assertion. */
746    
747      else      else
748        {        {
749        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
750            match_condassert | match_isgroup);            match_condassert, RM3);
751        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
752          {          {
753          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
754            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
755          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
756          }          }
757        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH)
758          {          {
759          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
760          }          }
761        else ecode += GET(ecode, 1);        else
762        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
763          match_isgroup);          condition = FALSE;
764        RRETURN(rrc);          ecode += GET(ecode, 1);
765            }
766        }        }
     /* Control never reaches here */  
767    
768      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
769      encountered. */      we can use tail recursion to avoid using another stack frame. If the second
770        alternative doesn't exist, we can just plough on. */
771    
772      case OP_CREF:      if (condition || *ecode == OP_ALT)
773      case OP_BRANUMBER:        {
774      ecode += 3;        ecode += 1 + LINK_SIZE;
775          flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
776          goto TAIL_RECURSE;
777          }
778        else
779          {
780          ecode += 1 + LINK_SIZE;
781          }
782      break;      break;
783    
784      /* End of the pattern. If we are in a recursion, we should restore the  
785      offsets appropriately and continue from after the call. */      /* End of the pattern. If we are in a top-level recursion, we should
786        restore the offsets appropriately and continue from after the call. */
787    
788      case OP_END:      case OP_END:
789      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
790        {        {
791        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
792        DPRINTF(("Hit the end in a (?0) recursion\n"));        DPRINTF(("End of pattern in a (?0) recursion\n"));
793        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
794        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
795          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
796        md->start_match = rec->save_start;        mstart = rec->save_start;
797        ims = original_ims;        ims = original_ims;
798        ecode = rec->after_call;        ecode = rec->after_call;
799        break;        break;
# Line 694  for (;;) Line 802  for (;;)
802      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
803      string - backtracking will then try other alternatives, if any. */      string - backtracking will then try other alternatives, if any. */
804    
805      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
806      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
807      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
808        md->start_match_ptr = mstart;  /* and the start (\K can modify) */
809      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
810    
811      /* Change option settings */      /* Change option settings */
# Line 717  for (;;) Line 826  for (;;)
826      case OP_ASSERTBACK:      case OP_ASSERTBACK:
827      do      do
828        {        {
829        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
830          match_isgroup);          RM4);
831        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
832        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
833        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 744  for (;;) Line 853  for (;;)
853      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
854      do      do
855        {        {
856        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
857          match_isgroup);          RM5);
858        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
859        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
860        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 766  for (;;) Line 875  for (;;)
875  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
876      if (utf8)      if (utf8)
877        {        {
878        c = GET(ecode,1);        i = GET(ecode, 1);
879        for (i = 0; i < c; i++)        while (i-- > 0)
880          {          {
881          eptr--;          eptr--;
882          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
# Line 780  for (;;) Line 889  for (;;)
889      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
890    
891        {        {
892        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
893        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
894        }        }
895    
# Line 800  for (;;) Line 909  for (;;)
909        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 1;   /* Version 1 of the callout block */
910        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
911        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
912        cb.subject          = (const char *)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
913        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
914        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
915        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
916        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
917        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
# Line 837  for (;;) Line 946  for (;;)
946      case OP_RECURSE:      case OP_RECURSE:
947        {        {
948        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
949        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
950            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
951    
952        /* Add to "recursing stack" */        /* Add to "recursing stack" */
953    
# Line 869  for (;;) Line 973  for (;;)
973    
974        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
975              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
976        new_recursive.save_start = md->start_match;        new_recursive.save_start = mstart;
977        md->start_match = eptr;        mstart = eptr;
978    
979        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
980        restore the offset and recursion data. */        restore the offset and recursion data. */
981    
982        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
983          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
984        do        do
985          {          {
986          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
987              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
988          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
989            {            {
990              DPRINTF(("Recursion matched\n"));
991            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
992            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
993              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
994            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
995            }            }
996          else if (rrc != MATCH_NOMATCH) RRETURN(rrc);          else if (rrc != MATCH_NOMATCH)
997              {
998              DPRINTF(("Recursion gave error %d\n", rrc));
999              RRETURN(rrc);
1000              }
1001    
1002          md->recursive = &new_recursive;          md->recursive = &new_recursive;
1003          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
# Line 912  for (;;) Line 1022  for (;;)
1022      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer. */
1023    
1024      case OP_ONCE:      case OP_ONCE:
1025        {      prev = ecode;
1026        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1027    
1028        do      do
1029          {        {
1030          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
1031            eptrb, match_isgroup);          eptrb, 0, RM7);
1032          if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1033          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1034          ecode += GET(ecode,1);        ecode += GET(ecode,1);
1035          }        }
1036        while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1037    
1038        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1039    
1040        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1041    
1042        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1043        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1044    
1045        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1046    
1047        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1048        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1049    
1050        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1051        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1052        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1053        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1054        course of events. */      course of events. */
1055    
1056        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1057          {        {
1058          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1059          break;        break;
1060          }        }
1061    
1062        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1063        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1064        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1065        opcode. */      any options that changed within the bracket before re-running it, so
1066        check the next opcode. */
1067    
1068        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1069          {        {
1070          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1071          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1072          }        }
1073    
1074        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1075          {        {
1076          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,
1077          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          RM8);
1078          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1079          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode = prev;
1080          }        flags = match_tail_recursed;
1081        else  /* OP_KETRMAX */        goto TAIL_RECURSE;
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
1082        }        }
1083      RRETURN(MATCH_NOMATCH);      else  /* OP_KETRMAX */
1084          {
1085          RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1086          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1087          ecode += 1 + LINK_SIZE;
1088          flags = match_tail_recursed;
1089          goto TAIL_RECURSE;
1090          }
1091        /* Control never gets here */
1092    
1093      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1094      bracketed group and go to there. */      bracketed group and go to there. */
# Line 994  for (;;) Line 1106  for (;;)
1106      case OP_BRAZERO:      case OP_BRAZERO:
1107        {        {
1108        next = ecode+1;        next = ecode+1;
1109        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1110        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1111        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1112        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1113        }        }
1114      break;      break;
1115    
1116      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1117        {        {
1118        next = ecode+1;        next = ecode+1;
1119        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1120        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1121        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1122        ecode++;        ecode++;
1123        }        }
1124      break;      break;
1125    
1126      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. */
     an assertion "group", stop matching and return MATCH_MATCH, but record the  
     current high water mark for use by positive assertions. Do this also  
     for the "once" (not-backup up) groups. */  
1127    
1128      case OP_KET:      case OP_KET:
1129      case OP_KETRMIN:      case OP_KETRMIN:
1130      case OP_KETRMAX:      case OP_KETRMAX:
1131        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
   
       /* Back up the stack of bracket start pointers. */  
1132    
1133        eptrb = eptrb->epb_prev;      /* If this was a group that remembered the subject start, in order to break
1134        infinite repeats of empty string matches, retrieve the subject start from
1135        the chain. Otherwise, set it NULL. */
1136    
1137        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev >= OP_SBRA)
1138            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        {
1139            *prev == OP_ONCE)        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1140          {        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1141          md->end_match_ptr = eptr;      /* For ONCE */        }
1142          md->end_offset_top = offset_top;      else saved_eptr = NULL;
         RRETURN(MATCH_MATCH);  
         }  
1143    
1144        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group, stop matching and return
1145        group number back at the start and if necessary complete handling an      MATCH_MATCH, but record the current high water mark for use by positive
1146        extraction by setting the offsets and bumping the high water mark. */      assertions. Do this also for the "once" (atomic) groups. */
1147    
1148        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1149          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1150          number = *prev - OP_BRA;          *prev == OP_ONCE)
1151          {
1152          md->end_match_ptr = eptr;      /* For ONCE */
1153          md->end_offset_top = offset_top;
1154          RRETURN(MATCH_MATCH);
1155          }
1156    
1157          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1158          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1159        bumping the high water mark. Note that whole-pattern recursion is coded as
1160        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1161        when the OP_END is reached. Other recursion is handled here. */
1162    
1163          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1164          offset = number << 1;        {
1165          number = GET2(prev, 1+LINK_SIZE);
1166          offset = number << 1;
1167    
1168  #ifdef DEBUG  #ifdef DEBUG
1169          printf("end bracket %d", number);        printf("end bracket %d", number);
1170          printf("\n");        printf("\n");
1171  #endif  #endif
1172    
1173          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1174          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1175          into group 0, so it won't be picked up here. Instead, we catch it when          {
1176          the OP_END is reached. */          md->offset_vector[offset] =
1177              md->offset_vector[md->offset_end - number];
1178          if (number > 0)          md->offset_vector[offset+1] = eptr - md->start_subject;
1179            {          if (offset_top <= offset) offset_top = offset + 2;
1180            md->capture_last = number;          }
1181            if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
1182              {        /* Handle a recursively called group. Restore the offsets
1183              md->offset_vector[offset] =        appropriately and continue from after the call. */
1184                md->offset_vector[md->offset_end - number];  
1185              md->offset_vector[offset+1] = eptr - md->start_subject;        if (md->recursive != NULL && md->recursive->group_num == number)
1186              if (offset_top <= offset) offset_top = offset + 2;          {
1187              }          recursion_info *rec = md->recursive;
1188            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1189            /* Handle a recursively called group. Restore the offsets          md->recursive = rec->prevrec;
1190            appropriately and continue from after the call. */          mstart = rec->save_start;
1191            memcpy(md->offset_vector, rec->offset_save,
1192            if (md->recursive != NULL && md->recursive->group_num == number)            rec->saved_max * sizeof(int));
1193              {          ecode = rec->after_call;
1194              recursion_info *rec = md->recursive;          ims = original_ims;
1195              DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          break;
             md->recursive = rec->prevrec;  
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
1196          }          }
1197          }
1198    
1199        /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1200        the group. */      flags, in case they got changed during the group. */
1201    
1202        ims = original_ims;      ims = original_ims;
1203        DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
1204    
1205        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1206        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1207        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1208        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1209        course of events. */      course of events. */
1210    
1211        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1212          {        {
1213          ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1214          break;        break;
1215          }        }
1216    
1217        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1218        preceding bracket, in the appropriate order. */      preceding bracket, in the appropriate order. In the second case, we can use
1219        tail recursion to avoid using another stack frame. */
1220    
1221        if (*ecode == OP_KETRMIN)      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
         {  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
       }  
1222    
1223      RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KETRMIN)
1224          {
1225          RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,
1226            RM12);
1227          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1228          ecode = prev;
1229          flags |= match_tail_recursed;
1230          goto TAIL_RECURSE;
1231          }
1232        else  /* OP_KETRMAX */
1233          {
1234          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1235          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1236          ecode += 1 + LINK_SIZE;
1237          flags = match_tail_recursed;
1238          goto TAIL_RECURSE;
1239          }
1240        /* Control never gets here */
1241    
1242      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1243    
# Line 1135  for (;;) Line 1245  for (;;)
1245      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1246      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1247        {        {
1248        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1249              (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1250          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1251        ecode++;        ecode++;
1252        break;        break;
# Line 1156  for (;;) Line 1267  for (;;)
1267      ecode++;      ecode++;
1268      break;      break;
1269    
1270        /* Reset the start of match point */
1271    
1272        case OP_SET_SOM:
1273        mstart = eptr;
1274        ecode++;
1275        break;
1276    
1277      /* Assert before internal newline if multiline, or before a terminating      /* Assert before internal newline if multiline, or before a terminating
1278      newline unless endonly is set, else end of subject unless noteol is set. */      newline unless endonly is set, else end of subject unless noteol is set. */
1279    
# Line 1163  for (;;) Line 1281  for (;;)
1281      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1282        {        {
1283        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1284          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1285        else        else
1286          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1287        ecode++;        ecode++;
# Line 1174  for (;;) Line 1292  for (;;)
1292        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
1293        if (!md->endonly)        if (!md->endonly)
1294          {          {
1295          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1296             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1297            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1298          ecode++;          ecode++;
1299          break;          break;
1300          }          }
1301        }        }
1302      /* ... else fall through */      /* ... else fall through for endonly */
1303    
1304      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1305    
# Line 1193  for (;;) Line 1311  for (;;)
1311      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1312    
1313      case OP_EODN:      case OP_EODN:
1314      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1315         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1316          RRETURN(MATCH_NOMATCH);
1317      ecode++;      ecode++;
1318      break;      break;
1319    
# Line 1247  for (;;) Line 1366  for (;;)
1366      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1367    
1368      case OP_ANY:      case OP_ANY:
1369      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if ((ims & PCRE_DOTALL) == 0)
1370        RRETURN(MATCH_NOMATCH);        {
1371          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1372          }
1373      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
 #ifdef SUPPORT_UTF8  
1374      if (utf8)      if (utf8)
1375        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 #endif  
1376      ecode++;      ecode++;
1377      break;      break;
1378    
# Line 1343  for (;;) Line 1462  for (;;)
1462      ecode++;      ecode++;
1463      break;      break;
1464    
1465        case OP_ANYNL:
1466        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1467        GETCHARINCTEST(c, eptr);
1468        switch(c)
1469          {
1470          default: RRETURN(MATCH_NOMATCH);
1471          case 0x000d:
1472          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1473          break;
1474          case 0x000a:
1475          case 0x000b:
1476          case 0x000c:
1477          case 0x0085:
1478          case 0x2028:
1479          case 0x2029:
1480          break;
1481          }
1482        ecode++;
1483        break;
1484    
1485  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1486      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1487      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1352  for (;;) Line 1491  for (;;)
1491      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1492      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1493        {        {
1494        int chartype, rqdtype;        int chartype, script;
1495        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = _pcre_ucp_findchar(c, &chartype, &othercase);  
   
       rqdtype = *(++ecode);  
       ecode++;  
1496    
1497        if (rqdtype >= 128)        switch(ecode[1])
1498          {          {
1499          if ((rqdtype - 128 != category) == (op == OP_PROP))          case PT_ANY:
1500            if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1501            break;
1502    
1503            case PT_LAMP:
1504            if ((chartype == ucp_Lu ||
1505                 chartype == ucp_Ll ||
1506                 chartype == ucp_Lt) == (op == OP_NOTPROP))
1507            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1508          }           break;
1509        else  
1510          {          case PT_GC:
1511          if ((rqdtype != chartype) == (op == OP_PROP))          if ((ecode[2] != category) == (op == OP_PROP))
1512              RRETURN(MATCH_NOMATCH);
1513            break;
1514    
1515            case PT_PC:
1516            if ((ecode[2] != chartype) == (op == OP_PROP))
1517              RRETURN(MATCH_NOMATCH);
1518            break;
1519    
1520            case PT_SC:
1521            if ((ecode[2] != script) == (op == OP_PROP))
1522            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1523            break;
1524    
1525            default:
1526            RRETURN(PCRE_ERROR_INTERNAL);
1527          }          }
1528    
1529          ecode += 3;
1530        }        }
1531      break;      break;
1532    
# Line 1379  for (;;) Line 1537  for (;;)
1537      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1538      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1539        {        {
1540        int chartype;        int chartype, script;
1541        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = _pcre_ucp_findchar(c, &chartype, &othercase);  
1542        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1543        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1544          {          {
# Line 1390  for (;;) Line 1547  for (;;)
1547            {            {
1548            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1549            }            }
1550          category = _pcre_ucp_findchar(c, &chartype, &othercase);          category = _pcre_ucp_findprop(c, &chartype, &script);
1551          if (category != ucp_M) break;          if (category != ucp_M) break;
1552          eptr += len;          eptr += len;
1553          }          }
# Line 1480  for (;;) Line 1637  for (;;)
1637          {          {
1638          for (fi = min;; fi++)          for (fi = min;; fi++)
1639            {            {
1640            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1641            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1642            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1643              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 1501  for (;;) Line 1658  for (;;)
1658            }            }
1659          while (eptr >= pp)          while (eptr >= pp)
1660            {            {
1661            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1662            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1663            eptr -= length;            eptr -= length;
1664            }            }
# Line 1606  for (;;) Line 1763  for (;;)
1763            {            {
1764            for (fi = min;; fi++)            for (fi = min;; fi++)
1765              {              {
1766              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1767              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1768              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1769              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 1626  for (;;) Line 1783  for (;;)
1783            {            {
1784            for (fi = min;; fi++)            for (fi = min;; fi++)
1785              {              {
1786              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1787              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1788              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1789              c = *eptr++;              c = *eptr++;
# Line 1663  for (;;) Line 1820  for (;;)
1820              }              }
1821            for (;;)            for (;;)
1822              {              {
1823              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1824              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1825              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
1826              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1682  for (;;) Line 1839  for (;;)
1839              }              }
1840            while (eptr >= pp)            while (eptr >= pp)
1841              {              {
1842              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
             eptr--;  
1843              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1844                eptr--;
1845              }              }
1846            }            }
1847    
# Line 1753  for (;;) Line 1910  for (;;)
1910          {          {
1911          for (fi = min;; fi++)          for (fi = min;; fi++)
1912            {            {
1913            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
1914            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1915            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1916            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
# Line 1777  for (;;) Line 1934  for (;;)
1934            }            }
1935          for(;;)          for(;;)
1936            {            {
1937            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
1938            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1939            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
1940            BACKCHAR(eptr)            BACKCHAR(eptr)
# Line 1836  for (;;) Line 1993  for (;;)
1993    
1994        else        else
1995          {          {
1996          int dc;          unsigned int dc;
1997          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
1998          ecode += length;          ecode += length;
1999    
2000          /* If we have Unicode property support, we can use it to test the other          /* If we have Unicode property support, we can use it to test the other
2001          case of the character, if there is one. The result of _pcre_ucp_findchar() is          case of the character, if there is one. */
         < 0 if the char isn't found, and othercase is returned as zero if there  
         isn't one. */  
2002    
2003          if (fc != dc)          if (fc != dc)
2004            {            {
2005  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2006            int chartype;            if (dc != _pcre_ucp_othercase(fc))
           int othercase;  
           if (_pcre_ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)  
2007  #endif  #endif
2008              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2009            }            }
# Line 1867  for (;;) Line 2020  for (;;)
2020        }        }
2021      break;      break;
2022    
2023      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2024    
2025      case OP_EXACT:      case OP_EXACT:
2026      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2027      ecode += 3;      ecode += 3;
2028      goto REPEATCHAR;      goto REPEATCHAR;
2029    
2030        case OP_POSUPTO:
2031        possessive = TRUE;
2032        /* Fall through */
2033    
2034      case OP_UPTO:      case OP_UPTO:
2035      case OP_MINUPTO:      case OP_MINUPTO:
2036      min = 0;      min = 0;
# Line 1882  for (;;) Line 2039  for (;;)
2039      ecode += 3;      ecode += 3;
2040      goto REPEATCHAR;      goto REPEATCHAR;
2041    
2042        case OP_POSSTAR:
2043        possessive = TRUE;
2044        min = 0;
2045        max = INT_MAX;
2046        ecode++;
2047        goto REPEATCHAR;
2048    
2049        case OP_POSPLUS:
2050        possessive = TRUE;
2051        min = 1;
2052        max = INT_MAX;
2053        ecode++;
2054        goto REPEATCHAR;
2055    
2056        case OP_POSQUERY:
2057        possessive = TRUE;
2058        min = 0;
2059        max = 1;
2060        ecode++;
2061        goto REPEATCHAR;
2062    
2063      case OP_STAR:      case OP_STAR:
2064      case OP_MINSTAR:      case OP_MINSTAR:
2065      case OP_PLUS:      case OP_PLUS:
# Line 1913  for (;;) Line 2091  for (;;)
2091    
2092        if (length > 1)        if (length > 1)
2093          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2094  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2095          int othercase;          unsigned int othercase;
         int chartype;  
2096          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2097               _pcre_ucp_findchar(fc, &chartype, &othercase) >= 0 &&              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
              othercase > 0)  
2098            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2099            else oclength = 0;
2100  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2101    
2102          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2103            {            {
2104            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (memcmp(eptr, charptr, length) == 0) eptr += length;
2105    #ifdef SUPPORT_UCP
2106            /* Need braces because of following else */            /* Need braces because of following else */
2107            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2108            else            else
# Line 1935  for (;;) Line 2110  for (;;)
2110              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2111              eptr += oclength;              eptr += oclength;
2112              }              }
2113    #else   /* without SUPPORT_UCP */
2114              else { RRETURN(MATCH_NOMATCH); }
2115    #endif  /* SUPPORT_UCP */
2116            }            }
2117    
2118          if (min == max) continue;          if (min == max) continue;
# Line 1943  for (;;) Line 2121  for (;;)
2121            {            {
2122            for (fi = min;; fi++)            for (fi = min;; fi++)
2123              {              {
2124              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2125              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2126              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2127              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2128    #ifdef SUPPORT_UCP
2129              /* Need braces because of following else */              /* Need braces because of following else */
2130              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2131              else              else
# Line 1954  for (;;) Line 2133  for (;;)
2133                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2134                eptr += oclength;                eptr += oclength;
2135                }                }
2136    #else   /* without SUPPORT_UCP */
2137                else { RRETURN (MATCH_NOMATCH); }
2138    #endif  /* SUPPORT_UCP */
2139              }              }
2140            /* Control never gets here */            /* Control never gets here */
2141            }            }
2142          else  
2143            else  /* Maximize */
2144            {            {
2145            pp = eptr;            pp = eptr;
2146            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2147              {              {
2148              if (eptr > md->end_subject - length) break;              if (eptr > md->end_subject - length) break;
2149              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2150    #ifdef SUPPORT_UCP
2151              else if (oclength == 0) break;              else if (oclength == 0) break;
2152              else              else
2153                {                {
2154                if (memcmp(eptr, occhars, oclength) != 0) break;                if (memcmp(eptr, occhars, oclength) != 0) break;
2155                eptr += oclength;                eptr += oclength;
2156                }                }
2157    #else   /* without SUPPORT_UCP */
2158                else break;
2159    #endif  /* SUPPORT_UCP */
2160              }              }
2161            while (eptr >= pp)  
2162              if (possessive) continue;
2163              for(;;)
2164             {             {
2165             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2166             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2167               if (eptr == pp) RRETURN(MATCH_NOMATCH);
2168    #ifdef SUPPORT_UCP
2169               eptr--;
2170               BACKCHAR(eptr);
2171    #else   /* without SUPPORT_UCP */
2172             eptr -= length;             eptr -= length;
2173    #endif  /* SUPPORT_UCP */
2174             }             }
           RRETURN(MATCH_NOMATCH);  
2175            }            }
2176          /* Control never gets here */          /* Control never gets here */
2177          }          }
# Line 2017  for (;;) Line 2211  for (;;)
2211          {          {
2212          for (fi = min;; fi++)          for (fi = min;; fi++)
2213            {            {
2214            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2215            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2216            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
2217                fc != md->lcc[*eptr++])                fc != md->lcc[*eptr++])
# Line 2025  for (;;) Line 2219  for (;;)
2219            }            }
2220          /* Control never gets here */          /* Control never gets here */
2221          }          }
2222        else        else  /* Maximize */
2223          {          {
2224          pp = eptr;          pp = eptr;
2225          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2033  for (;;) Line 2227  for (;;)
2227            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2228            eptr++;            eptr++;
2229            }            }
2230            if (possessive) continue;
2231          while (eptr >= pp)          while (eptr >= pp)
2232            {            {
2233            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2234            eptr--;            eptr--;
2235            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2236            }            }
# Line 2054  for (;;) Line 2249  for (;;)
2249          {          {
2250          for (fi = min;; fi++)          for (fi = min;; fi++)
2251            {            {
2252            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2253            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2254            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2255              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2256            }            }
2257          /* Control never gets here */          /* Control never gets here */
2258          }          }
2259        else        else  /* Maximize */
2260          {          {
2261          pp = eptr;          pp = eptr;
2262          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2069  for (;;) Line 2264  for (;;)
2264            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2265            eptr++;            eptr++;
2266            }            }
2267            if (possessive) continue;
2268          while (eptr >= pp)          while (eptr >= pp)
2269            {            {
2270            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2271            eptr--;            eptr--;
2272            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2273            }            }
# Line 2121  for (;;) Line 2317  for (;;)
2317      ecode += 3;      ecode += 3;
2318      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2319    
2320        case OP_NOTPOSSTAR:
2321        possessive = TRUE;
2322        min = 0;
2323        max = INT_MAX;
2324        ecode++;
2325        goto REPEATNOTCHAR;
2326    
2327        case OP_NOTPOSPLUS:
2328        possessive = TRUE;
2329        min = 1;
2330        max = INT_MAX;
2331        ecode++;
2332        goto REPEATNOTCHAR;
2333    
2334        case OP_NOTPOSQUERY:
2335        possessive = TRUE;
2336        min = 0;
2337        max = 1;
2338        ecode++;
2339        goto REPEATNOTCHAR;
2340    
2341        case OP_NOTPOSUPTO:
2342        possessive = TRUE;
2343        min = 0;
2344        max = GET2(ecode, 1);
2345        ecode += 3;
2346        goto REPEATNOTCHAR;
2347    
2348      case OP_NOTSTAR:      case OP_NOTSTAR:
2349      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2350      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2160  for (;;) Line 2384  for (;;)
2384        /* UTF-8 mode */        /* UTF-8 mode */
2385        if (utf8)        if (utf8)
2386          {          {
2387          register int d;          register unsigned int d;
2388          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2389            {            {
2390            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2185  for (;;) Line 2409  for (;;)
2409          /* UTF-8 mode */          /* UTF-8 mode */
2410          if (utf8)          if (utf8)
2411            {            {
2412            register int d;            register unsigned int d;
2413            for (fi = min;; fi++)            for (fi = min;; fi++)
2414              {              {
2415              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2416              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2417              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2418              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
# Line 2202  for (;;) Line 2426  for (;;)
2426            {            {
2427            for (fi = min;; fi++)            for (fi = min;; fi++)
2428              {              {
2429              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2430              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2431              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2432                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2221  for (;;) Line 2445  for (;;)
2445          /* UTF-8 mode */          /* UTF-8 mode */
2446          if (utf8)          if (utf8)
2447            {            {
2448            register int d;            register unsigned int d;
2449            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2450              {              {
2451              int len = 1;              int len = 1;
# Line 2231  for (;;) Line 2455  for (;;)
2455              if (fc == d) break;              if (fc == d) break;
2456              eptr += len;              eptr += len;
2457              }              }
2458            for(;;)          if (possessive) continue;
2459            for(;;)
2460              {              {
2461              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2462              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2463              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2464              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2248  for (;;) Line 2473  for (;;)
2473              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2474              eptr++;              eptr++;
2475              }              }
2476              if (possessive) continue;
2477            while (eptr >= pp)            while (eptr >= pp)
2478              {              {
2479              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2480              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2481              eptr--;              eptr--;
2482              }              }
# Line 2269  for (;;) Line 2495  for (;;)
2495        /* UTF-8 mode */        /* UTF-8 mode */
2496        if (utf8)        if (utf8)
2497          {          {
2498          register int d;          register unsigned int d;
2499          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2500            {            {
2501            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2292  for (;;) Line 2518  for (;;)
2518          /* UTF-8 mode */          /* UTF-8 mode */
2519          if (utf8)          if (utf8)
2520            {            {
2521            register int d;            register unsigned int d;
2522            for (fi = min;; fi++)            for (fi = min;; fi++)
2523              {              {
2524              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2525              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2526              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2527              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fi >= max || eptr >= md->end_subject || fc == d)
# Line 2308  for (;;) Line 2534  for (;;)
2534            {            {
2535            for (fi = min;; fi++)            for (fi = min;; fi++)
2536              {              {
2537              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2538              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2539              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2540                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2327  for (;;) Line 2553  for (;;)
2553          /* UTF-8 mode */          /* UTF-8 mode */
2554          if (utf8)          if (utf8)
2555            {            {
2556            register int d;            register unsigned int d;
2557            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2558              {              {
2559              int len = 1;              int len = 1;
# Line 2336  for (;;) Line 2562  for (;;)
2562              if (fc == d) break;              if (fc == d) break;
2563              eptr += len;              eptr += len;
2564              }              }
2565              if (possessive) continue;
2566            for(;;)            for(;;)
2567              {              {
2568              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2569              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2570              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2571              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2353  for (;;) Line 2580  for (;;)
2580              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2581              eptr++;              eptr++;
2582              }              }
2583              if (possessive) continue;
2584            while (eptr >= pp)            while (eptr >= pp)
2585              {              {
2586              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2587              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2588              eptr--;              eptr--;
2589              }              }
# Line 2384  for (;;) Line 2612  for (;;)
2612      ecode += 3;      ecode += 3;
2613      goto REPEATTYPE;      goto REPEATTYPE;
2614    
2615      case OP_TYPESTAR:      case OP_TYPEPOSSTAR:
2616      case OP_TYPEMINSTAR:      possessive = TRUE;
2617      case OP_TYPEPLUS:      min = 0;
2618      case OP_TYPEMINPLUS:      max = INT_MAX;
2619      case OP_TYPEQUERY:      ecode++;
2620      case OP_TYPEMINQUERY:      goto REPEATTYPE;
2621      c = *ecode++ - OP_TYPESTAR;  
2622      minimize = (c & 1) != 0;      case OP_TYPEPOSPLUS:
2623      min = rep_min[c];                 /* Pick up values from tables; */      possessive = TRUE;
2624      max = rep_max[c];                 /* zero for max => infinity */      min = 1;
2625        max = INT_MAX;
2626        ecode++;
2627        goto REPEATTYPE;
2628    
2629        case OP_TYPEPOSQUERY:
2630        possessive = TRUE;
2631        min = 0;
2632        max = 1;
2633        ecode++;
2634        goto REPEATTYPE;
2635    
2636        case OP_TYPEPOSUPTO:
2637        possessive = TRUE;
2638        min = 0;
2639        max = GET2(ecode, 1);
2640        ecode += 3;
2641        goto REPEATTYPE;
2642    
2643        case OP_TYPESTAR:
2644        case OP_TYPEMINSTAR:
2645        case OP_TYPEPLUS:
2646        case OP_TYPEMINPLUS:
2647        case OP_TYPEQUERY:
2648        case OP_TYPEMINQUERY:
2649        c = *ecode++ - OP_TYPESTAR;
2650        minimize = (c & 1) != 0;
2651        min = rep_min[c];                 /* Pick up values from tables; */
2652        max = rep_max[c];                 /* zero for max => infinity */
2653      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2654    
2655      /* Common code for all repeated single character type matches. Note that      /* Common code for all repeated single character type matches. Note that
# Line 2408  for (;;) Line 2664  for (;;)
2664        {        {
2665        prop_fail_result = ctype == OP_NOTPROP;        prop_fail_result = ctype == OP_NOTPROP;
2666        prop_type = *ecode++;        prop_type = *ecode++;
2667        if (prop_type >= 128)        prop_value = *ecode++;
         {  
         prop_test_against = prop_type - 128;  
         prop_test_variable = &prop_category;  
         }  
       else  
         {  
         prop_test_against = prop_type;  
         prop_test_variable = &prop_chartype;  
         }  
2668        }        }
2669      else prop_type = -1;      else prop_type = -1;
2670  #endif  #endif
# Line 2434  for (;;) Line 2681  for (;;)
2681      if (min > 0)      if (min > 0)
2682        {        {
2683  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2684        if (prop_type > 0)        if (prop_type >= 0)
2685          {          {
2686          for (i = 1; i <= min; i++)          switch(prop_type)
2687            {            {
2688            GETCHARINC(c, eptr);            case PT_ANY:
2689            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2690            if ((*prop_test_variable == prop_test_against) == prop_fail_result)            for (i = 1; i <= min; i++)
2691              RRETURN(MATCH_NOMATCH);              {
2692                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2693                GETCHARINC(c, eptr);
2694                }
2695              break;
2696    
2697              case PT_LAMP:
2698              for (i = 1; i <= min; i++)
2699                {
2700                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2701                GETCHARINC(c, eptr);
2702                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2703                if ((prop_chartype == ucp_Lu ||
2704                     prop_chartype == ucp_Ll ||
2705                     prop_chartype == ucp_Lt) == prop_fail_result)
2706                  RRETURN(MATCH_NOMATCH);
2707                }
2708              break;
2709    
2710              case PT_GC:
2711              for (i = 1; i <= min; i++)
2712                {
2713                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2714                GETCHARINC(c, eptr);
2715                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2716                if ((prop_category == prop_value) == prop_fail_result)
2717                  RRETURN(MATCH_NOMATCH);
2718                }
2719              break;
2720    
2721              case PT_PC:
2722              for (i = 1; i <= min; i++)
2723                {
2724                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2725                GETCHARINC(c, eptr);
2726                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2727                if ((prop_chartype == prop_value) == prop_fail_result)
2728                  RRETURN(MATCH_NOMATCH);
2729                }
2730              break;
2731    
2732              case PT_SC:
2733              for (i = 1; i <= min; i++)
2734                {
2735                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2736                GETCHARINC(c, eptr);
2737                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2738                if ((prop_script == prop_value) == prop_fail_result)
2739                  RRETURN(MATCH_NOMATCH);
2740                }
2741              break;
2742    
2743              default:
2744              RRETURN(PCRE_ERROR_INTERNAL);
2745            }            }
2746          }          }
2747    
# Line 2453  for (;;) Line 2753  for (;;)
2753          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2754            {            {
2755            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2756            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2757            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2758            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2759              {              {
# Line 2462  for (;;) Line 2762  for (;;)
2762                {                {
2763                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2764                }                }
2765              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2766              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2767              eptr += len;              eptr += len;
2768              }              }
# Line 2481  for (;;) Line 2781  for (;;)
2781          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2782            {            {
2783            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
2784               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2785              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2786              eptr++;
2787            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2788            }            }
2789          break;          break;
# Line 2491  for (;;) Line 2792  for (;;)
2792          eptr += min;          eptr += min;
2793          break;          break;
2794    
2795            case OP_ANYNL:
2796            for (i = 1; i <= min; i++)
2797              {
2798              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2799              GETCHARINC(c, eptr);
2800              switch(c)
2801                {
2802                default: RRETURN(MATCH_NOMATCH);
2803                case 0x000d:
2804                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2805                break;
2806                case 0x000a:
2807                case 0x000b:
2808                case 0x000c:
2809                case 0x0085:
2810                case 0x2028:
2811                case 0x2029:
2812                break;
2813                }
2814              }
2815            break;
2816    
2817          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2818          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2819            {            {
# Line 2559  for (;;) Line 2882  for (;;)
2882  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
2883    
2884        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
2885        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
2886          number of bytes present, as this was tested above. */
2887    
2888        switch(ctype)        switch(ctype)
2889          {          {
# Line 2567  for (;;) Line 2891  for (;;)
2891          if ((ims & PCRE_DOTALL) == 0)          if ((ims & PCRE_DOTALL) == 0)
2892            {            {
2893            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2894              if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);              {
2895                if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2896                eptr++;
2897                }
2898            }            }
2899          else eptr += min;          else eptr += min;
2900          break;          break;
# Line 2576  for (;;) Line 2903  for (;;)
2903          eptr += min;          eptr += min;
2904          break;          break;
2905    
2906            /* Because of the CRLF case, we can't assume the minimum number of
2907            bytes are present in this case. */
2908    
2909            case OP_ANYNL:
2910            for (i = 1; i <= min; i++)
2911              {
2912              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2913              switch(*eptr++)
2914                {
2915                default: RRETURN(MATCH_NOMATCH);
2916                case 0x000d:
2917                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2918                break;
2919                case 0x000a:
2920                case 0x000b:
2921                case 0x000c:
2922                case 0x0085:
2923                break;
2924                }
2925              }
2926            break;
2927    
2928          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2929          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2930            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2624  for (;;) Line 2973  for (;;)
2973      if (minimize)      if (minimize)
2974        {        {
2975  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2976        if (prop_type > 0)        if (prop_type >= 0)
2977          {          {
2978          for (fi = min;; fi++)          switch(prop_type)
2979            {            {
2980            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            case PT_ANY:
2981            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for (fi = min;; fi++)
2982            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              {
2983            GETCHARINC(c, eptr);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
2984            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2985            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2986              RRETURN(MATCH_NOMATCH);              GETCHARINC(c, eptr);
2987                if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2988                }
2989              /* Control never gets here */
2990    
2991              case PT_LAMP:
2992              for (fi = min;; fi++)
2993                {
2994                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
2995                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2996                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2997                GETCHARINC(c, eptr);
2998                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2999                if ((prop_chartype == ucp_Lu ||
3000                     prop_chartype == ucp_Ll ||
3001                     prop_chartype == ucp_Lt) == prop_fail_result)
3002                  RRETURN(MATCH_NOMATCH);
3003                }
3004              /* Control never gets here */
3005    
3006              case PT_GC:
3007              for (fi = min;; fi++)
3008                {
3009                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3010                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3011                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3012                GETCHARINC(c, eptr);
3013                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3014                if ((prop_category == prop_value) == prop_fail_result)
3015                  RRETURN(MATCH_NOMATCH);
3016                }
3017              /* Control never gets here */
3018    
3019              case PT_PC:
3020              for (fi = min;; fi++)
3021                {
3022                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3023                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3024                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3025                GETCHARINC(c, eptr);
3026                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3027                if ((prop_chartype == prop_value) == prop_fail_result)
3028                  RRETURN(MATCH_NOMATCH);
3029                }
3030              /* Control never gets here */
3031    
3032              case PT_SC:
3033              for (fi = min;; fi++)
3034                {
3035                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3036                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3037                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3038                GETCHARINC(c, eptr);
3039                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3040                if ((prop_script == prop_value) == prop_fail_result)
3041                  RRETURN(MATCH_NOMATCH);
3042                }
3043              /* Control never gets here */
3044    
3045              default:
3046              RRETURN(PCRE_ERROR_INTERNAL);
3047            }            }
3048          }          }
3049    
# Line 2645  for (;;) Line 3054  for (;;)
3054          {          {
3055          for (fi = min;; fi++)          for (fi = min;; fi++)
3056            {            {
3057            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3058            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3059            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3060            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3061            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3062            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3063            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3064              {              {
# Line 2658  for (;;) Line 3067  for (;;)
3067                {                {
3068                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3069                }                }
3070              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3071              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3072              eptr += len;              eptr += len;
3073              }              }
# Line 2674  for (;;) Line 3083  for (;;)
3083          {          {
3084          for (fi = min;; fi++)          for (fi = min;; fi++)
3085            {            {
3086            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3087            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3088            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3089                   (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3090                    IS_NEWLINE(eptr)))
3091                RRETURN(MATCH_NOMATCH);
3092    
3093            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3094            switch(ctype)            switch(ctype)
3095              {              {
3096              case OP_ANY:              case OP_ANY:        /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3097              break;              break;
3098    
3099              case OP_ANYBYTE:              case OP_ANYBYTE:
3100              break;              break;
3101    
3102                case OP_ANYNL:
3103                switch(c)
3104                  {
3105                  default: RRETURN(MATCH_NOMATCH);
3106                  case 0x000d:
3107                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3108                  break;
3109                  case 0x000a:
3110                  case 0x000b:
3111                  case 0x000c:
3112                  case 0x0085:
3113                  case 0x2028:
3114                  case 0x2029:
3115                  break;
3116                  }
3117                break;
3118    
3119              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3120              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3121                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2729  for (;;) Line 3157  for (;;)
3157          {          {
3158          for (fi = min;; fi++)          for (fi = min;; fi++)
3159            {            {
3160            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3161            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3162            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3163                   ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3164                RRETURN(MATCH_NOMATCH);
3165    
3166            c = *eptr++;            c = *eptr++;
3167            switch(ctype)            switch(ctype)
3168              {              {
3169              case OP_ANY:              case OP_ANY:   /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3170              break;              break;
3171    
3172              case OP_ANYBYTE:              case OP_ANYBYTE:
3173              break;              break;
3174    
3175                case OP_ANYNL:
3176                switch(c)
3177                  {
3178                  default: RRETURN(MATCH_NOMATCH);
3179                  case 0x000d:
3180                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3181                  break;
3182                  case 0x000a:
3183                  case 0x000b:
3184                  case 0x000c:
3185                  case 0x0085:
3186                  break;
3187                  }
3188                break;
3189    
3190              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3191              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3192              break;              break;
# Line 2774  for (;;) Line 3219  for (;;)
3219        /* Control never gets here */        /* Control never gets here */
3220        }        }
3221    
3222      /* If maximizing it is worth using inline code for speed, doing the type      /* If maximizing, it is worth using inline code for speed, doing the type
3223      test once at the start (i.e. keep it out of the loop). Again, keep the      test once at the start (i.e. keep it out of the loop). Again, keep the
3224      UTF-8 and UCP stuff separate. */      UTF-8 and UCP stuff separate. */
3225    
# Line 2783  for (;;) Line 3228  for (;;)
3228        pp = eptr;  /* Remember where we started */        pp = eptr;  /* Remember where we started */
3229    
3230  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3231        if (prop_type > 0)        if (prop_type >= 0)
3232          {          {
3233          for (i = min; i < max; i++)          switch(prop_type)
3234            {            {
3235            int len = 1;            case PT_ANY:
3236            if (eptr >= md->end_subject) break;            for (i = min; i < max; i++)
3237            GETCHARLEN(c, eptr, len);              {
3238            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              int len = 1;
3239            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (eptr >= md->end_subject) break;
3240              break;              GETCHARLEN(c, eptr, len);
3241            eptr+= len;              if (prop_fail_result) break;
3242                eptr+= len;
3243                }
3244              break;
3245    
3246              case PT_LAMP:
3247              for (i = min; i < max; i++)
3248                {
3249                int len = 1;
3250                if (eptr >= md->end_subject) break;
3251                GETCHARLEN(c, eptr, len);
3252                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3253                if ((prop_chartype == ucp_Lu ||
3254                     prop_chartype == ucp_Ll ||
3255                     prop_chartype == ucp_Lt) == prop_fail_result)
3256                  break;
3257                eptr+= len;
3258                }
3259              break;
3260    
3261              case PT_GC:
3262              for (i = min; i < max; i++)
3263                {
3264                int len = 1;
3265                if (eptr >= md->end_subject) break;
3266                GETCHARLEN(c, eptr, len);
3267                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3268                if ((prop_category == prop_value) == prop_fail_result)
3269                  break;
3270                eptr+= len;
3271                }
3272              break;
3273    
3274              case PT_PC:
3275              for (i = min; i < max; i++)
3276                {
3277                int len = 1;
3278                if (eptr >= md->end_subject) break;
3279                GETCHARLEN(c, eptr, len);
3280                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3281                if ((prop_chartype == prop_value) == prop_fail_result)
3282                  break;
3283                eptr+= len;
3284                }
3285              break;
3286    
3287              case PT_SC:
3288              for (i = min; i < max; i++)
3289                {
3290                int len = 1;
3291                if (eptr >= md->end_subject) break;
3292                GETCHARLEN(c, eptr, len);
3293                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3294                if ((prop_script == prop_value) == prop_fail_result)
3295                  break;
3296                eptr+= len;
3297                }
3298              break;
3299            }            }
3300    
3301          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3302    
3303            if (possessive) continue;
3304          for(;;)          for(;;)
3305            {            {
3306            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3307            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3308            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3309            BACKCHAR(eptr);            BACKCHAR(eptr);
# Line 2816  for (;;) Line 3319  for (;;)
3319            {            {
3320            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3321            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3322            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3323            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3324            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3325              {              {
# Line 2825  for (;;) Line 3328  for (;;)
3328                {                {
3329                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3330                }                }
3331              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3332              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3333              eptr += len;              eptr += len;
3334              }              }
# Line 2833  for (;;) Line 3336  for (;;)
3336    
3337          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3338    
3339            if (possessive) continue;
3340          for(;;)          for(;;)
3341            {            {
3342            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3343            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3344            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3345            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
# Line 2846  for (;;) Line 3350  for (;;)
3350                {                {
3351                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3352                }                }
3353              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3354              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3355              eptr--;              eptr--;
3356              }              }
# Line 2865  for (;;) Line 3369  for (;;)
3369            {            {
3370            case OP_ANY:            case OP_ANY:
3371    
3372            /* Special code is required for UTF8, but when the maximum is unlimited            /* Special code is required for UTF8, but when the maximum is
3373            we don't need it, so we repeat the non-UTF8 code. This is probably            unlimited we don't need it, so we repeat the non-UTF8 code. This is
3374            worth it, because .* is quite a common idiom. */            probably worth it, because .* is quite a common idiom. */
3375    
3376            if (max < INT_MAX)            if (max < INT_MAX)
3377              {              {
# Line 2875  for (;;) Line 3379  for (;;)
3379                {                {
3380                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3381                  {                  {
3382                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3383                  eptr++;                  eptr++;
3384                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3385                  }                  }
# Line 2884  for (;;) Line 3388  for (;;)
3388                {                {
3389                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3390                  {                  {
3391                    if (eptr >= md->end_subject) break;
3392                  eptr++;                  eptr++;
3393                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3394                  }                  }
# Line 2898  for (;;) Line 3403  for (;;)
3403                {                {
3404                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3405                  {                  {
3406                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3407                  eptr++;                  eptr++;
3408                  }                  }
3409                break;                break;
# Line 2906  for (;;) Line 3411  for (;;)
3411              else              else
3412                {                {
3413                c = max - min;                c = max - min;
3414                if (c > md->end_subject - eptr) c = md->end_subject - eptr;                if (c > (unsigned int)(md->end_subject - eptr))
3415                    c = md->end_subject - eptr;
3416                eptr += c;                eptr += c;
3417                }                }
3418              }              }
# Line 2916  for (;;) Line 3422  for (;;)
3422    
3423            case OP_ANYBYTE:            case OP_ANYBYTE:
3424            c = max - min;            c = max - min;
3425            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3426                c = md->end_subject - eptr;
3427            eptr += c;            eptr += c;
3428            break;            break;
3429    
3430              case OP_ANYNL:
3431              for (i = min; i < max; i++)
3432                {
3433                int len = 1;
3434                if (eptr >= md->end_subject) break;
3435                GETCHARLEN(c, eptr, len);
3436                if (c == 0x000d)
3437                  {
3438                  if (++eptr >= md->end_subject) break;
3439                  if (*eptr == 0x000a) eptr++;
3440                  }
3441                else
3442                  {
3443                  if (c != 0x000a && c != 0x000b && c != 0x000c &&
3444                      c != 0x0085 && c != 0x2028 && c != 0x2029)
3445                    break;
3446                  eptr += len;
3447                  }
3448                }
3449              break;
3450    
3451            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3452            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3453              {              {
# Line 2992  for (;;) Line 3520  for (;;)
3520    
3521          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3522    
3523            if (possessive) continue;
3524          for(;;)          for(;;)
3525            {            {
3526            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
3527            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3528            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3529            BACKCHAR(eptr);            BACKCHAR(eptr);
# Line 3012  for (;;) Line 3541  for (;;)
3541              {              {
3542              for (i = min; i < max; i++)              for (i = min; i < max; i++)
3543                {                {
3544                if (eptr >= md->end_subject || *eptr == NEWLINE) break;                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3545                eptr++;                eptr++;
3546                }                }
3547              break;              break;
# Line 3021  for (;;) Line 3550  for (;;)
3550    
3551            case OP_ANYBYTE:            case OP_ANYBYTE:
3552            c = max - min;            c = max - min;
3553            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3554                c = md->end_subject - eptr;
3555            eptr += c;            eptr += c;
3556            break;            break;
3557    
3558              case OP_ANYNL:
3559              for (i = min; i < max; i++)
3560                {
3561                if (eptr >= md->end_subject) break;
3562                c = *eptr;
3563                if (c == 0x000d)
3564                  {
3565                  if (++eptr >= md->end_subject) break;
3566                  if (*eptr == 0x000a) eptr++;
3567                  }
3568                else
3569                  {
3570                  if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
3571                    break;
3572                  eptr++;
3573                  }
3574                }
3575              break;
3576    
3577            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3578            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3579              {              {
# Line 3085  for (;;) Line 3634  for (;;)
3634    
3635          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3636    
3637            if (possessive) continue;
3638          while (eptr >= pp)          while (eptr >= pp)
3639            {            {
3640            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
3641            eptr--;            eptr--;
3642            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3643            }            }
# Line 3099  for (;;) Line 3649  for (;;)
3649        }        }
3650      /* Control never gets here */      /* Control never gets here */
3651    
3652      /* There's been some horrible disaster. Since all codes > OP_BRA are      /* There's been some horrible disaster. Arrival here can only mean there is
3653      for capturing brackets, and there shouldn't be any gaps between 0 and      something seriously wrong in the code above or the OP_xxx definitions. */
     OP_BRA, arrival here can only mean there is something seriously wrong  
     in the code above or the OP_xxx definitions. */  
3654    
3655      default:      default:
3656      DPRINTF(("Unknown opcode %d\n", *ecode));      DPRINTF(("Unknown opcode %d\n", *ecode));
3657      RRETURN(PCRE_ERROR_UNKNOWN_NODE);      RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
3658      }      }
3659    
3660    /* Do not stick any code in here without much thought; it is assumed    /* Do not stick any code in here without much thought; it is assumed
# Line 3115  for (;;) Line 3663  for (;;)
3663    
3664    }             /* End of main loop */    }             /* End of main loop */
3665  /* Control never reaches here */  /* Control never reaches here */
3666    
3667    
3668    /* When compiling to use the heap rather than the stack for recursive calls to
3669    match(), the RRETURN() macro jumps here. The number that is saved in
3670    frame->Xwhere indicates which label we actually want to return to. */
3671    
3672    #ifdef NO_RECURSE
3673    #define LBL(val) case val: goto L_RM##val;
3674    HEAP_RETURN:
3675    switch (frame->Xwhere)
3676      {
3677      LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
3678      LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
3679      LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
3680      LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
3681      LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
3682      LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
3683      default:
3684      DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
3685      return PCRE_ERROR_INTERNAL;
3686      }
3687    #undef LBL
3688    #endif  /* NO_RECURSE */
3689  }  }
3690    
3691    
# Line 3127  Undefine all the macros that were define Line 3698  Undefine all the macros that were define
3698  #ifdef NO_RECURSE  #ifdef NO_RECURSE
3699  #undef eptr  #undef eptr
3700  #undef ecode  #undef ecode
3701    #undef mstart
3702  #undef offset_top  #undef offset_top
3703  #undef ims  #undef ims
3704  #undef eptrb  #undef eptrb
# Line 3144  Undefine all the macros that were define Line 3716  Undefine all the macros that were define
3716    
3717  #undef cur_is_word  #undef cur_is_word
3718  #undef condition  #undef condition
 #undef minimize  
3719  #undef prev_is_word  #undef prev_is_word
3720    
3721  #undef original_ims  #undef original_ims
# Line 3200  Returns:          > 0 => success; value Line 3771  Returns:          > 0 => success; value
3771                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3772  */  */
3773    
3774  PCRE_EXPORT int  PCRE_EXP_DEFN int
3775  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
3776    const char *subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
3777    int offsetcount)    int offsetcount)
3778  {  {
3779  int rc, resetcount, ocount;  int rc, resetcount, ocount;
3780  int first_byte = -1;  int first_byte = -1;
3781  int req_byte = -1;  int req_byte = -1;
3782  int req_byte2 = -1;  int req_byte2 = -1;
3783  unsigned long int ims = 0;  int newline;
3784    unsigned long int ims;
3785  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
3786  BOOL anchored;  BOOL anchored;
3787  BOOL startline;  BOOL startline;
3788  BOOL firstline;  BOOL firstline;
3789  BOOL first_byte_caseless = FALSE;  BOOL first_byte_caseless = FALSE;
3790  BOOL req_byte_caseless = FALSE;  BOOL req_byte_caseless = FALSE;
3791    BOOL utf8;
3792  match_data match_block;  match_data match_block;
3793    match_data *md = &match_block;
3794  const uschar *tables;  const uschar *tables;
3795  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3796  const uschar *start_match = (const uschar *)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
3797  const uschar *end_subject;  USPTR end_subject;
3798  const uschar *req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
3799    eptrblock eptrchain[EPTR_WORK_SIZE];
3800    
3801  pcre_study_data internal_study;  pcre_study_data internal_study;
3802  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3241  if (offsetcount < 0) return PCRE_ERROR_B Line 3816  if (offsetcount < 0) return PCRE_ERROR_B
3816  the default values. */  the default values. */
3817    
3818  study = NULL;  study = NULL;
3819  match_block.match_limit = MATCH_LIMIT;  md->match_limit = MATCH_LIMIT;
3820  match_block.callout_data = NULL;  md->match_limit_recursion = MATCH_LIMIT_RECURSION;
3821    md->callout_data = NULL;
3822    
3823  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
3824    
# Line 3254  if (extra_data != NULL) Line 3830  if (extra_data != NULL)
3830    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3831      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
3832    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
3833      match_block.match_limit = extra_data->match_limit;      md->match_limit = extra_data->match_limit;
3834      if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3835        md->match_limit_recursion = extra_data->match_limit_recursion;
3836    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3837      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
3838    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
3839    }    }
3840    
# Line 3286  firstline = (re->options & PCRE_FIRSTLIN Line 3864  firstline = (re->options & PCRE_FIRSTLIN
3864    
3865  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
3866    
3867  match_block.start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const uschar *)external_re + re->name_table_offset +
3868    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
3869    
3870  match_block.start_subject = (const uschar *)subject;  md->start_subject = (USPTR)subject;
3871  match_block.start_offset = start_offset;  md->start_offset = start_offset;
3872  match_block.end_subject = match_block.start_subject + length;  md->end_subject = md->start_subject + length;
3873  end_subject = match_block.end_subject;  end_subject = md->end_subject;
3874    
3875  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
3876  match_block.utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
3877    
3878  match_block.notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
3879  match_block.noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
3880  match_block.notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
3881  match_block.partial = (options & PCRE_PARTIAL) != 0;  md->partial = (options & PCRE_PARTIAL) != 0;
3882  match_block.hitend = FALSE;  md->hitend = FALSE;
3883    
3884    md->recursive = NULL;                   /* No recursion at top level */
3885    md->eptrchain = eptrchain;              /* Make workspace generally available */
3886    
3887    md->lcc = tables + lcc_offset;
3888    md->ctypes = tables + ctypes_offset;
3889    
3890    /* Handle different types of newline. The three bits give eight cases. If
3891    nothing is set at run time, whatever was used at compile time applies. */
3892    
3893  match_block.recursive = NULL;                   /* No recursion at top level */  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
3894           PCRE_NEWLINE_BITS)
3895      {
3896      case 0: newline = NEWLINE; break;   /* Compile-time default */
3897      case PCRE_NEWLINE_CR: newline = '\r'; break;
3898      case PCRE_NEWLINE_LF: newline = '\n'; break;
3899      case PCRE_NEWLINE_CR+
3900           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
3901      case PCRE_NEWLINE_ANY: newline = -1; break;
3902      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
3903      default: return PCRE_ERROR_BADNEWLINE;
3904      }
3905    
3906  match_block.lcc = tables + lcc_offset;  if (newline == -2)
3907  match_block.ctypes = tables + ctypes_offset;    {
3908      md->nltype = NLTYPE_ANYCRLF;
3909      }
3910    else if (newline < 0)
3911      {
3912      md->nltype = NLTYPE_ANY;
3913      }
3914    else
3915      {
3916      md->nltype = NLTYPE_FIXED;
3917      if (newline > 255)
3918        {
3919        md->nllen = 2;
3920        md->nl[0] = (newline >> 8) & 255;
3921        md->nl[1] = newline & 255;
3922        }
3923      else
3924        {
3925        md->nllen = 1;
3926        md->nl[0] = newline;
3927        }
3928      }
3929    
3930  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
3931  moment. */  moment. */
3932    
3933  if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
3934    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
3935    
3936  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3937  back the character offset. */  back the character offset. */
3938    
3939  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3940  if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3941    {    {
3942    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
3943      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
# Line 3350  ocount = offsetcount - (offsetcount % 3) Line 3969  ocount = offsetcount - (offsetcount % 3)
3969  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
3970    {    {
3971    ocount = re->top_backref * 3 + 3;    ocount = re->top_backref * 3 + 3;
3972    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3973    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3974    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
3975    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
3976    }    }
3977  else match_block.offset_vector = offsets;  else md->offset_vector = offsets;
3978    
3979  match_block.offset_end = ocount;  md->offset_end = ocount;
3980  match_block.offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
3981  match_block.offset_overflow = FALSE;  md->offset_overflow = FALSE;
3982  match_block.capture_last = -1;  md->capture_last = -1;
3983    
3984  /* Compute the minimum number of offsets that we need to reset each time. Doing  /* Compute the minimum number of offsets that we need to reset each time. Doing
3985  this makes a huge difference to execution time when there aren't many brackets  this makes a huge difference to execution time when there aren't many brackets
# Line 3373  if (resetcount > offsetcount) resetcount Line 3992  if (resetcount > offsetcount) resetcount
3992  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
3993  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. */
3994    
3995  if (match_block.offset_vector != NULL)  if (md->offset_vector != NULL)
3996    {    {
3997    register int *iptr = match_block.offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
3998    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - resetcount/2 + 1;
3999    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
4000    }    }
# Line 3392  if (!anchored) Line 4011  if (!anchored)
4011      {      {
4012      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
4013      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
4014        first_byte = match_block.lcc[first_byte];        first_byte = md->lcc[first_byte];
4015      }      }
4016    else    else
4017      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 3410  if ((re->options & PCRE_REQCHSET) != 0) Line 4029  if ((re->options & PCRE_REQCHSET) != 0)
4029    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
4030    }    }
4031    
4032    
4033    /* ==========================================================================*/
4034    
4035  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
4036  the loop runs just once. */  the loop runs just once. */
4037    
4038  do  for(;;)
4039    {    {
4040    const uschar *save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
4041    
4042    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
4043    
4044    if (match_block.offset_vector != NULL)    if (md->offset_vector != NULL)
4045      {      {
4046      register int *iptr = match_block.offset_vector;      register int *iptr = md->offset_vector;
4047      register int *iend = iptr + resetcount;      register int *iend = iptr + resetcount;
4048      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
4049      }      }
4050    
4051    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* Advance to a unique first char if possible. If firstline is TRUE, the
4052    start of the match is constrained to the first line of a multiline string.    start of the match is constrained to the first line of a multiline string.
4053    Implement this by temporarily adjusting end_subject so that we stop scanning    That is, the match must be before or at the first newline. Implement this by
4054    at a newline. If the match fails at the newline, later code breaks this loop.    temporarily adjusting end_subject so that we stop scanning at a newline. If
4055    */    the match fails at the newline, later code breaks this loop. */
4056    
4057    if (firstline)    if (firstline)
4058      {      {
4059      const uschar *t = start_match;      USPTR t = start_match;
4060      while (t < save_end_subject && *t != '\n') t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4061      end_subject = t;      end_subject = t;
4062      }      }
4063    
# Line 3445  do Line 4067  do
4067      {      {
4068      if (first_byte_caseless)      if (first_byte_caseless)
4069        while (start_match < end_subject &&        while (start_match < end_subject &&
4070               match_block.lcc[*start_match] != first_byte)               md->lcc[*start_match] != first_byte)
4071          start_match++;          start_match++;
4072      else      else
4073        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4074          start_match++;          start_match++;
4075      }      }
4076    
4077    /* Or to just after \n for a multiline match if possible */    /* Or to just after a linebreak for a multiline match if possible */
4078    
4079    else if (startline)    else if (startline)
4080      {      {
4081      if (start_match > match_block.start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4082        {        {
4083        while (start_match < end_subject && start_match[-1] != NEWLINE)        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4084            start_match++;
4085    
4086          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4087          and we are now at a LF, advance the match position by one more character.
4088          */
4089    
4090          if (start_match[-1] == '\r' &&
4091               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4092               start_match < end_subject &&
4093               *start_match == '\n')
4094          start_match++;          start_match++;
4095        }        }
4096      }      }
# Line 3480  do Line 4112  do
4112    
4113  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4114    printf(">>>> Match against: ");    printf(">>>> Match against: ");
4115    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, md);
4116    printf("\n");    printf("\n");
4117  #endif  #endif
4118    
# Line 3494  do Line 4126  do
4126    
4127    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end can
4128    take a long time, and give bad performance on quite ordinary patterns. This    take a long time, and give bad performance on quite ordinary patterns. This
4129    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4130    don't do this when the string is sufficiently long.    string... so we don't do this when the string is sufficiently long.
4131    
4132    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested.
4133    */    */
4134    
4135    if (req_byte >= 0 &&    if (req_byte >= 0 &&
4136        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
4137        !match_block.partial)        !md->partial)
4138      {      {
4139      register const uschar *p = start_match + ((first_byte >= 0)? 1 : 0);      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4140    
4141      /* We don't need to repeat the search if we haven't yet reached the      /* We don't need to repeat the search if we haven't yet reached the
4142      place we found it at last time. */      place we found it at last time. */
# Line 3527  do Line 4159  do
4159            }            }
4160          }          }
4161    
4162        /* If we can't find the required character, break the matching loop */        /* If we can't find the required character, break the matching loop,
4163          forcing a match failure. */
4164    
4165        if (p >= end_subject) break;        if (p >= end_subject)
4166            {
4167            rc = MATCH_NOMATCH;
4168            break;
4169            }
4170    
4171        /* If we have found the required character, save the point where we        /* If we have found the required character, save the point where we
4172        found it, so that we don't search again next time round the loop if        found it, so that we don't search again next time round the loop if
# Line 3539  do Line 4176  do
4176        }        }
4177      }      }
4178    
4179    /* When a match occurs, substrings will be set for all internal extractions;    /* OK, we can now run the match. */
   we just need to set up the whole thing as substring 0 before returning. If  
   there were too many extractions, set the return code to zero. In the case  
   where we had to get some local store to hold offsets for backreferences, copy  
   those back references that we can. In this case there need not be overflow  
   if certain parts of the pattern were not used. */  
   
   match_block.start_match = start_match;  
   match_block.match_call_count = 0;  
   
   rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,  
     match_isgroup);  
   
   /* When the result is no match, if the subject's first character was a  
   newline and the PCRE_FIRSTLINE option is set, break (which will return  
   PCRE_ERROR_NOMATCH). The option requests that a match occur before the first  
   newline in the subject. Otherwise, advance the pointer to the next character  
   and continue - but the continuation will actually happen only when the  
   pattern is not anchored. */  
4180    
4181    if (rc == MATCH_NOMATCH)    md->start_match_ptr = start_match;      /* Insurance */
4182      {    md->match_call_count = 0;
4183      if (firstline && *start_match == NEWLINE) break;    md->eptrn = 0;                          /* Next free eptrchain slot */
4184      start_match++;    rc = match(start_match, md->start_code, start_match, 2, md,
4185        ims, NULL, 0, 0);
4186    
4187      /* Any return other than MATCH_NOMATCH breaks the loop. */
4188    
4189      if (rc != MATCH_NOMATCH) break;
4190    
4191      /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4192      newline in the subject (though it may continue over the newline). Therefore,
4193      if we have just failed to match, starting at a newline, do not continue. */
4194    
4195      if (firstline && IS_NEWLINE(start_match)) break;
4196    
4197      /* Advance the match position by one character. */
4198    
4199      start_match++;
4200  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4201      if (match_block.utf8)    if (utf8)
4202        while(start_match < end_subject && (*start_match & 0xc0) == 0x80)      while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4203          start_match++;        start_match++;
4204  #endif  #endif
     continue;  
     }  
4205    
4206    if (rc != MATCH_MATCH)    /* Break the loop if the pattern is anchored or if we have passed the end of
4207      {    the subject. */
4208      DPRINTF((">>>> error: returning %d\n", rc));  
4209      return rc;    if (anchored || start_match > end_subject) break;
4210      }  
4211      /* If we have just passed a CR and the newline option is CRLF or ANY or
4212      ANYCRLF, and we are now at a LF, advance the match position by one more
4213      character. */
4214    
4215      if (start_match[-1] == '\r' &&
4216           (md->nltype == NLTYPE_ANY ||
4217            md->nltype == NLTYPE_ANYCRLF ||
4218            md->nllen == 2) &&
4219           start_match < end_subject &&
4220           *start_match == '\n')
4221        start_match++;
4222    
4223      }   /* End of for(;;) "bumpalong" loop */
4224    
4225    /* ==========================================================================*/
4226    
4227    /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4228    conditions is true:
4229    
4230    (1) The pattern is anchored;
4231    
4232    (2) We are past the end of the subject;
4233    
4234    /* We have a match! Copy the offset information from temporary store if  (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4235    necessary */      this option requests that a match occur at or before the first newline in
4236        the subject.
4237    
4238    When we have a match and the offset vector is big enough to deal with any
4239    backreferences, captured substring offsets will already be set up. In the case
4240    where we had to get some local store to hold offsets for backreference
4241    processing, copy those that we can. In this case there need not be overflow if
4242    certain parts of the pattern were not used, even though there are more
4243    capturing parentheses than vector slots. */
4244    
4245    if (rc == MATCH_MATCH)
4246      {
4247    if (using_temporary_offsets)    if (using_temporary_offsets)
4248      {      {
4249      if (offsetcount >= 4)      if (offsetcount >= 4)
4250        {        {
4251        memcpy(offsets + 2, match_block.offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
4252          (offsetcount - 2) * sizeof(int));          (offsetcount - 2) * sizeof(int));
4253        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
4254        }        }
4255      if (match_block.end_offset_top > offsetcount)      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
       match_block.offset_overflow = TRUE;  
   
4256      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
4257      (pcre_free)(match_block.offset_vector);      (pcre_free)(md->offset_vector);
4258      }      }
4259    
4260    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;    /* Set the return code to the number of captured strings, or 0 if there are
4261      too many to fit into the vector. */
4262    
4263      rc = md->offset_overflow? 0 : md->end_offset_top/2;
4264    
4265      /* If there is space, set up the whole thing as substring 0. The value of
4266      md->start_match_ptr might be modified if \K was encountered on the success
4267      matching path. */
4268    
4269    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4270      {      {
4271      offsets[0] = start_match - match_block.start_subject;      offsets[0] = md->start_match_ptr - md->start_subject;
4272      offsets[1] = match_block.end_match_ptr - match_block.start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
4273      }      }
4274    
4275    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
4276    return rc;    return rc;
4277    }    }
4278    
4279  /* This "while" is the end of the "do" above */  /* Control gets here if there has been an error, or if the overall match
4280    attempt has failed at all permitted starting positions. */
 while (!anchored && start_match <= end_subject);  
4281    
4282  if (using_temporary_offsets)  if (using_temporary_offsets)
4283    {    {
4284    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
4285    (pcre_free)(match_block.offset_vector);    (pcre_free)(md->offset_vector);
4286    }    }
4287    
4288  if (match_block.partial && match_block.hitend)  if (rc != MATCH_NOMATCH)
4289      {
4290      DPRINTF((">>>> error: returning %d\n", rc));
4291      return rc;
4292      }
4293    else if (md->partial && md->hitend)
4294    {    {
4295    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4296    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;

Legend:
Removed from v.85  
changed lines
  Added in v.172

  ViewVC Help
Powered by ViewVC 1.1.5