/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 165 by ph10, Wed May 9 10:50:57 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #define NLBLOCK md             /* Block containing newline information */
46    #define PSSTART start_subject  /* Field containing processed string start */
47    #define PSEND   end_subject    /* Field containing processed string end */
48    
49  #include "pcre_internal.h"  #include "pcre_internal.h"
50    
51    /* Undefine some potentially clashing cpp symbols */
52    
53  /* Structure for building a chain of data that actually lives on the  #undef min
54  stack, for holding the values of the subject pointer at the start of each  #undef max
55  subpattern, so as to detect when an empty string has been matched by a  
56  subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
57  are on the heap, not on the stack. */  obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
58    
59  typedef struct eptrblock {  #define EPTR_WORK_SIZE (1000)
   struct eptrblock *epb_prev;  
   const uschar *epb_saved_eptr;  
 } eptrblock;  
60    
61  /* Flag bits for the match() function */  /* Flag bits for the match() function */
62    
63  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
64  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
65    #define match_tail_recursed  0x04  /* Tail recursive call */
66    
67  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
68  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 101  Returns:     nothing Line 103  Returns:     nothing
103  static void  static void
104  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
105  {  {
106  int c;  unsigned int c;
107  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
108  while (length-- > 0)  while (length-- > 0)
109    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 128  Returns:      TRUE if matched Line 130  Returns:      TRUE if matched
130  */  */
131    
132  static BOOL  static BOOL
133  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
134    unsigned long int ims)    unsigned long int ims)
135  {  {
136  const uschar *p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
137    
138  #ifdef DEBUG  #ifdef DEBUG
139  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 169  return TRUE; Line 171  return TRUE;
171  ****************************************************************************  ****************************************************************************
172                     RECURSION IN THE match() FUNCTION                     RECURSION IN THE match() FUNCTION
173    
174  The match() function is highly recursive. Some regular expressions can cause  The match() function is highly recursive, though not every recursive call
175  it to recurse thousands of times. I was writing for Unix, so I just let it  increases the recursive depth. Nevertheless, some regular expressions can cause
176  call itself recursively. This uses the stack for saving everything that has  it to recurse to a great depth. I was writing for Unix, so I just let it call
177  to be saved for a recursive call. On Unix, the stack can be large, and this  itself recursively. This uses the stack for saving everything that has to be
178  works fine.  saved for a recursive call. On Unix, the stack can be large, and this works
179    fine.
180  It turns out that on non-Unix systems there are problems with programs that  
181  use a lot of stack. (This despite the fact that every last chip has oodles  It turns out that on some non-Unix-like systems there are problems with
182  of memory these days, and techniques for extending the stack have been known  programs that use a lot of stack. (This despite the fact that every last chip
183  for decades.) So....  has oodles of memory these days, and techniques for extending the stack have
184    been known for decades.) So....
185    
186  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
187  calls by keeping local variables that need to be preserved in blocks of memory  calls by keeping local variables that need to be preserved in blocks of memory
188  obtained from malloc instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
189  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
190  always used to.  always used to.
191    
192    The original heap-recursive code used longjmp(). However, it seems that this
193    can be very slow on some operating systems. Following a suggestion from Stan
194    Switzer, the use of longjmp() has been abolished, at the cost of having to
195    provide a unique number for each call to RMATCH. There is no way of generating
196    a sequence of numbers at compile time in C. I have given them names, to make
197    them stand out more clearly.
198    
199    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
200    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
201    tests. Furthermore, not using longjmp() means that local dynamic variables
202    don't have indeterminate values; this has meant that the frame size can be
203    reduced because the result can be "passed back" by straight setting of the
204    variable instead of being passed in the frame.
205  ****************************************************************************  ****************************************************************************
206  ***************************************************************************/  ***************************************************************************/
207    
208    
209  /* These versions of the macros use the stack, as normal */  /* Numbers for RMATCH calls */
210    
211    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
212           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
213           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
214           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
215           RM41,  RM42, RM43, RM44, RM45, RM46, RM47 };
216    
217    
218    /* These versions of the macros use the stack, as normal. There are debugging
219    versions and production versions. Note that the "rw" argument of RMATCH isn't
220    actually used in this definition. */
221    
222  #ifndef NO_RECURSE  #ifndef NO_RECURSE
223  #define REGISTER register  #define REGISTER register
224  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)  
225    #ifdef DEBUG
226    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
227      { \
228      printf("match() called in line %d\n", __LINE__); \
229      rrc = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \
230      printf("to line %d\n", __LINE__); \
231      }
232    #define RRETURN(ra) \
233      { \
234      printf("match() returned %d from line %d ", ra, __LINE__); \
235      return ra; \
236      }
237    #else
238    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
239      rrc = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)
240  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
241    #endif
242    
243  #else  #else
244    
245    
246  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
247  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
248  match(), which never changes. */  argument of match(), which never changes. */
249    
250  #define REGISTER  #define REGISTER
251    
252  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
253    {\    {\
254    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
255    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
256      {\    newframe->Xeptr = ra;\
257      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
258      newframe->Xecode = rb;\    newframe->Xoffset_top = rc;\
259      newframe->Xoffset_top = rc;\    newframe->Xims = re;\
260      newframe->Xims = re;\    newframe->Xeptrb = rf;\
261      newframe->Xeptrb = rf;\    newframe->Xflags = rg;\
262      newframe->Xflags = rg;\    newframe->Xrdepth = frame->Xrdepth + 1;\
263      newframe->Xprevframe = frame;\    newframe->Xprevframe = frame;\
264      frame = newframe;\    frame = newframe;\
265      DPRINTF(("restarting from line %d\n", __LINE__));\    DPRINTF(("restarting from line %d\n", __LINE__));\
266      goto HEAP_RECURSE;\    goto HEAP_RECURSE;\
267      }\    L_##rw:\
268    else\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     {\  
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
269    }    }
270    
271  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 235  match(), which never changes. */ Line 275  match(), which never changes. */
275    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
276    if (frame != NULL)\    if (frame != NULL)\
277      {\      {\
278      frame->Xresult = ra;\      rrc = ra;\
279      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
280      }\      }\
281    return ra;\    return ra;\
282    }    }
# Line 256  typedef struct heapframe { Line 295  typedef struct heapframe {
295    long int Xims;    long int Xims;
296    eptrblock *Xeptrb;    eptrblock *Xeptrb;
297    int Xflags;    int Xflags;
298      unsigned int Xrdepth;
299    
300    /* Function local variables */    /* Function local variables */
301    
# Line 271  typedef struct heapframe { Line 311  typedef struct heapframe {
311    
312    BOOL Xcur_is_word;    BOOL Xcur_is_word;
313    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
314    BOOL Xprev_is_word;    BOOL Xprev_is_word;
315    
316    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
317    
318  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
319    int Xprop_type;    int Xprop_type;
320      int Xprop_value;
321    int Xprop_fail_result;    int Xprop_fail_result;
322    int Xprop_category;    int Xprop_category;
323    int Xprop_chartype;    int Xprop_chartype;
324    int Xprop_othercase;    int Xprop_script;
325    int Xprop_test_against;    int Xoclength;
326    int *Xprop_test_variable;    uschar Xocchars[8];
327  #endif  #endif
328    
329    int Xctype;    int Xctype;
330    int Xfc;    unsigned int Xfc;
331    int Xfi;    int Xfi;
332    int Xlength;    int Xlength;
333    int Xmax;    int Xmax;
# Line 301  typedef struct heapframe { Line 341  typedef struct heapframe {
341    
342    eptrblock Xnewptrb;    eptrblock Xnewptrb;
343    
344    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
345    
346    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
347    
348  } heapframe;  } heapframe;
349    
# Line 320  typedef struct heapframe { Line 359  typedef struct heapframe {
359  *         Match from current position            *  *         Match from current position            *
360  *************************************************/  *************************************************/
361    
362  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
363  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
364  same response.  same response.
365    
# Line 333  performance. Tests using gcc on a SPARC Line 369  performance. Tests using gcc on a SPARC
369  made performance worse.  made performance worse.
370    
371  Arguments:  Arguments:
372     eptr        pointer in subject     eptr        pointer to current character in subject
373     ecode       position in code     ecode       pointer to current position in compiled code
374     offset_top  current top pointer     offset_top  current top pointer
375     md          pointer to "static" info for the match     md          pointer to "static" info for the match
376     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 342  Arguments: Line 378  Arguments:
378                   brackets - for testing for empty matches                   brackets - for testing for empty matches
379     flags       can contain     flags       can contain
380                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
381                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
382                       group that can match an empty string
383                     match_tail_recursed - this is a tail_recursed group
384       rdepth      the recursion depth
385    
386  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
387                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
388                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
389                   (e.g. stopped by recursion limit)                   (e.g. stopped by repeated call or recursion limit)
390  */  */
391    
392  static int  static int
393  match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
394    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
395    int flags)    int flags, unsigned int rdepth)
396  {  {
397  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
398  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
399  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
400    
401    register int  rrc;         /* Returns from recursive calls */
402    register int  i;           /* Used for loops not involving calls to RMATCH() */
403    register unsigned int c;   /* Character values not kept over RMATCH() calls */
404    register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
405    
406  register int  rrc;    /* Returns from recursive calls */  BOOL minimize, possessive; /* Quantifier options */
 register int  i;      /* Used for loops not involving calls to RMATCH() */  
 register int  c;      /* Character values not kept over RMATCH() calls */  
 register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  
407    
408  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
409  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 381  frame->Xoffset_top = offset_top; Line 422  frame->Xoffset_top = offset_top;
422  frame->Xims = ims;  frame->Xims = ims;
423  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
424  frame->Xflags = flags;  frame->Xflags = flags;
425    frame->Xrdepth = rdepth;
426    
427  /* This is where control jumps back to to effect "recursion" */  /* This is where control jumps back to to effect "recursion" */
428    
# Line 394  HEAP_RECURSE: Line 436  HEAP_RECURSE:
436  #define ims                frame->Xims  #define ims                frame->Xims
437  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
438  #define flags              frame->Xflags  #define flags              frame->Xflags
439    #define rdepth             frame->Xrdepth
440    
441  /* Ditto for the local variables */  /* Ditto for the local variables */
442    
# Line 411  HEAP_RECURSE: Line 454  HEAP_RECURSE:
454    
455  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
456  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
457  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
458    
459  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
460    
461  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
462  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
463    #define prop_value         frame->Xprop_value
464  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
465  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
466  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
467  #define prop_othercase     frame->Xprop_othercase  #define prop_script        frame->Xprop_script
468  #define prop_test_against  frame->Xprop_test_against  #define oclength           frame->Xoclength
469  #define prop_test_variable frame->Xprop_test_variable  #define occhars            frame->Xocchars
470  #endif  #endif
471    
472  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 447  HEAP_RECURSE: Line 490  HEAP_RECURSE:
490  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
491  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
492    
493  #else  #else         /* NO_RECURSE not defined */
494  #define fi i  #define fi i
495  #define fc c  #define fc c
496    
497    
498  #ifdef SUPPORT_UTF8                /* Many of these variables are used ony */  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
499  const uschar *charptr;             /* small blocks of the code. My normal  */  const uschar *charptr;             /* in small blocks of the code. My normal */
500  #endif                             /* style of coding would have declared  */  #endif                             /* style of coding would have declared    */
501  const uschar *callpat;             /* them within each of those blocks.    */  const uschar *callpat;             /* them within each of those blocks.      */
502  const uschar *data;                /* However, in order to accommodate the */  const uschar *data;                /* However, in order to accommodate the   */
503  const uschar *next;                /* version of this code that uses an    */  const uschar *next;                /* version of this code that uses an      */
504  const uschar *pp;                  /* external "stack" implemented on the  */  USPTR         pp;                  /* external "stack" implemented on the    */
505  const uschar *prev;                /* heap, it is easier to declare them   */  const uschar *prev;                /* heap, it is easier to declare them all */
506  const uschar *saved_eptr;          /* all here, so the declarations can    */  USPTR         saved_eptr;          /* here, so the declarations can be cut   */
507                                     /* be cut out in a block. The only      */                                     /* out in a block. The only declarations  */
508  recursion_info new_recursive;      /* declarations within blocks below are */  recursion_info new_recursive;      /* within blocks below are for variables  */
509                                     /* for variables that do not have to    */                                     /* that do not have to be preserved over  */
510  BOOL cur_is_word;                  /* be preserved over a recursive call   */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
511  BOOL condition;                    /* to RMATCH().                         */  BOOL condition;
 BOOL minimize;  
512  BOOL prev_is_word;  BOOL prev_is_word;
513    
514  unsigned long int original_ims;  unsigned long int original_ims;
515    
516  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
517  int prop_type;  int prop_type;
518    int prop_value;
519  int prop_fail_result;  int prop_fail_result;
520  int prop_category;  int prop_category;
521  int prop_chartype;  int prop_chartype;
522  int prop_othercase;  int prop_script;
523  int prop_test_against;  int oclength;
524  int *prop_test_variable;  uschar occhars[8];
525  #endif  #endif
526    
527  int ctype;  int ctype;
# Line 493  int save_offset1, save_offset2, save_off Line 536  int save_offset1, save_offset2, save_off
536  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
537    
538  eptrblock newptrb;  eptrblock newptrb;
539  #endif  #endif     /* NO_RECURSE */
540    
541  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
542  variables. */  variables. */
543    
544  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
545    prop_value = 0;
546  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_against = 0;  
 prop_test_variable = NULL;  
547  #endif  #endif
548    
549  /* OK, now we can get on with the real code of the function. Recursion is  
550  specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,  /* This label is used for tail recursion, which is used in a few cases even
551  these just turn into a recursive call to match() and a "return", respectively.  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
552  However, RMATCH isn't like a function call because it's quite a complicated  used. Thanks to Ian Taylor for noticing this possibility and sending the
553  macro. It has to be used in one particular way. This shouldn't, however, impact  original patch. */
554  performance when true recursion is being used. */  
555    TAIL_RECURSE:
556    
557    /* OK, now we can get on with the real code of the function. Recursive calls
558    are specified by the macro RMATCH and RRETURN is used to return. When
559    NO_RECURSE is *not* defined, these just turn into a recursive call to match()
560    and a "return", respectively (possibly with some debugging if DEBUG is
561    defined). However, RMATCH isn't like a function call because it's quite a
562    complicated macro. It has to be used in one particular way. This shouldn't,
563    however, impact performance when true recursion is being used. */
564    
565    #ifdef SUPPORT_UTF8
566    utf8 = md->utf8;       /* Local copy of the flag */
567    #else
568    utf8 = FALSE;
569    #endif
570    
571    /* First check that we haven't called match() too many times, or that we
572    haven't exceeded the recursive call limit. */
573    
574  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
575    if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
576    
577  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
 utf8 = md->utf8;       /* Local copy of the flag */  
578    
579  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
580  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
581  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
582  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
583    When match() is called in other circumstances, don't add to the chain. If this
584    is a tail recursion, use a block from the workspace, as the one on the stack is
585    already used. */
586    
587  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
588    {    {
589    newptrb.epb_prev = eptrb;    eptrblock *p;
590    newptrb.epb_saved_eptr = eptr;    if ((flags & match_tail_recursed) != 0)
591    eptrb = &newptrb;      {
592        if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
593        p = md->eptrchain + md->eptrn++;
594        }
595      else p = &newptrb;
596      p->epb_saved_eptr = eptr;
597      p->epb_prev = eptrb;
598      eptrb = p;
599    }    }
600    
601  /* Now start processing the operations. */  /* Now start processing the opcodes. */
602    
603  for (;;)  for (;;)
604    {    {
605      minimize = possessive = FALSE;
606    op = *ecode;    op = *ecode;
   minimize = FALSE;  
607    
608    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
609    matching at least one subject character. */    matching at least one subject character. */
# Line 543  for (;;) Line 613  for (;;)
613        eptr > md->start_match)        eptr > md->start_match)
614      md->hitend = TRUE;      md->hitend = TRUE;
615    
616    /* Opening capturing bracket. If there is space in the offset vector, save    switch(op)
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
617      {      {
618      number = op - OP_BRA;      /* Handle a capturing bracket. If there is space in the offset vector, save
619        the current subject position in the working slot at the top of the vector.
620      /* For extended extraction brackets (large number), we have to fish out the      We mustn't change the current values of the data slot, because they may be
621      number from a dummy opcode at the start. */      set from a previous iteration of this group, and be referred to by a
622        reference inside the group.
623      if (number > EXTRACT_BASIC_MAX)  
624        number = GET2(ecode, 2+LINK_SIZE);      If the bracket fails to match, we need to restore this value and also the
625        values of the final offsets, in case they were set by a previous iteration
626        of the same bracket.
627    
628        If there isn't enough space in the offset vector, treat this as if it were
629        a non-capturing bracket. Don't worry about setting the flag for the error
630        case here; that is handled in the code for KET. */
631    
632        case OP_CBRA:
633        case OP_SCBRA:
634        number = GET2(ecode, 1+LINK_SIZE);
635      offset = number << 1;      offset = number << 1;
636    
637  #ifdef DEBUG  #ifdef DEBUG
638      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
639        printf("subject=");
640      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
641      printf("\n");      printf("\n");
642  #endif  #endif
# Line 584  for (;;) Line 651  for (;;)
651        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
652        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
653    
654          flags = (op == OP_SCBRA)? match_cbegroup : 0;
655        do        do
656          {          {
657          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
658            match_isgroup);            ims, eptrb, flags, RM1);
659          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
660          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
661          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
# Line 603  for (;;) Line 671  for (;;)
671        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
672        }        }
673    
674      /* Insufficient room for saving captured contents */      /* Insufficient room for saving captured contents. Treat as a non-capturing
675        bracket. */
676    
677      else op = OP_BRA;      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
     }  
678    
679    /* Other types of node can be handled by a switch */      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
680        final alternative within the brackets, we would return the result of a
681        recursive call to match() whatever happened. We can reduce stack usage by
682        turning this into a tail recursion. */
683    
684    switch(op)      case OP_BRA:
685      {      case OP_SBRA:
686      case OP_BRA:     /* Non-capturing bracket: optimized */      DPRINTF(("start non-capturing bracket\n"));
687      DPRINTF(("start bracket 0\n"));      flags = (op >= OP_SBRA)? match_cbegroup : 0;
688      do      for (;;)
689        {        {
690        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)
691          match_isgroup);          {
692            ecode += _pcre_OP_lengths[*ecode];
693            flags |= match_tail_recursed;
694            DPRINTF(("bracket 0 tail recursion\n"));
695            goto TAIL_RECURSE;
696            }
697    
698          /* For non-final alternatives, continue the loop for a NOMATCH result;
699          otherwise return. */
700    
701          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
702            eptrb, flags, RM2);
703        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
704        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
705        }        }
706      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
707    
708      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
709      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
710      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
711      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
712        obeyed, we can use tail recursion to avoid using another stack frame. */
713    
714      case OP_COND:      case OP_COND:
715      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
716        if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
717          {
718          offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
719          condition = md->recursive != NULL &&
720            (offset == RREF_ANY || offset == md->recursive->group_num);
721          ecode += condition? 3 : GET(ecode, 1);
722          }
723    
724        else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
725        {        {
726        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
727        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
728          (md->recursive != NULL) :        ecode += condition? 3 : GET(ecode, 1);
729          (offset < offset_top && md->offset_vector[offset] >= 0);        }
730        RMATCH(rrc, eptr, ecode + (condition?  
731          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
732          offset_top, md, ims, eptrb, match_isgroup);        {
733        RRETURN(rrc);        condition = FALSE;
734          ecode += GET(ecode, 1);
735        }        }
736    
737      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
738      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
739        assertion. */
740    
741      else      else
742        {        {
743        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
744            match_condassert | match_isgroup);            match_condassert, RM3);
745        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
746          {          {
747          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
748            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
749          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
750          }          }
751        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH)
752          {          {
753          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
754          }          }
755        else ecode += GET(ecode, 1);        else
756        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
757          match_isgroup);          condition = FALSE;
758        RRETURN(rrc);          ecode += GET(ecode, 1);
759            }
760        }        }
     /* Control never reaches here */  
761    
762      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
763      encountered. */      we can use tail recursion to avoid using another stack frame. If the second
764        alternative doesn't exist, we can just plough on. */
765    
766      case OP_CREF:      if (condition || *ecode == OP_ALT)
767      case OP_BRANUMBER:        {
768      ecode += 3;        ecode += 1 + LINK_SIZE;
769          flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
770          goto TAIL_RECURSE;
771          }
772        else
773          {
774          ecode += 1 + LINK_SIZE;
775          }
776      break;      break;
777    
778      /* End of the pattern. If we are in a recursion, we should restore the  
779      offsets appropriately and continue from after the call. */      /* End of the pattern. If we are in a top-level recursion, we should
780        restore the offsets appropriately and continue from after the call. */
781    
782      case OP_END:      case OP_END:
783      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
784        {        {
785        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
786        DPRINTF(("Hit the end in a (?0) recursion\n"));        DPRINTF(("End of pattern in a (?0) recursion\n"));
787        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
788        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
789          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
# Line 717  for (;;) Line 819  for (;;)
819      case OP_ASSERTBACK:      case OP_ASSERTBACK:
820      do      do
821        {        {
822        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
823          match_isgroup);          RM4);
824        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
825        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
826        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 744  for (;;) Line 846  for (;;)
846      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
847      do      do
848        {        {
849        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
850          match_isgroup);          RM5);
851        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
852        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
853        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 766  for (;;) Line 868  for (;;)
868  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
869      if (utf8)      if (utf8)
870        {        {
871        c = GET(ecode,1);        i = GET(ecode, 1);
872        for (i = 0; i < c; i++)        while (i-- > 0)
873          {          {
874          eptr--;          eptr--;
875          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
# Line 780  for (;;) Line 882  for (;;)
882      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
883    
884        {        {
885        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
886        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
887        }        }
888    
# Line 800  for (;;) Line 902  for (;;)
902        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 1;   /* Version 1 of the callout block */
903        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
904        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
905        cb.subject          = (const char *)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
906        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
907        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = md->start_match - md->start_subject;
908        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
# Line 837  for (;;) Line 939  for (;;)
939      case OP_RECURSE:      case OP_RECURSE:
940        {        {
941        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
942        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
943            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
944    
945        /* Add to "recursing stack" */        /* Add to "recursing stack" */
946    
# Line 876  for (;;) Line 973  for (;;)
973        restore the offset and recursion data. */        restore the offset and recursion data. */
974    
975        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
976          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
977        do        do
978          {          {
979          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
980              eptrb, match_isgroup);            md, ims, eptrb, flags, RM6);
981          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
982            {            {
983              DPRINTF(("Recursion matched\n"));
984            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
985            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
986              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
987            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
988            }            }
989          else if (rrc != MATCH_NOMATCH) RRETURN(rrc);          else if (rrc != MATCH_NOMATCH)
990              {
991              DPRINTF(("Recursion gave error %d\n", rrc));
992              RRETURN(rrc);
993              }
994    
995          md->recursive = &new_recursive;          md->recursive = &new_recursive;
996          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
# Line 912  for (;;) Line 1015  for (;;)
1015      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer. */
1016    
1017      case OP_ONCE:      case OP_ONCE:
1018        {      prev = ecode;
1019        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
1020    
1021        do      do
1022          {        {
1023          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
1024            eptrb, match_isgroup);          eptrb, 0, RM7);
1025          if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1026          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1027          ecode += GET(ecode,1);        ecode += GET(ecode,1);
1028          }        }
1029        while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1030    
1031        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1032    
1033        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1034    
1035        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1036        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1037    
1038        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1039    
1040        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1041        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1042    
1043        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1044        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1045        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1046        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1047        course of events. */      course of events. */
1048    
1049        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1050          {        {
1051          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1052          break;        break;
1053          }        }
1054    
1055        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1056        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1057        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1058        opcode. */      any options that changed within the bracket before re-running it, so
1059        check the next opcode. */
1060    
1061        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1062          {        {
1063          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1064          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1065          }        }
1066    
1067        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1068          {        {
1069          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,
1070          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          RM8);
1071          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1072          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode = prev;
1073          }        flags = match_tail_recursed;
1074        else  /* OP_KETRMAX */        goto TAIL_RECURSE;
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
1075        }        }
1076      RRETURN(MATCH_NOMATCH);      else  /* OP_KETRMAX */
1077          {
1078          RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1079          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1080          ecode += 1 + LINK_SIZE;
1081          flags = match_tail_recursed;
1082          goto TAIL_RECURSE;
1083          }
1084        /* Control never gets here */
1085    
1086      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1087      bracketed group and go to there. */      bracketed group and go to there. */
# Line 994  for (;;) Line 1099  for (;;)
1099      case OP_BRAZERO:      case OP_BRAZERO:
1100        {        {
1101        next = ecode+1;        next = ecode+1;
1102        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1103        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1104        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1105        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1106        }        }
1107      break;      break;
1108    
1109      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1110        {        {
1111        next = ecode+1;        next = ecode+1;
1112        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1113        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
         match_isgroup);  
1114        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1115        ecode++;        ecode++;
1116        }        }
1117      break;      break;
1118    
1119      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. */
     an assertion "group", stop matching and return MATCH_MATCH, but record the  
     current high water mark for use by positive assertions. Do this also  
     for the "once" (not-backup up) groups. */  
1120    
1121      case OP_KET:      case OP_KET:
1122      case OP_KETRMIN:      case OP_KETRMIN:
1123      case OP_KETRMAX:      case OP_KETRMAX:
1124        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
   
       /* Back up the stack of bracket start pointers. */  
1125    
1126        eptrb = eptrb->epb_prev;      /* If this was a group that remembered the subject start, in order to break
1127        infinite repeats of empty string matches, retrieve the subject start from
1128        the chain. Otherwise, set it NULL. */
1129    
1130        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev >= OP_SBRA)
1131            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        {
1132            *prev == OP_ONCE)        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1133          {        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1134          md->end_match_ptr = eptr;      /* For ONCE */        }
1135          md->end_offset_top = offset_top;      else saved_eptr = NULL;
         RRETURN(MATCH_MATCH);  
         }  
1136    
1137        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group, stop matching and return
1138        group number back at the start and if necessary complete handling an      MATCH_MATCH, but record the current high water mark for use by positive
1139        extraction by setting the offsets and bumping the high water mark. */      assertions. Do this also for the "once" (atomic) groups. */
1140    
1141        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1142          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1143          number = *prev - OP_BRA;          *prev == OP_ONCE)
1144          {
1145          md->end_match_ptr = eptr;      /* For ONCE */
1146          md->end_offset_top = offset_top;
1147          RRETURN(MATCH_MATCH);
1148          }
1149    
1150          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1151          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1152        bumping the high water mark. Note that whole-pattern recursion is coded as
1153        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1154        when the OP_END is reached. Other recursion is handled here. */
1155    
1156          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1157          offset = number << 1;        {
1158          number = GET2(prev, 1+LINK_SIZE);
1159          offset = number << 1;
1160    
1161  #ifdef DEBUG  #ifdef DEBUG
1162          printf("end bracket %d", number);        printf("end bracket %d", number);
1163          printf("\n");        printf("\n");
1164  #endif  #endif
1165    
1166          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1167          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1168          into group 0, so it won't be picked up here. Instead, we catch it when          {
1169          the OP_END is reached. */          md->offset_vector[offset] =
1170              md->offset_vector[md->offset_end - number];
1171          if (number > 0)          md->offset_vector[offset+1] = eptr - md->start_subject;
1172            {          if (offset_top <= offset) offset_top = offset + 2;
1173            md->capture_last = number;          }
1174            if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
1175              {        /* Handle a recursively called group. Restore the offsets
1176              md->offset_vector[offset] =        appropriately and continue from after the call. */
1177                md->offset_vector[md->offset_end - number];  
1178              md->offset_vector[offset+1] = eptr - md->start_subject;        if (md->recursive != NULL && md->recursive->group_num == number)
1179              if (offset_top <= offset) offset_top = offset + 2;          {
1180              }          recursion_info *rec = md->recursive;
1181            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1182            /* Handle a recursively called group. Restore the offsets          md->recursive = rec->prevrec;
1183            appropriately and continue from after the call. */          md->start_match = rec->save_start;
1184            memcpy(md->offset_vector, rec->offset_save,
1185            if (md->recursive != NULL && md->recursive->group_num == number)            rec->saved_max * sizeof(int));
1186              {          ecode = rec->after_call;
1187              recursion_info *rec = md->recursive;          ims = original_ims;
1188              DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          break;
             md->recursive = rec->prevrec;  
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
1189          }          }
1190          }
1191    
1192        /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1193        the group. */      flags, in case they got changed during the group. */
1194    
1195        ims = original_ims;      ims = original_ims;
1196        DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
1197    
1198        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1199        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1200        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1201        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1202        course of events. */      course of events. */
1203    
1204        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1205          {        {
1206          ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1207          break;        break;
1208          }        }
1209    
1210        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1211        preceding bracket, in the appropriate order. */      preceding bracket, in the appropriate order. In the second case, we can use
1212        tail recursion to avoid using another stack frame. */
1213    
1214        if (*ecode == OP_KETRMIN)      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
         {  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
       }  
1215    
1216      RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KETRMIN)
1217          {
1218          RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,
1219            RM12);
1220          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1221          ecode = prev;
1222          flags |= match_tail_recursed;
1223          goto TAIL_RECURSE;
1224          }
1225        else  /* OP_KETRMAX */
1226          {
1227          RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1228          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1229          ecode += 1 + LINK_SIZE;
1230          flags = match_tail_recursed;
1231          goto TAIL_RECURSE;
1232          }
1233        /* Control never gets here */
1234    
1235      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1236    
# Line 1135  for (;;) Line 1238  for (;;)
1238      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1239      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1240        {        {
1241        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1242              (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1243          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1244        ecode++;        ecode++;
1245        break;        break;
# Line 1163  for (;;) Line 1267  for (;;)
1267      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1268        {        {
1269        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1270          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1271        else        else
1272          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1273        ecode++;        ecode++;
# Line 1174  for (;;) Line 1278  for (;;)
1278        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
1279        if (!md->endonly)        if (!md->endonly)
1280          {          {
1281          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1282             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1283            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1284          ecode++;          ecode++;
1285          break;          break;
1286          }          }
1287        }        }
1288      /* ... else fall through */      /* ... else fall through for endonly */
1289    
1290      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1291    
# Line 1193  for (;;) Line 1297  for (;;)
1297      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1298    
1299      case OP_EODN:      case OP_EODN:
1300      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1301         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1302          RRETURN(MATCH_NOMATCH);
1303      ecode++;      ecode++;
1304      break;      break;
1305    
# Line 1247  for (;;) Line 1352  for (;;)
1352      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1353    
1354      case OP_ANY:      case OP_ANY:
1355      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if ((ims & PCRE_DOTALL) == 0)
1356        RRETURN(MATCH_NOMATCH);        {
1357          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1358          }
1359      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
 #ifdef SUPPORT_UTF8  
1360      if (utf8)      if (utf8)
1361        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 #endif  
1362      ecode++;      ecode++;
1363      break;      break;
1364    
# Line 1343  for (;;) Line 1448  for (;;)
1448      ecode++;      ecode++;
1449      break;      break;
1450    
1451        case OP_ANYNL:
1452        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1453        GETCHARINCTEST(c, eptr);
1454        switch(c)
1455          {
1456          default: RRETURN(MATCH_NOMATCH);
1457          case 0x000d:
1458          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1459          break;
1460          case 0x000a:
1461          case 0x000b:
1462          case 0x000c:
1463          case 0x0085:
1464          case 0x2028:
1465          case 0x2029:
1466          break;
1467          }
1468        ecode++;
1469        break;
1470    
1471  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1472      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1473      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1352  for (;;) Line 1477  for (;;)
1477      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1478      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1479        {        {
1480        int chartype, rqdtype;        int chartype, script;
1481        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = ucp_findchar(c, &chartype, &othercase);  
   
       rqdtype = *(++ecode);  
       ecode++;  
1482    
1483        if (rqdtype >= 128)        switch(ecode[1])
1484          {          {
1485          if ((rqdtype - 128 != category) == (op == OP_PROP))          case PT_ANY:
1486            if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1487            break;
1488    
1489            case PT_LAMP:
1490            if ((chartype == ucp_Lu ||
1491                 chartype == ucp_Ll ||
1492                 chartype == ucp_Lt) == (op == OP_NOTPROP))
1493            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1494          }           break;
1495        else  
1496          {          case PT_GC:
1497          if ((rqdtype != chartype) == (op == OP_PROP))          if ((ecode[2] != category) == (op == OP_PROP))
1498              RRETURN(MATCH_NOMATCH);
1499            break;
1500    
1501            case PT_PC:
1502            if ((ecode[2] != chartype) == (op == OP_PROP))
1503              RRETURN(MATCH_NOMATCH);
1504            break;
1505    
1506            case PT_SC:
1507            if ((ecode[2] != script) == (op == OP_PROP))
1508            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1509            break;
1510    
1511            default:
1512            RRETURN(PCRE_ERROR_INTERNAL);
1513          }          }
1514    
1515          ecode += 3;
1516        }        }
1517      break;      break;
1518    
# Line 1379  for (;;) Line 1523  for (;;)
1523      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1524      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1525        {        {
1526        int chartype;        int chartype, script;
1527        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = ucp_findchar(c, &chartype, &othercase);  
1528        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1529        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1530          {          {
# Line 1390  for (;;) Line 1533  for (;;)
1533            {            {
1534            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1535            }            }
1536          category = ucp_findchar(c, &chartype, &othercase);          category = _pcre_ucp_findprop(c, &chartype, &script);
1537          if (category != ucp_M) break;          if (category != ucp_M) break;
1538          eptr += len;          eptr += len;
1539          }          }
# Line 1480  for (;;) Line 1623  for (;;)
1623          {          {
1624          for (fi = min;; fi++)          for (fi = min;; fi++)
1625            {            {
1626            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1627            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1628            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1629              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 1501  for (;;) Line 1644  for (;;)
1644            }            }
1645          while (eptr >= pp)          while (eptr >= pp)
1646            {            {
1647            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1648            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1649            eptr -= length;            eptr -= length;
1650            }            }
# Line 1606  for (;;) Line 1749  for (;;)
1749            {            {
1750            for (fi = min;; fi++)            for (fi = min;; fi++)
1751              {              {
1752              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1753              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1754              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1755              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 1626  for (;;) Line 1769  for (;;)
1769            {            {
1770            for (fi = min;; fi++)            for (fi = min;; fi++)
1771              {              {
1772              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1773              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1774              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1775              c = *eptr++;              c = *eptr++;
# Line 1663  for (;;) Line 1806  for (;;)
1806              }              }
1807            for (;;)            for (;;)
1808              {              {
1809              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1810              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1811              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
1812              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1682  for (;;) Line 1825  for (;;)
1825              }              }
1826            while (eptr >= pp)            while (eptr >= pp)
1827              {              {
1828              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
             eptr--;  
1829              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1830                eptr--;
1831              }              }
1832            }            }
1833    
# Line 1753  for (;;) Line 1896  for (;;)
1896          {          {
1897          for (fi = min;; fi++)          for (fi = min;; fi++)
1898            {            {
1899            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
1900            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1901            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1902            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
# Line 1777  for (;;) Line 1920  for (;;)
1920            }            }
1921          for(;;)          for(;;)
1922            {            {
1923            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
1924            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1925            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
1926            BACKCHAR(eptr)            BACKCHAR(eptr)
# Line 1836  for (;;) Line 1979  for (;;)
1979    
1980        else        else
1981          {          {
1982          int dc;          unsigned int dc;
1983          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
1984          ecode += length;          ecode += length;
1985    
1986          /* If we have Unicode property support, we can use it to test the other          /* If we have Unicode property support, we can use it to test the other
1987          case of the character, if there is one. The result of ucp_findchar() is          case of the character, if there is one. */
         < 0 if the char isn't found, and othercase is returned as zero if there  
         isn't one. */  
1988    
1989          if (fc != dc)          if (fc != dc)
1990            {            {
1991  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1992            int chartype;            if (dc != _pcre_ucp_othercase(fc))
           int othercase;  
           if (ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)  
1993  #endif  #endif
1994              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
1995            }            }
# Line 1867  for (;;) Line 2006  for (;;)
2006        }        }
2007      break;      break;
2008    
2009      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
2010    
2011      case OP_EXACT:      case OP_EXACT:
2012      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2013      ecode += 3;      ecode += 3;
2014      goto REPEATCHAR;      goto REPEATCHAR;
2015    
2016        case OP_POSUPTO:
2017        possessive = TRUE;
2018        /* Fall through */
2019    
2020      case OP_UPTO:      case OP_UPTO:
2021      case OP_MINUPTO:      case OP_MINUPTO:
2022      min = 0;      min = 0;
# Line 1882  for (;;) Line 2025  for (;;)
2025      ecode += 3;      ecode += 3;
2026      goto REPEATCHAR;      goto REPEATCHAR;
2027    
2028        case OP_POSSTAR:
2029        possessive = TRUE;
2030        min = 0;
2031        max = INT_MAX;
2032        ecode++;
2033        goto REPEATCHAR;
2034    
2035        case OP_POSPLUS:
2036        possessive = TRUE;
2037        min = 1;
2038        max = INT_MAX;
2039        ecode++;
2040        goto REPEATCHAR;
2041    
2042        case OP_POSQUERY:
2043        possessive = TRUE;
2044        min = 0;
2045        max = 1;
2046        ecode++;
2047        goto REPEATCHAR;
2048    
2049      case OP_STAR:      case OP_STAR:
2050      case OP_MINSTAR:      case OP_MINSTAR:
2051      case OP_PLUS:      case OP_PLUS:
# Line 1913  for (;;) Line 2077  for (;;)
2077    
2078        if (length > 1)        if (length > 1)
2079          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2080  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2081          int othercase;          unsigned int othercase;
         int chartype;  
2082          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2083               ucp_findchar(fc, &chartype, &othercase) >= 0 &&              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
              othercase > 0)  
2084            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2085            else oclength = 0;
2086  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2087    
2088          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2089            {            {
2090            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (memcmp(eptr, charptr, length) == 0) eptr += length;
2091    #ifdef SUPPORT_UCP
2092            /* Need braces because of following else */            /* Need braces because of following else */
2093            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2094            else            else
# Line 1935  for (;;) Line 2096  for (;;)
2096              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2097              eptr += oclength;              eptr += oclength;
2098              }              }
2099    #else   /* without SUPPORT_UCP */
2100              else { RRETURN(MATCH_NOMATCH); }
2101    #endif  /* SUPPORT_UCP */
2102            }            }
2103    
2104          if (min == max) continue;          if (min == max) continue;
# Line 1943  for (;;) Line 2107  for (;;)
2107            {            {
2108            for (fi = min;; fi++)            for (fi = min;; fi++)
2109              {              {
2110              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2111              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2112              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2113              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2114    #ifdef SUPPORT_UCP
2115              /* Need braces because of following else */              /* Need braces because of following else */
2116              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2117              else              else
# Line 1954  for (;;) Line 2119  for (;;)
2119                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2120                eptr += oclength;                eptr += oclength;
2121                }                }
2122    #else   /* without SUPPORT_UCP */
2123                else { RRETURN (MATCH_NOMATCH); }
2124    #endif  /* SUPPORT_UCP */
2125              }              }
2126            /* Control never gets here */            /* Control never gets here */
2127            }            }
2128          else  
2129            else  /* Maximize */
2130            {            {
2131            pp = eptr;            pp = eptr;
2132            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2133              {              {
2134              if (eptr > md->end_subject - length) break;              if (eptr > md->end_subject - length) break;
2135              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2136    #ifdef SUPPORT_UCP
2137              else if (oclength == 0) break;              else if (oclength == 0) break;
2138              else              else
2139                {                {
2140                if (memcmp(eptr, occhars, oclength) != 0) break;                if (memcmp(eptr, occhars, oclength) != 0) break;
2141                eptr += oclength;                eptr += oclength;
2142                }                }
2143    #else   /* without SUPPORT_UCP */
2144                else break;
2145    #endif  /* SUPPORT_UCP */
2146              }              }
2147            while (eptr >= pp)  
2148              if (possessive) continue;
2149              for(;;)
2150             {             {
2151             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2152             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2153               if (eptr == pp) RRETURN(MATCH_NOMATCH);
2154    #ifdef SUPPORT_UCP
2155               eptr--;
2156               BACKCHAR(eptr);
2157    #else   /* without SUPPORT_UCP */
2158             eptr -= length;             eptr -= length;
2159    #endif  /* SUPPORT_UCP */
2160             }             }
           RRETURN(MATCH_NOMATCH);  
2161            }            }
2162          /* Control never gets here */          /* Control never gets here */
2163          }          }
# Line 2017  for (;;) Line 2197  for (;;)
2197          {          {
2198          for (fi = min;; fi++)          for (fi = min;; fi++)
2199            {            {
2200            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2201            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2202            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
2203                fc != md->lcc[*eptr++])                fc != md->lcc[*eptr++])
# Line 2025  for (;;) Line 2205  for (;;)
2205            }            }
2206          /* Control never gets here */          /* Control never gets here */
2207          }          }
2208        else        else  /* Maximize */
2209          {          {
2210          pp = eptr;          pp = eptr;
2211          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2033  for (;;) Line 2213  for (;;)
2213            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2214            eptr++;            eptr++;
2215            }            }
2216            if (possessive) continue;
2217          while (eptr >= pp)          while (eptr >= pp)
2218            {            {
2219            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2220            eptr--;            eptr--;
2221            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2222            }            }
# Line 2054  for (;;) Line 2235  for (;;)
2235          {          {
2236          for (fi = min;; fi++)          for (fi = min;; fi++)
2237            {            {
2238            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2239            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2240            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2241              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2242            }            }
2243          /* Control never gets here */          /* Control never gets here */
2244          }          }
2245        else        else  /* Maximize */
2246          {          {
2247          pp = eptr;          pp = eptr;
2248          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2069  for (;;) Line 2250  for (;;)
2250            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2251            eptr++;            eptr++;
2252            }            }
2253            if (possessive) continue;
2254          while (eptr >= pp)          while (eptr >= pp)
2255            {            {
2256            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2257            eptr--;            eptr--;
2258            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2259            }            }
# Line 2121  for (;;) Line 2303  for (;;)
2303      ecode += 3;      ecode += 3;
2304      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2305    
2306        case OP_NOTPOSSTAR:
2307        possessive = TRUE;
2308        min = 0;
2309        max = INT_MAX;
2310        ecode++;
2311        goto REPEATNOTCHAR;
2312    
2313        case OP_NOTPOSPLUS:
2314        possessive = TRUE;
2315        min = 1;
2316        max = INT_MAX;
2317        ecode++;
2318        goto REPEATNOTCHAR;
2319    
2320        case OP_NOTPOSQUERY:
2321        possessive = TRUE;
2322        min = 0;
2323        max = 1;
2324        ecode++;
2325        goto REPEATNOTCHAR;
2326    
2327        case OP_NOTPOSUPTO:
2328        possessive = TRUE;
2329        min = 0;
2330        max = GET2(ecode, 1);
2331        ecode += 3;
2332        goto REPEATNOTCHAR;
2333    
2334      case OP_NOTSTAR:      case OP_NOTSTAR:
2335      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2336      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2160  for (;;) Line 2370  for (;;)
2370        /* UTF-8 mode */        /* UTF-8 mode */
2371        if (utf8)        if (utf8)
2372          {          {
2373          register int d;          register unsigned int d;
2374          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2375            {            {
2376            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2185  for (;;) Line 2395  for (;;)
2395          /* UTF-8 mode */          /* UTF-8 mode */
2396          if (utf8)          if (utf8)
2397            {            {
2398            register int d;            register unsigned int d;
2399            for (fi = min;; fi++)            for (fi = min;; fi++)
2400              {              {
2401              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2402              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2403              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2404              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
# Line 2202  for (;;) Line 2412  for (;;)
2412            {            {
2413            for (fi = min;; fi++)            for (fi = min;; fi++)
2414              {              {
2415              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2416              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2417              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2418                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2221  for (;;) Line 2431  for (;;)
2431          /* UTF-8 mode */          /* UTF-8 mode */
2432          if (utf8)          if (utf8)
2433            {            {
2434            register int d;            register unsigned int d;
2435            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2436              {              {
2437              int len = 1;              int len = 1;
# Line 2231  for (;;) Line 2441  for (;;)
2441              if (fc == d) break;              if (fc == d) break;
2442              eptr += len;              eptr += len;
2443              }              }
2444            for(;;)          if (possessive) continue;
2445            for(;;)
2446              {              {
2447              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2448              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2449              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2450              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2248  for (;;) Line 2459  for (;;)
2459              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2460              eptr++;              eptr++;
2461              }              }
2462              if (possessive) continue;
2463            while (eptr >= pp)            while (eptr >= pp)
2464              {              {
2465              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2466              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2467              eptr--;              eptr--;
2468              }              }
# Line 2269  for (;;) Line 2481  for (;;)
2481        /* UTF-8 mode */        /* UTF-8 mode */
2482        if (utf8)        if (utf8)
2483          {          {
2484          register int d;          register unsigned int d;
2485          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2486            {            {
2487            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2292  for (;;) Line 2504  for (;;)
2504          /* UTF-8 mode */          /* UTF-8 mode */
2505          if (utf8)          if (utf8)
2506            {            {
2507            register int d;            register unsigned int d;
2508            for (fi = min;; fi++)            for (fi = min;; fi++)
2509              {              {
2510              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2511              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2512              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2513              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fi >= max || eptr >= md->end_subject || fc == d)
# Line 2308  for (;;) Line 2520  for (;;)
2520            {            {
2521            for (fi = min;; fi++)            for (fi = min;; fi++)
2522              {              {
2523              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2524              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2525              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2526                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2327  for (;;) Line 2539  for (;;)
2539          /* UTF-8 mode */          /* UTF-8 mode */
2540          if (utf8)          if (utf8)
2541            {            {
2542            register int d;            register unsigned int d;
2543            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2544              {              {
2545              int len = 1;              int len = 1;
# Line 2336  for (;;) Line 2548  for (;;)
2548              if (fc == d) break;              if (fc == d) break;
2549              eptr += len;              eptr += len;
2550              }              }
2551              if (possessive) continue;
2552            for(;;)            for(;;)
2553              {              {
2554              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2555              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2556              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2557              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2353  for (;;) Line 2566  for (;;)
2566              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2567              eptr++;              eptr++;
2568              }              }
2569              if (possessive) continue;
2570            while (eptr >= pp)            while (eptr >= pp)
2571              {              {
2572              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2573              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2574              eptr--;              eptr--;
2575              }              }
# Line 2384  for (;;) Line 2598  for (;;)
2598      ecode += 3;      ecode += 3;
2599      goto REPEATTYPE;      goto REPEATTYPE;
2600    
2601      case OP_TYPESTAR:      case OP_TYPEPOSSTAR:
2602      case OP_TYPEMINSTAR:      possessive = TRUE;
2603      case OP_TYPEPLUS:      min = 0;
2604      case OP_TYPEMINPLUS:      max = INT_MAX;
2605      case OP_TYPEQUERY:      ecode++;
2606      case OP_TYPEMINQUERY:      goto REPEATTYPE;
2607      c = *ecode++ - OP_TYPESTAR;  
2608      minimize = (c & 1) != 0;      case OP_TYPEPOSPLUS:
2609      min = rep_min[c];                 /* Pick up values from tables; */      possessive = TRUE;
2610      max = rep_max[c];                 /* zero for max => infinity */      min = 1;
2611      if (max == 0) max = INT_MAX;      max = INT_MAX;
2612        ecode++;
2613        goto REPEATTYPE;
2614    
2615        case OP_TYPEPOSQUERY:
2616        possessive = TRUE;
2617        min = 0;
2618        max = 1;
2619        ecode++;
2620        goto REPEATTYPE;
2621    
2622        case OP_TYPEPOSUPTO:
2623        possessive = TRUE;
2624        min = 0;
2625        max = GET2(ecode, 1);
2626        ecode += 3;
2627        goto REPEATTYPE;
2628    
2629        case OP_TYPESTAR:
2630        case OP_TYPEMINSTAR:
2631        case OP_TYPEPLUS:
2632        case OP_TYPEMINPLUS:
2633        case OP_TYPEQUERY:
2634        case OP_TYPEMINQUERY:
2635        c = *ecode++ - OP_TYPESTAR;
2636        minimize = (c & 1) != 0;
2637        min = rep_min[c];                 /* Pick up values from tables; */
2638        max = rep_max[c];                 /* zero for max => infinity */
2639        if (max == 0) max = INT_MAX;
2640    
2641      /* Common code for all repeated single character type matches. Note that      /* Common code for all repeated single character type matches. Note that
2642      in UTF-8 mode, '.' matches a character of any length, but for the other      in UTF-8 mode, '.' matches a character of any length, but for the other
# Line 2408  for (;;) Line 2650  for (;;)
2650        {        {
2651        prop_fail_result = ctype == OP_NOTPROP;        prop_fail_result = ctype == OP_NOTPROP;
2652        prop_type = *ecode++;        prop_type = *ecode++;
2653        if (prop_type >= 128)        prop_value = *ecode++;
         {  
         prop_test_against = prop_type - 128;  
         prop_test_variable = &prop_category;  
         }  
       else  
         {  
         prop_test_against = prop_type;  
         prop_test_variable = &prop_chartype;  
         }  
2654        }        }
2655      else prop_type = -1;      else prop_type = -1;
2656  #endif  #endif
# Line 2434  for (;;) Line 2667  for (;;)
2667      if (min > 0)      if (min > 0)
2668        {        {
2669  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2670        if (prop_type > 0)        if (prop_type >= 0)
2671          {          {
2672          for (i = 1; i <= min; i++)          switch(prop_type)
2673            {            {
2674            GETCHARINC(c, eptr);            case PT_ANY:
2675            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2676            if ((*prop_test_variable == prop_test_against) == prop_fail_result)            for (i = 1; i <= min; i++)
2677              RRETURN(MATCH_NOMATCH);              {
2678                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2679                GETCHARINC(c, eptr);
2680                }
2681              break;
2682    
2683              case PT_LAMP:
2684              for (i = 1; i <= min; i++)
2685                {
2686                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2687                GETCHARINC(c, eptr);
2688                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2689                if ((prop_chartype == ucp_Lu ||
2690                     prop_chartype == ucp_Ll ||
2691                     prop_chartype == ucp_Lt) == prop_fail_result)
2692                  RRETURN(MATCH_NOMATCH);
2693                }
2694              break;
2695    
2696              case PT_GC:
2697              for (i = 1; i <= min; i++)
2698                {
2699                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2700                GETCHARINC(c, eptr);
2701                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2702                if ((prop_category == prop_value) == prop_fail_result)
2703                  RRETURN(MATCH_NOMATCH);
2704                }
2705              break;
2706    
2707              case PT_PC:
2708              for (i = 1; i <= min; i++)
2709                {
2710                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2711                GETCHARINC(c, eptr);
2712                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2713                if ((prop_chartype == prop_value) == prop_fail_result)
2714                  RRETURN(MATCH_NOMATCH);
2715                }
2716              break;
2717    
2718              case PT_SC:
2719              for (i = 1; i <= min; i++)
2720                {
2721                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2722                GETCHARINC(c, eptr);
2723                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2724                if ((prop_script == prop_value) == prop_fail_result)
2725                  RRETURN(MATCH_NOMATCH);
2726                }
2727              break;
2728    
2729              default:
2730              RRETURN(PCRE_ERROR_INTERNAL);
2731            }            }
2732          }          }
2733    
# Line 2453  for (;;) Line 2739  for (;;)
2739          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2740            {            {
2741            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2742            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2743            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2744            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2745              {              {
# Line 2462  for (;;) Line 2748  for (;;)
2748                {                {
2749                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2750                }                }
2751              prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2752              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2753              eptr += len;              eptr += len;
2754              }              }
# Line 2481  for (;;) Line 2767  for (;;)
2767          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2768            {            {
2769            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
2770               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2771              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2772              eptr++;
2773            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2774            }            }
2775          break;          break;
# Line 2491  for (;;) Line 2778  for (;;)
2778          eptr += min;          eptr += min;
2779          break;          break;
2780    
2781            case OP_ANYNL:
2782            for (i = 1; i <= min; i++)
2783              {
2784              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2785              GETCHARINC(c, eptr);
2786              switch(c)
2787                {
2788                default: RRETURN(MATCH_NOMATCH);
2789                case 0x000d:
2790                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2791                break;
2792                case 0x000a:
2793                case 0x000b:
2794                case 0x000c:
2795                case 0x0085:
2796                case 0x2028:
2797                case 0x2029:
2798                break;
2799                }
2800              }
2801            break;
2802    
2803          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2804          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2805            {            {
# Line 2559  for (;;) Line 2868  for (;;)
2868  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
2869    
2870        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
2871        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
2872          number of bytes present, as this was tested above. */
2873    
2874        switch(ctype)        switch(ctype)
2875          {          {
# Line 2567  for (;;) Line 2877  for (;;)
2877          if ((ims & PCRE_DOTALL) == 0)          if ((ims & PCRE_DOTALL) == 0)
2878            {            {
2879            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2880              if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);              {
2881                if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2882                eptr++;
2883                }
2884            }            }
2885          else eptr += min;          else eptr += min;
2886          break;          break;
# Line 2576  for (;;) Line 2889  for (;;)
2889          eptr += min;          eptr += min;
2890          break;          break;
2891    
2892            /* Because of the CRLF case, we can't assume the minimum number of
2893            bytes are present in this case. */
2894    
2895            case OP_ANYNL:
2896            for (i = 1; i <= min; i++)
2897              {
2898              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2899              switch(*eptr++)
2900                {
2901                default: RRETURN(MATCH_NOMATCH);
2902                case 0x000d:
2903                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2904                break;
2905                case 0x000a:
2906                case 0x000b:
2907                case 0x000c:
2908                case 0x0085:
2909                break;
2910                }
2911              }
2912            break;
2913    
2914          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2915          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2916            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2624  for (;;) Line 2959  for (;;)
2959      if (minimize)      if (minimize)
2960        {        {
2961  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2962        if (prop_type > 0)        if (prop_type >= 0)
2963          {          {
2964          for (fi = min;; fi++)          switch(prop_type)
2965            {            {
2966            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            case PT_ANY:
2967            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for (fi = min;; fi++)
2968            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              {
2969            GETCHARINC(c, eptr);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
2970            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2971            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2972              RRETURN(MATCH_NOMATCH);              GETCHARINC(c, eptr);
2973                if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2974                }
2975              /* Control never gets here */
2976    
2977              case PT_LAMP:
2978              for (fi = min;; fi++)
2979                {
2980                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
2981                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2982                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2983                GETCHARINC(c, eptr);
2984                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2985                if ((prop_chartype == ucp_Lu ||
2986                     prop_chartype == ucp_Ll ||
2987                     prop_chartype == ucp_Lt) == prop_fail_result)
2988                  RRETURN(MATCH_NOMATCH);
2989                }
2990              /* Control never gets here */
2991    
2992              case PT_GC:
2993              for (fi = min;; fi++)
2994                {
2995                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
2996                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2997                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2998                GETCHARINC(c, eptr);
2999                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3000                if ((prop_category == prop_value) == prop_fail_result)
3001                  RRETURN(MATCH_NOMATCH);
3002                }
3003              /* Control never gets here */
3004    
3005              case PT_PC:
3006              for (fi = min;; fi++)
3007                {
3008                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3009                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3010                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3011                GETCHARINC(c, eptr);
3012                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3013                if ((prop_chartype == prop_value) == prop_fail_result)
3014                  RRETURN(MATCH_NOMATCH);
3015                }
3016              /* Control never gets here */
3017    
3018              case PT_SC:
3019              for (fi = min;; fi++)
3020                {
3021                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3022                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3023                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3024                GETCHARINC(c, eptr);
3025                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3026                if ((prop_script == prop_value) == prop_fail_result)
3027                  RRETURN(MATCH_NOMATCH);
3028                }
3029              /* Control never gets here */
3030    
3031              default:
3032              RRETURN(PCRE_ERROR_INTERNAL);
3033            }            }
3034          }          }
3035    
# Line 2645  for (;;) Line 3040  for (;;)
3040          {          {
3041          for (fi = min;; fi++)          for (fi = min;; fi++)
3042            {            {
3043            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3044            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3045            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3046            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3047            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3048            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3049            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3050              {              {
# Line 2658  for (;;) Line 3053  for (;;)
3053                {                {
3054                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3055                }                }
3056              prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3057              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3058              eptr += len;              eptr += len;
3059              }              }
# Line 2674  for (;;) Line 3069  for (;;)
3069          {          {
3070          for (fi = min;; fi++)          for (fi = min;; fi++)
3071            {            {
3072            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3073            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3074            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3075                   (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3076                    IS_NEWLINE(eptr)))
3077                RRETURN(MATCH_NOMATCH);
3078    
3079            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3080            switch(ctype)            switch(ctype)
3081              {              {
3082              case OP_ANY:              case OP_ANY:        /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3083              break;              break;
3084    
3085              case OP_ANYBYTE:              case OP_ANYBYTE:
3086              break;              break;
3087    
3088                case OP_ANYNL:
3089                switch(c)
3090                  {
3091                  default: RRETURN(MATCH_NOMATCH);
3092                  case 0x000d:
3093                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3094                  break;
3095                  case 0x000a:
3096                  case 0x000b:
3097                  case 0x000c:
3098                  case 0x0085:
3099                  case 0x2028:
3100                  case 0x2029:
3101                  break;
3102                  }
3103                break;
3104    
3105              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3106              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3107                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2729  for (;;) Line 3143  for (;;)
3143          {          {
3144          for (fi = min;; fi++)          for (fi = min;; fi++)
3145            {            {
3146            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3147            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3148            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3149                   ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3150                RRETURN(MATCH_NOMATCH);
3151    
3152            c = *eptr++;            c = *eptr++;
3153            switch(ctype)            switch(ctype)
3154              {              {
3155              case OP_ANY:              case OP_ANY:   /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3156              break;              break;
3157    
3158              case OP_ANYBYTE:              case OP_ANYBYTE:
3159              break;              break;
3160    
3161                case OP_ANYNL:
3162                switch(c)
3163                  {
3164                  default: RRETURN(MATCH_NOMATCH);
3165                  case 0x000d:
3166                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3167                  break;
3168                  case 0x000a:
3169                  case 0x000b:
3170                  case 0x000c:
3171                  case 0x0085:
3172                  break;
3173                  }
3174                break;
3175    
3176              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3177              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3178              break;              break;
# Line 2774  for (;;) Line 3205  for (;;)
3205        /* Control never gets here */        /* Control never gets here */
3206        }        }
3207    
3208      /* If maximizing it is worth using inline code for speed, doing the type      /* If maximizing, it is worth using inline code for speed, doing the type
3209      test once at the start (i.e. keep it out of the loop). Again, keep the      test once at the start (i.e. keep it out of the loop). Again, keep the
3210      UTF-8 and UCP stuff separate. */      UTF-8 and UCP stuff separate. */
3211    
# Line 2783  for (;;) Line 3214  for (;;)
3214        pp = eptr;  /* Remember where we started */        pp = eptr;  /* Remember where we started */
3215    
3216  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3217        if (prop_type > 0)        if (prop_type >= 0)
3218          {          {
3219          for (i = min; i < max; i++)          switch(prop_type)
3220            {            {
3221            int len = 1;            case PT_ANY:
3222            if (eptr >= md->end_subject) break;            for (i = min; i < max; i++)
3223            GETCHARLEN(c, eptr, len);              {
3224            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);              int len = 1;
3225            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (eptr >= md->end_subject) break;
3226              break;              GETCHARLEN(c, eptr, len);
3227            eptr+= len;              if (prop_fail_result) break;
3228                eptr+= len;
3229                }
3230              break;
3231    
3232              case PT_LAMP:
3233              for (i = min; i < max; i++)
3234                {
3235                int len = 1;
3236                if (eptr >= md->end_subject) break;
3237                GETCHARLEN(c, eptr, len);
3238                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3239                if ((prop_chartype == ucp_Lu ||
3240                     prop_chartype == ucp_Ll ||
3241                     prop_chartype == ucp_Lt) == prop_fail_result)
3242                  break;
3243                eptr+= len;
3244                }
3245              break;
3246    
3247              case PT_GC:
3248              for (i = min; i < max; i++)
3249                {
3250                int len = 1;
3251                if (eptr >= md->end_subject) break;
3252                GETCHARLEN(c, eptr, len);
3253                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3254                if ((prop_category == prop_value) == prop_fail_result)
3255                  break;
3256                eptr+= len;
3257                }
3258              break;
3259    
3260              case PT_PC:
3261              for (i = min; i < max; i++)
3262                {
3263                int len = 1;
3264                if (eptr >= md->end_subject) break;
3265                GETCHARLEN(c, eptr, len);
3266                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3267                if ((prop_chartype == prop_value) == prop_fail_result)
3268                  break;
3269                eptr+= len;
3270                }
3271              break;
3272    
3273              case PT_SC:
3274              for (i = min; i < max; i++)
3275                {
3276                int len = 1;
3277                if (eptr >= md->end_subject) break;
3278                GETCHARLEN(c, eptr, len);
3279                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3280                if ((prop_script == prop_value) == prop_fail_result)
3281                  break;
3282                eptr+= len;
3283                }
3284              break;
3285            }            }
3286    
3287          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3288    
3289            if (possessive) continue;
3290          for(;;)          for(;;)
3291            {            {
3292            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3293            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3294            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3295            BACKCHAR(eptr);            BACKCHAR(eptr);
# Line 2816  for (;;) Line 3305  for (;;)
3305            {            {
3306            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3307            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3308            prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3309            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3310            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3311              {              {
# Line 2825  for (;;) Line 3314  for (;;)
3314                {                {
3315                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3316                }                }
3317              prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3318              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3319              eptr += len;              eptr += len;
3320              }              }
# Line 2833  for (;;) Line 3322  for (;;)
3322    
3323          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3324    
3325            if (possessive) continue;
3326          for(;;)          for(;;)
3327            {            {
3328            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3329            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3330            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3331            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
# Line 2846  for (;;) Line 3336  for (;;)
3336                {                {
3337                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3338                }                }
3339              prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3340              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3341              eptr--;              eptr--;
3342              }              }
# Line 2865  for (;;) Line 3355  for (;;)
3355            {            {
3356            case OP_ANY:            case OP_ANY:
3357    
3358            /* Special code is required for UTF8, but when the maximum is unlimited            /* Special code is required for UTF8, but when the maximum is
3359            we don't need it, so we repeat the non-UTF8 code. This is probably            unlimited we don't need it, so we repeat the non-UTF8 code. This is
3360            worth it, because .* is quite a common idiom. */            probably worth it, because .* is quite a common idiom. */
3361    
3362            if (max < INT_MAX)            if (max < INT_MAX)
3363              {              {
# Line 2875  for (;;) Line 3365  for (;;)
3365                {                {
3366                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3367                  {                  {
3368                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3369                  eptr++;                  eptr++;
3370                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3371                  }                  }
# Line 2884  for (;;) Line 3374  for (;;)
3374                {                {
3375                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3376                  {                  {
3377                    if (eptr >= md->end_subject) break;
3378                  eptr++;                  eptr++;
3379                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3380                  }                  }
# Line 2898  for (;;) Line 3389  for (;;)
3389                {                {
3390                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3391                  {                  {
3392                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3393                  eptr++;                  eptr++;
3394                  }                  }
3395                break;                break;
# Line 2906  for (;;) Line 3397  for (;;)
3397              else              else
3398                {                {
3399                c = max - min;                c = max - min;
3400                if (c > md->end_subject - eptr) c = md->end_subject - eptr;                if (c > (unsigned int)(md->end_subject - eptr))
3401                    c = md->end_subject - eptr;
3402                eptr += c;                eptr += c;
3403                }                }
3404              }              }
# Line 2916  for (;;) Line 3408  for (;;)
3408    
3409            case OP_ANYBYTE:            case OP_ANYBYTE:
3410            c = max - min;            c = max - min;
3411            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3412                c = md->end_subject - eptr;
3413            eptr += c;            eptr += c;
3414            break;            break;
3415    
3416              case OP_ANYNL:
3417              for (i = min; i < max; i++)
3418                {
3419                int len = 1;
3420                if (eptr >= md->end_subject) break;
3421                GETCHARLEN(c, eptr, len);
3422                if (c == 0x000d)
3423                  {
3424                  if (++eptr >= md->end_subject) break;
3425                  if (*eptr == 0x000a) eptr++;
3426                  }
3427                else
3428                  {
3429                  if (c != 0x000a && c != 0x000b && c != 0x000c &&
3430                      c != 0x0085 && c != 0x2028 && c != 0x2029)
3431                    break;
3432                  eptr += len;
3433                  }
3434                }
3435              break;
3436    
3437            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3438            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3439              {              {
# Line 2992  for (;;) Line 3506  for (;;)
3506    
3507          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3508    
3509            if (possessive) continue;
3510          for(;;)          for(;;)
3511            {            {
3512            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
3513            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3514            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3515            BACKCHAR(eptr);            BACKCHAR(eptr);
# Line 3012  for (;;) Line 3527  for (;;)
3527              {              {
3528              for (i = min; i < max; i++)              for (i = min; i < max; i++)
3529                {                {
3530                if (eptr >= md->end_subject || *eptr == NEWLINE) break;                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3531                eptr++;                eptr++;
3532                }                }
3533              break;              break;
# Line 3021  for (;;) Line 3536  for (;;)
3536    
3537            case OP_ANYBYTE:            case OP_ANYBYTE:
3538            c = max - min;            c = max - min;
3539            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3540                c = md->end_subject - eptr;
3541            eptr += c;            eptr += c;
3542            break;            break;
3543    
3544              case OP_ANYNL:
3545              for (i = min; i < max; i++)
3546                {
3547                if (eptr >= md->end_subject) break;
3548                c = *eptr;
3549                if (c == 0x000d)
3550                  {
3551                  if (++eptr >= md->end_subject) break;
3552                  if (*eptr == 0x000a) eptr++;
3553                  }
3554                else
3555                  {
3556                  if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
3557                    break;
3558                  eptr++;
3559                  }
3560                }
3561              break;
3562    
3563            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3564            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3565              {              {
# Line 3085  for (;;) Line 3620  for (;;)
3620    
3621          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3622    
3623            if (possessive) continue;
3624          while (eptr >= pp)          while (eptr >= pp)
3625            {            {
3626            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
3627            eptr--;            eptr--;
3628            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3629            }            }
# Line 3099  for (;;) Line 3635  for (;;)
3635        }        }
3636      /* Control never gets here */      /* Control never gets here */
3637    
3638      /* There's been some horrible disaster. Since all codes > OP_BRA are      /* There's been some horrible disaster. Arrival here can only mean there is
3639      for capturing brackets, and there shouldn't be any gaps between 0 and      something seriously wrong in the code above or the OP_xxx definitions. */
     OP_BRA, arrival here can only mean there is something seriously wrong  
     in the code above or the OP_xxx definitions. */  
3640    
3641      default:      default:
3642      DPRINTF(("Unknown opcode %d\n", *ecode));      DPRINTF(("Unknown opcode %d\n", *ecode));
3643      RRETURN(PCRE_ERROR_UNKNOWN_NODE);      RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
3644      }      }
3645    
3646    /* Do not stick any code in here without much thought; it is assumed    /* Do not stick any code in here without much thought; it is assumed
# Line 3115  for (;;) Line 3649  for (;;)
3649    
3650    }             /* End of main loop */    }             /* End of main loop */
3651  /* Control never reaches here */  /* Control never reaches here */
3652    
3653    
3654    /* When compiling to use the heap rather than the stack for recursive calls to
3655    match(), the RRETURN() macro jumps here. The number that is saved in
3656    frame->Xwhere indicates which label we actually want to return to. */
3657    
3658    #ifdef NO_RECURSE
3659    #define LBL(val) case val: goto L_RM##val;
3660    HEAP_RETURN:
3661    switch (frame->Xwhere)
3662      {
3663      LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
3664      LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
3665      LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
3666      LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
3667      LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
3668      LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
3669      default:
3670      DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
3671      return PCRE_ERROR_INTERNAL;
3672      }
3673    #undef LBL
3674    #endif  /* NO_RECURSE */
3675  }  }
3676    
3677    
# Line 3144  Undefine all the macros that were define Line 3701  Undefine all the macros that were define
3701    
3702  #undef cur_is_word  #undef cur_is_word
3703  #undef condition  #undef condition
 #undef minimize  
3704  #undef prev_is_word  #undef prev_is_word
3705    
3706  #undef original_ims  #undef original_ims
# Line 3200  Returns:          > 0 => success; value Line 3756  Returns:          > 0 => success; value
3756                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3757  */  */
3758    
3759  EXPORT int  PCRE_EXP_DEFN int
3760  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
3761    const char *subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
3762    int offsetcount)    int offsetcount)
3763  {  {
3764  int rc, resetcount, ocount;  int rc, resetcount, ocount;
3765  int first_byte = -1;  int first_byte = -1;
3766  int req_byte = -1;  int req_byte = -1;
3767  int req_byte2 = -1;  int req_byte2 = -1;
3768  unsigned long int ims = 0;  int newline;
3769    unsigned long int ims;
3770  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
3771  BOOL anchored;  BOOL anchored;
3772  BOOL startline;  BOOL startline;
3773  BOOL firstline;  BOOL firstline;
3774  BOOL first_byte_caseless = FALSE;  BOOL first_byte_caseless = FALSE;
3775  BOOL req_byte_caseless = FALSE;  BOOL req_byte_caseless = FALSE;
3776    BOOL utf8;
3777  match_data match_block;  match_data match_block;
3778    match_data *md = &match_block;
3779  const uschar *tables;  const uschar *tables;
3780  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3781  const uschar *start_match = (const uschar *)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
3782  const uschar *end_subject;  USPTR end_subject;
3783  const uschar *req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
3784    eptrblock eptrchain[EPTR_WORK_SIZE];
3785    
3786  pcre_study_data internal_study;  pcre_study_data internal_study;
3787  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3241  if (offsetcount < 0) return PCRE_ERROR_B Line 3801  if (offsetcount < 0) return PCRE_ERROR_B
3801  the default values. */  the default values. */
3802    
3803  study = NULL;  study = NULL;
3804  match_block.match_limit = MATCH_LIMIT;  md->match_limit = MATCH_LIMIT;
3805  match_block.callout_data = NULL;  md->match_limit_recursion = MATCH_LIMIT_RECURSION;
3806    md->callout_data = NULL;
3807    
3808  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
3809    
# Line 3254  if (extra_data != NULL) Line 3815  if (extra_data != NULL)
3815    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3816      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
3817    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
3818      match_block.match_limit = extra_data->match_limit;      md->match_limit = extra_data->match_limit;
3819      if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3820        md->match_limit_recursion = extra_data->match_limit_recursion;
3821    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3822      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
3823    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
3824    }    }
3825    
# Line 3286  firstline = (re->options & PCRE_FIRSTLIN Line 3849  firstline = (re->options & PCRE_FIRSTLIN
3849    
3850  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
3851    
3852  match_block.start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const uschar *)external_re + re->name_table_offset +
3853    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
3854    
3855  match_block.start_subject = (const uschar *)subject;  md->start_subject = (USPTR)subject;
3856  match_block.start_offset = start_offset;  md->start_offset = start_offset;
3857  match_block.end_subject = match_block.start_subject + length;  md->end_subject = md->start_subject + length;
3858  end_subject = match_block.end_subject;  end_subject = md->end_subject;
3859    
3860  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
3861  match_block.utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
3862    
3863  match_block.notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
3864  match_block.noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
3865  match_block.notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
3866  match_block.partial = (options & PCRE_PARTIAL) != 0;  md->partial = (options & PCRE_PARTIAL) != 0;
3867  match_block.hitend = FALSE;  md->hitend = FALSE;
3868    
3869    md->recursive = NULL;                   /* No recursion at top level */
3870    md->eptrchain = eptrchain;              /* Make workspace generally available */
3871    
3872    md->lcc = tables + lcc_offset;
3873    md->ctypes = tables + ctypes_offset;
3874    
3875  match_block.recursive = NULL;                   /* No recursion at top level */  /* Handle different types of newline. The three bits give eight cases. If
3876    nothing is set at run time, whatever was used at compile time applies. */
3877    
3878  match_block.lcc = tables + lcc_offset;  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
3879  match_block.ctypes = tables + ctypes_offset;         PCRE_NEWLINE_BITS)
3880      {
3881      case 0: newline = NEWLINE; break;   /* Compile-time default */
3882      case PCRE_NEWLINE_CR: newline = '\r'; break;
3883      case PCRE_NEWLINE_LF: newline = '\n'; break;
3884      case PCRE_NEWLINE_CR+
3885           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
3886      case PCRE_NEWLINE_ANY: newline = -1; break;
3887      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
3888      default: return PCRE_ERROR_BADNEWLINE;
3889      }
3890    
3891    if (newline == -2)
3892      {
3893      md->nltype = NLTYPE_ANYCRLF;
3894      }
3895    else if (newline < 0)
3896      {
3897      md->nltype = NLTYPE_ANY;
3898      }
3899    else
3900      {
3901      md->nltype = NLTYPE_FIXED;
3902      if (newline > 255)
3903        {
3904        md->nllen = 2;
3905        md->nl[0] = (newline >> 8) & 255;
3906        md->nl[1] = newline & 255;
3907        }
3908      else
3909        {
3910        md->nllen = 1;
3911        md->nl[0] = newline;
3912        }
3913      }
3914    
3915  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
3916  moment. */  moment. */
3917    
3918  if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
3919    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
3920    
3921  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3922  back the character offset. */  back the character offset. */
3923    
3924  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3925  if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3926    {    {
3927    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
3928      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
# Line 3350  ocount = offsetcount - (offsetcount % 3) Line 3954  ocount = offsetcount - (offsetcount % 3)
3954  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
3955    {    {
3956    ocount = re->top_backref * 3 + 3;    ocount = re->top_backref * 3 + 3;
3957    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3958    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3959    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
3960    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
3961    }    }
3962  else match_block.offset_vector = offsets;  else md->offset_vector = offsets;
3963    
3964  match_block.offset_end = ocount;  md->offset_end = ocount;
3965  match_block.offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
3966  match_block.offset_overflow = FALSE;  md->offset_overflow = FALSE;
3967  match_block.capture_last = -1;  md->capture_last = -1;
3968    
3969  /* Compute the minimum number of offsets that we need to reset each time. Doing  /* Compute the minimum number of offsets that we need to reset each time. Doing
3970  this makes a huge difference to execution time when there aren't many brackets  this makes a huge difference to execution time when there aren't many brackets
# Line 3373  if (resetcount > offsetcount) resetcount Line 3977  if (resetcount > offsetcount) resetcount
3977  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
3978  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. */
3979    
3980  if (match_block.offset_vector != NULL)  if (md->offset_vector != NULL)
3981    {    {
3982    register int *iptr = match_block.offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
3983    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - resetcount/2 + 1;
3984    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
3985    }    }
# Line 3392  if (!anchored) Line 3996  if (!anchored)
3996      {      {
3997      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
3998      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
3999        first_byte = match_block.lcc[first_byte];        first_byte = md->lcc[first_byte];
4000      }      }
4001    else    else
4002      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 3410  if ((re->options & PCRE_REQCHSET) != 0) Line 4014  if ((re->options & PCRE_REQCHSET) != 0)
4014    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
4015    }    }
4016    
4017    
4018    /* ==========================================================================*/
4019    
4020  /* Loop for handling unanchored repeated matching attempts; for anchored regexes  /* Loop for handling unanchored repeated matching attempts; for anchored regexes
4021  the loop runs just once. */  the loop runs just once. */
4022    
4023  do  for(;;)
4024    {    {
4025    const uschar *save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
4026    
4027    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
4028    
4029    if (match_block.offset_vector != NULL)    if (md->offset_vector != NULL)
4030      {      {
4031      register int *iptr = match_block.offset_vector;      register int *iptr = md->offset_vector;
4032      register int *iend = iptr + resetcount;      register int *iend = iptr + resetcount;
4033      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
4034      }      }
4035    
4036    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* Advance to a unique first char if possible. If firstline is TRUE, the
4037    start of the match is constrained to the first line of a multiline string.    start of the match is constrained to the first line of a multiline string.
4038    Implement this by temporarily adjusting end_subject so that we stop scanning    That is, the match must be before or at the first newline. Implement this by
4039    at a newline. If the match fails at the newline, later code breaks this loop.    temporarily adjusting end_subject so that we stop scanning at a newline. If
4040    */    the match fails at the newline, later code breaks this loop. */
4041    
4042    if (firstline)    if (firstline)
4043      {      {
4044      const uschar *t = start_match;      USPTR t = start_match;
4045      while (t < save_end_subject && *t != '\n') t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4046      end_subject = t;      end_subject = t;
4047      }      }
4048    
# Line 3445  do Line 4052  do
4052      {      {
4053      if (first_byte_caseless)      if (first_byte_caseless)
4054        while (start_match < end_subject &&        while (start_match < end_subject &&
4055               match_block.lcc[*start_match] != first_byte)               md->lcc[*start_match] != first_byte)
4056          start_match++;          start_match++;
4057      else      else
4058        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4059          start_match++;          start_match++;
4060      }      }
4061    
4062    /* Or to just after \n for a multiline match if possible */    /* Or to just after a linebreak for a multiline match if possible */
4063    
4064    else if (startline)    else if (startline)
4065      {      {
4066      if (start_match > match_block.start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4067        {        {
4068        while (start_match < end_subject && start_match[-1] != NEWLINE)        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4069            start_match++;
4070    
4071          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4072          and we are now at a LF, advance the match position by one more character.
4073          */
4074    
4075          if (start_match[-1] == '\r' &&
4076               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4077               start_match < end_subject &&
4078               *start_match == '\n')
4079          start_match++;          start_match++;
4080        }        }
4081      }      }
# Line 3480  do Line 4097  do
4097    
4098  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4099    printf(">>>> Match against: ");    printf(">>>> Match against: ");
4100    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, md);
4101    printf("\n");    printf("\n");
4102  #endif  #endif
4103    
# Line 3494  do Line 4111  do
4111    
4112    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end can
4113    take a long time, and give bad performance on quite ordinary patterns. This    take a long time, and give bad performance on quite ordinary patterns. This
4114    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4115    don't do this when the string is sufficiently long.    string... so we don't do this when the string is sufficiently long.
4116    
4117    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested.
4118    */    */
4119    
4120    if (req_byte >= 0 &&    if (req_byte >= 0 &&
4121        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
4122        !match_block.partial)        !md->partial)
4123      {      {
4124      register const uschar *p = start_match + ((first_byte >= 0)? 1 : 0);      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4125    
4126      /* We don't need to repeat the search if we haven't yet reached the      /* We don't need to repeat the search if we haven't yet reached the
4127      place we found it at last time. */      place we found it at last time. */
# Line 3527  do Line 4144  do
4144            }            }
4145          }          }
4146    
4147        /* If we can't find the required character, break the matching loop */        /* If we can't find the required character, break the matching loop,
4148          forcing a match failure. */
4149    
4150        if (p >= end_subject) break;        if (p >= end_subject)
4151            {
4152            rc = MATCH_NOMATCH;
4153            break;
4154            }
4155    
4156        /* If we have found the required character, save the point where we        /* If we have found the required character, save the point where we
4157        found it, so that we don't search again next time round the loop if        found it, so that we don't search again next time round the loop if
# Line 3539  do Line 4161  do
4161        }        }
4162      }      }
4163    
4164    /* When a match occurs, substrings will be set for all internal extractions;    /* OK, we can now run the match. */
   we just need to set up the whole thing as substring 0 before returning. If  
   there were too many extractions, set the return code to zero. In the case  
   where we had to get some local store to hold offsets for backreferences, copy  
   those back references that we can. In this case there need not be overflow  
   if certain parts of the pattern were not used. */  
   
   match_block.start_match = start_match;  
   match_block.match_call_count = 0;  
   
   rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,  
     match_isgroup);  
   
   /* When the result is no match, if the subject's first character was a  
   newline and the PCRE_FIRSTLINE option is set, break (which will return  
   PCRE_ERROR_NOMATCH). The option requests that a match occur before the first  
   newline in the subject. Otherwise, advance the pointer to the next character  
   and continue - but the continuation will actually happen only when the  
   pattern is not anchored. */  
4165    
4166    if (rc == MATCH_NOMATCH)    md->start_match = start_match;
4167      {    md->match_call_count = 0;
4168      if (firstline && *start_match == NEWLINE) break;    md->eptrn = 0;                          /* Next free eptrchain slot */
4169      start_match++;    rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);
4170    
4171      /* Any return other than MATCH_NOMATCH breaks the loop. */
4172    
4173      if (rc != MATCH_NOMATCH) break;
4174    
4175      /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4176      newline in the subject (though it may continue over the newline). Therefore,
4177      if we have just failed to match, starting at a newline, do not continue. */
4178    
4179      if (firstline && IS_NEWLINE(start_match)) break;
4180    
4181      /* Advance the match position by one character. */
4182    
4183      start_match++;
4184  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4185      if (match_block.utf8)    if (utf8)
4186        while(start_match < end_subject && (*start_match & 0xc0) == 0x80)      while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4187          start_match++;        start_match++;
4188  #endif  #endif
     continue;  
     }  
4189    
4190    if (rc != MATCH_MATCH)    /* Break the loop if the pattern is anchored or if we have passed the end of
4191      {    the subject. */
4192      DPRINTF((">>>> error: returning %d\n", rc));  
4193      return rc;    if (anchored || start_match > end_subject) break;
4194      }  
4195      /* If we have just passed a CR and the newline option is CRLF or ANY or
4196      ANYCRLF, and we are now at a LF, advance the match position by one more
4197      character. */
4198    
4199      if (start_match[-1] == '\r' &&
4200           (md->nltype == NLTYPE_ANY ||
4201            md->nltype == NLTYPE_ANYCRLF ||
4202            md->nllen == 2) &&
4203           start_match < end_subject &&
4204           *start_match == '\n')
4205        start_match++;
4206    
4207      }   /* End of for(;;) "bumpalong" loop */
4208    
4209    /* ==========================================================================*/
4210    
4211    /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4212    conditions is true:
4213    
4214    (1) The pattern is anchored;
4215    
4216    (2) We are past the end of the subject;
4217    
4218    /* We have a match! Copy the offset information from temporary store if  (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4219    necessary */      this option requests that a match occur at or before the first newline in
4220        the subject.
4221    
4222    When we have a match and the offset vector is big enough to deal with any
4223    backreferences, captured substring offsets will already be set up. In the case
4224    where we had to get some local store to hold offsets for backreference
4225    processing, copy those that we can. In this case there need not be overflow if
4226    certain parts of the pattern were not used, even though there are more
4227    capturing parentheses than vector slots. */
4228    
4229    if (rc == MATCH_MATCH)
4230      {
4231    if (using_temporary_offsets)    if (using_temporary_offsets)
4232      {      {
4233      if (offsetcount >= 4)      if (offsetcount >= 4)
4234        {        {
4235        memcpy(offsets + 2, match_block.offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
4236          (offsetcount - 2) * sizeof(int));          (offsetcount - 2) * sizeof(int));
4237        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
4238        }        }
4239      if (match_block.end_offset_top > offsetcount)      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
       match_block.offset_overflow = TRUE;  
   
4240      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
4241      (pcre_free)(match_block.offset_vector);      (pcre_free)(md->offset_vector);
4242      }      }
4243    
4244    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;    /* Set the return code to the number of captured strings, or 0 if there are
4245      too many to fit into the vector. */
4246    
4247      rc = md->offset_overflow? 0 : md->end_offset_top/2;
4248    
4249      /* If there is space, set up the whole thing as substring 0. */
4250    
4251    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4252      {      {
4253      offsets[0] = start_match - match_block.start_subject;      offsets[0] = start_match - md->start_subject;
4254      offsets[1] = match_block.end_match_ptr - match_block.start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
4255      }      }
4256    
4257    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
4258    return rc;    return rc;
4259    }    }
4260    
4261  /* This "while" is the end of the "do" above */  /* Control gets here if there has been an error, or if the overall match
4262    attempt has failed at all permitted starting positions. */
 while (!anchored && start_match <= end_subject);  
4263    
4264  if (using_temporary_offsets)  if (using_temporary_offsets)
4265    {    {
4266    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
4267    (pcre_free)(match_block.offset_vector);    (pcre_free)(md->offset_vector);
4268    }    }
4269    
4270  if (match_block.partial && match_block.hitend)  if (rc != MATCH_NOMATCH)
4271      {
4272      DPRINTF((">>>> error: returning %d\n", rc));
4273      return rc;
4274      }
4275    else if (md->partial && md->hitend)
4276    {    {
4277    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4278    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;

Legend:
Removed from v.77  
changed lines
  Added in v.165

  ViewVC Help
Powered by ViewVC 1.1.5