/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 779 by ph10, Fri Dec 2 10:39:32 2011 UTC revision 1047 by zherczeg, Fri Sep 28 15:06:38 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
   
40  /* This module contains pcre_exec(), the externally visible function that does  /* This module contains pcre_exec(), the externally visible function that does
41  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
42  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
# Line 113  Returns:     nothing Line 112  Returns:     nothing
112  */  */
113    
114  static void  static void
115  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
116  {  {
117  unsigned int c;  unsigned int c;
118  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 140  Arguments: Line 139  Arguments:
139    md          points to match data block    md          points to match data block
140    caseless    TRUE if caseless    caseless    TRUE if caseless
141    
142  Returns:      < 0 if not matched, otherwise the number of subject bytes matched  Returns:      >= 0 the number of subject bytes matched
143                  -1 no match
144                  -2 partial match; always given if at end subject
145  */  */
146    
147  static int  static int
148  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
149    BOOL caseless)    BOOL caseless)
150  {  {
151  USPTR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
152  register USPTR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
153    
154  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
155  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 163  pchars(p, length, FALSE, md); Line 164  pchars(p, length, FALSE, md);
164  printf("\n");  printf("\n");
165  #endif  #endif
166    
167  /* Always fail if reference not set (and not JavaScript compatible). */  /* Always fail if reference not set (and not JavaScript compatible - in that
168    case the length is passed as zero). */
169    
170  if (length < 0) return -1;  if (length < 0) return -1;
171    
# Line 173  ASCII characters. */ Line 175  ASCII characters. */
175    
176  if (caseless)  if (caseless)
177    {    {
178  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
179  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
180    if (md->utf8)    if (md->utf)
181      {      {
182      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
183      bytes matched may differ, because there are some characters whose upper and      data units matched may differ, because in UTF-8 there are some characters
184      lower case versions code as different numbers of bytes. For example, U+023A      whose upper and lower case versions have different numbers of bytes.
185      (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);      For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
186      a sequence of 3 of the former uses 6 bytes, as does a sequence of two of      (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
187      the latter. It is important, therefore, to check the length along the      sequence of two of the latter. It is important, therefore, to check the
188      reference, not along the subject (earlier code did this wrong). */      length along the reference, not along the subject (earlier code did this
189        wrong). */
190    
191      USPTR endptr = p + length;      PCRE_PUCHAR endptr = p + length;
192      while (p < endptr)      while (p < endptr)
193        {        {
194        int c, d;        unsigned int c, d;
195        if (eptr >= md->end_subject) return -1;        const ucd_record *ur;
196          if (eptr >= md->end_subject) return -2;   /* Partial match */
197        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
198        GETCHARINC(d, p);        GETCHARINC(d, p);
199        if (c != d && c != UCD_OTHERCASE(d)) return -1;        ur = GET_UCD(d);
200          if (c != d && c != d + ur->other_case)
201            {
202            const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
203            for (;;)
204              {
205              if (c < *pp) return -1;
206              if (c == *pp++) break;
207              }
208            }
209        }        }
210      }      }
211    else    else
# Line 202  if (caseless) Line 215  if (caseless)
215    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
216    is no UCP support. */    is no UCP support. */
217      {      {
     if (eptr + length > md->end_subject) return -1;  
218      while (length-- > 0)      while (length-- > 0)
219        { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }        {
220          if (eptr >= md->end_subject) return -2;   /* Partial match */
221          if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
222          p++;
223          eptr++;
224          }
225      }      }
226    }    }
227    
# Line 213  are in UTF-8 mode. */ Line 230  are in UTF-8 mode. */
230    
231  else  else
232    {    {
233    if (eptr + length > md->end_subject) return -1;    while (length-- > 0)
234    while (length-- > 0) if (*p++ != *eptr++) return -1;      {
235        if (eptr >= md->end_subject) return -2;   /* Partial match */
236        if (*p++ != *eptr++) return -1;
237        }
238    }    }
239    
240  return (int)(eptr - eptr_start);  return (int)(eptr - eptr_start);
# Line 307  argument of match(), which never changes Line 327  argument of match(), which never changes
327    
328  #define RMATCH(ra,rb,rc,rd,re,rw)\  #define RMATCH(ra,rb,rc,rd,re,rw)\
329    {\    {\
330    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = frame->Xnextframe;\
331    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\    if (newframe == NULL)\
332    frame->Xwhere = rw; \      {\
333        newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
334        if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
335        newframe->Xnextframe = NULL;\
336        frame->Xnextframe = newframe;\
337        }\
338      frame->Xwhere = rw;\
339    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
340    newframe->Xecode = rb;\    newframe->Xecode = rb;\
341    newframe->Xmstart = mstart;\    newframe->Xmstart = mstart;\
# Line 328  argument of match(), which never changes Line 354  argument of match(), which never changes
354    {\    {\
355    heapframe *oldframe = frame;\    heapframe *oldframe = frame;\
356    frame = oldframe->Xprevframe;\    frame = oldframe->Xprevframe;\
   (pcre_stack_free)(oldframe);\  
357    if (frame != NULL)\    if (frame != NULL)\
358      {\      {\
359      rrc = ra;\      rrc = ra;\
# Line 342  argument of match(), which never changes Line 367  argument of match(), which never changes
367    
368  typedef struct heapframe {  typedef struct heapframe {
369    struct heapframe *Xprevframe;    struct heapframe *Xprevframe;
370      struct heapframe *Xnextframe;
371    
372    /* Function arguments that may change */    /* Function arguments that may change */
373    
374    USPTR Xeptr;    PCRE_PUCHAR Xeptr;
375    const uschar *Xecode;    const pcre_uchar *Xecode;
376    USPTR Xmstart;    PCRE_PUCHAR Xmstart;
377    int Xoffset_top;    int Xoffset_top;
378    eptrblock *Xeptrb;    eptrblock *Xeptrb;
379    unsigned int Xrdepth;    unsigned int Xrdepth;
380    
381    /* Function local variables */    /* Function local variables */
382    
383    USPTR Xcallpat;    PCRE_PUCHAR Xcallpat;
384  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
385    USPTR Xcharptr;    PCRE_PUCHAR Xcharptr;
386  #endif  #endif
387    USPTR Xdata;    PCRE_PUCHAR Xdata;
388    USPTR Xnext;    PCRE_PUCHAR Xnext;
389    USPTR Xpp;    PCRE_PUCHAR Xpp;
390    USPTR Xprev;    PCRE_PUCHAR Xprev;
391    USPTR Xsaved_eptr;    PCRE_PUCHAR Xsaved_eptr;
392    
393    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
394    
# Line 375  typedef struct heapframe { Line 401  typedef struct heapframe {
401    int Xprop_value;    int Xprop_value;
402    int Xprop_fail_result;    int Xprop_fail_result;
403    int Xoclength;    int Xoclength;
404    uschar Xocchars[8];    pcre_uchar Xocchars[6];
405  #endif  #endif
406    
407    int Xcodelink;    int Xcodelink;
# Line 440  the subject. */ Line 466  the subject. */
466    
467    
468  /* Performance note: It might be tempting to extract commonly used fields from  /* Performance note: It might be tempting to extract commonly used fields from
469  the md structure (e.g. utf8, end_subject) into individual variables to improve  the md structure (e.g. utf, end_subject) into individual variables to improve
470  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
471  made performance worse.  made performance worse.
472    
# Line 463  Returns:       MATCH_MATCH if matched Line 489  Returns:       MATCH_MATCH if matched
489  */  */
490    
491  static int  static int
492  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
493    int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)    PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
494      unsigned int rdepth)
495  {  {
496  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
497  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 473  so they can be ordinary variables in all Line 500  so they can be ordinary variables in all
500  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
501  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
502  register unsigned int c;   /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
503  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf;         /* Local copy of UTF flag for speed */
504    
505  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
506  BOOL caseless;  BOOL caseless;
507  int condcode;  int condcode;
508    
509  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
510  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame". We set up the top-level
511  heap storage. Set up the top-level frame here; others are obtained from the  frame on the stack here; subsequent instantiations are obtained from the heap
512  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  whenever RMATCH() does a "recursion". See the macro definitions above. Putting
513    the top-level on the stack rather than malloc-ing them all gives a performance
514    boost in many cases where there is not much "recursion". */
515    
516  #ifdef NO_RECURSE  #ifdef NO_RECURSE
517  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)md->match_frames_base;
 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  
 frame->Xprevframe = NULL;            /* Marks the top level */  
518    
519  /* Copy in the original argument variables */  /* Copy in the original argument variables */
520    
# Line 513  HEAP_RECURSE: Line 540  HEAP_RECURSE:
540    
541  /* Ditto for the local variables */  /* Ditto for the local variables */
542    
543  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
544  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
545  #endif  #endif
546  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
# Line 571  declarations can be cut out in a block. Line 598  declarations can be cut out in a block.
598  below are for variables that do not have to be preserved over a recursive call  below are for variables that do not have to be preserved over a recursive call
599  to RMATCH(). */  to RMATCH(). */
600    
601  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
602  const uschar *charptr;  const pcre_uchar *charptr;
603  #endif  #endif
604  const uschar *callpat;  const pcre_uchar *callpat;
605  const uschar *data;  const pcre_uchar *data;
606  const uschar *next;  const pcre_uchar *next;
607  USPTR         pp;  PCRE_PUCHAR       pp;
608  const uschar *prev;  const pcre_uchar *prev;
609  USPTR         saved_eptr;  PCRE_PUCHAR       saved_eptr;
610    
611  recursion_info new_recursive;  recursion_info new_recursive;
612    
# Line 592  int prop_type; Line 619  int prop_type;
619  int prop_value;  int prop_value;
620  int prop_fail_result;  int prop_fail_result;
621  int oclength;  int oclength;
622  uschar occhars[8];  pcre_uchar occhars[6];
623  #endif  #endif
624    
625  int codelink;  int codelink;
# Line 608  int save_offset1, save_offset2, save_off Line 635  int save_offset1, save_offset2, save_off
635  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
636    
637  eptrblock newptrb;  eptrblock newptrb;
638    
639    /* There is a special fudge for calling match() in a way that causes it to
640    measure the size of its basic stack frame when the stack is being used for
641    recursion. The second argument (ecode) being NULL triggers this behaviour. It
642    cannot normally ever be NULL. The return is the negated value of the frame
643    size. */
644    
645    if (ecode == NULL)
646      {
647      if (rdepth == 0)
648        return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
649      else
650        {
651        int len = (char *)&rdepth - (char *)eptr;
652        return (len > 0)? -len : len;
653        }
654      }
655  #endif     /* NO_RECURSE */  #endif     /* NO_RECURSE */
656    
657  /* To save space on the stack and in the heap frame, I have doubled up on some  /* To save space on the stack and in the heap frame, I have doubled up on some
# Line 620  the alternative names that are used. */ Line 664  the alternative names that are used. */
664  #define code_offset   codelink  #define code_offset   codelink
665  #define condassert    condition  #define condassert    condition
666  #define matched_once  prev_is_word  #define matched_once  prev_is_word
667    #define foc           number
668    #define save_mark     data
669    
670  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
671  variables. */  variables. */
# Line 645  defined). However, RMATCH isn't like a f Line 691  defined). However, RMATCH isn't like a f
691  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
692  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
693    
694  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
695  utf8 = md->utf8;       /* Local copy of the flag */  utf = md->utf;       /* Local copy of the flag */
696  #else  #else
697  utf8 = FALSE;  utf = FALSE;
698  #endif  #endif
699    
700  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
# Line 689  for (;;) Line 735  for (;;)
735      case OP_MARK:      case OP_MARK:
736      md->nomatch_mark = ecode + 2;      md->nomatch_mark = ecode + 2;
737      md->mark = NULL;    /* In case previously set by assertion */      md->mark = NULL;    /* In case previously set by assertion */
738      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
739        eptrb, RM55);        eptrb, RM55);
740      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
741           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
# Line 702  for (;;) Line 748  for (;;)
748      unaltered. */      unaltered. */
749    
750      else if (rrc == MATCH_SKIP_ARG &&      else if (rrc == MATCH_SKIP_ARG &&
751          strcmp((char *)(ecode + 2), (char *)(md->start_match_ptr)) == 0)          STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
752        {        {
753        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
754        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
# Line 715  for (;;) Line 761  for (;;)
761      /* COMMIT overrides PRUNE, SKIP, and THEN */      /* COMMIT overrides PRUNE, SKIP, and THEN */
762    
763      case OP_COMMIT:      case OP_COMMIT:
764      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
765        eptrb, RM52);        eptrb, RM52);
766      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
767          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
# Line 726  for (;;) Line 772  for (;;)
772      /* PRUNE overrides THEN */      /* PRUNE overrides THEN */
773    
774      case OP_PRUNE:      case OP_PRUNE:
775      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
776        eptrb, RM51);        eptrb, RM51);
777      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
778      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
# Line 734  for (;;) Line 780  for (;;)
780      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
781      md->nomatch_mark = ecode + 2;      md->nomatch_mark = ecode + 2;
782      md->mark = NULL;    /* In case previously set by assertion */      md->mark = NULL;    /* In case previously set by assertion */
783      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
784        eptrb, RM56);        eptrb, RM56);
785      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
786           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
# Line 744  for (;;) Line 790  for (;;)
790      /* SKIP overrides PRUNE and THEN */      /* SKIP overrides PRUNE and THEN */
791    
792      case OP_SKIP:      case OP_SKIP:
793      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
794        eptrb, RM53);        eptrb, RM53);
795      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
796        RRETURN(rrc);        RRETURN(rrc);
# Line 752  for (;;) Line 798  for (;;)
798      RRETURN(MATCH_SKIP);      RRETURN(MATCH_SKIP);
799    
800      /* Note that, for Perl compatibility, SKIP with an argument does NOT set      /* Note that, for Perl compatibility, SKIP with an argument does NOT set
801      nomatch_mark. There is a flag that disables this opcode when re-matching a      nomatch_mark. There is a flag that disables this opcode when re-matching a
802      pattern that ended with a SKIP for which there was not a matching MARK. */      pattern that ended with a SKIP for which there was not a matching MARK. */
803    
804      case OP_SKIP_ARG:      case OP_SKIP_ARG:
805      if (md->ignore_skip_arg)      if (md->ignore_skip_arg)
806        {        {
807        ecode += _pcre_OP_lengths[*ecode] + ecode[1];        ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
808        break;        break;
809        }        }
810      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
811        eptrb, RM57);        eptrb, RM57);
812      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
813        RRETURN(rrc);        RRETURN(rrc);
814    
815      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
816      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
817      caught by a matching MARK, or get to the top, where it causes a rematch      caught by a matching MARK, or get to the top, where it causes a rematch
818      with the md->ignore_skip_arg flag set. */      with the md->ignore_skip_arg flag set. */
819    
820      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
# Line 779  for (;;) Line 825  for (;;)
825      match pointer to do this. */      match pointer to do this. */
826    
827      case OP_THEN:      case OP_THEN:
828      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
829        eptrb, RM54);        eptrb, RM54);
830      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
831      md->start_match_ptr = ecode;      md->start_match_ptr = ecode;
# Line 788  for (;;) Line 834  for (;;)
834      case OP_THEN_ARG:      case OP_THEN_ARG:
835      md->nomatch_mark = ecode + 2;      md->nomatch_mark = ecode + 2;
836      md->mark = NULL;    /* In case previously set by assertion */      md->mark = NULL;    /* In case previously set by assertion */
837      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
838        md, eptrb, RM58);        md, eptrb, RM58);
839      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
840           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
# Line 812  for (;;) Line 858  for (;;)
858      case OP_ONCE_NC:      case OP_ONCE_NC:
859      prev = ecode;      prev = ecode;
860      saved_eptr = eptr;      saved_eptr = eptr;
861        save_mark = md->mark;
862      do      do
863        {        {
864        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
# Line 830  for (;;) Line 877  for (;;)
877    
878        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
879        ecode += GET(ecode,1);        ecode += GET(ecode,1);
880          md->mark = save_mark;
881        }        }
882      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
883    
# Line 869  for (;;) Line 917  for (;;)
917        }        }
918      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
919        {        {
       md->match_function_type = MATCH_CBEGROUP;  
920        RMATCH(eptr, prev, offset_top, md, eptrb, RM66);        RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
921        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
922        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
# Line 909  for (;;) Line 956  for (;;)
956        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
957        save_offset3 = md->offset_vector[md->offset_end - number];        save_offset3 = md->offset_vector[md->offset_end - number];
958        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
959          save_mark = md->mark;
960    
961        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
962        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
# Line 917  for (;;) Line 965  for (;;)
965        for (;;)        for (;;)
966          {          {
967          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
968          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
969            eptrb, RM1);            eptrb, RM1);
970          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
971    
# Line 945  for (;;) Line 993  for (;;)
993          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
994          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
995          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
996            md->mark = save_mark;
997          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
998          }          }
999    
# Line 996  for (;;) Line 1045  for (;;)
1045    
1046      for (;;)      for (;;)
1047        {        {
1048        if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA || op == OP_ONCE)
1049            md->match_function_type = MATCH_CBEGROUP;
1050    
1051        /* If this is not a possibly empty group, and there are no (*THEN)s in        /* If this is not a possibly empty group, and there are no (*THEN)s in
1052        the pattern, and this is the final alternative, optimize as described        the pattern, and this is the final alternative, optimize as described
# Line 1004  for (;;) Line 1054  for (;;)
1054    
1055        else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)        else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1056          {          {
1057          ecode += _pcre_OP_lengths[*ecode];          ecode += PRIV(OP_lengths)[*ecode];
1058          goto TAIL_RECURSE;          goto TAIL_RECURSE;
1059          }          }
1060    
1061        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1062    
1063        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,        save_mark = md->mark;
1064          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1065          RM2);          RM2);
1066    
1067        /* See comment in the code for capturing groups above about handling        /* See comment in the code for capturing groups above about handling
# Line 1028  for (;;) Line 1079  for (;;)
1079          {          {
1080          if (rrc == MATCH_ONCE)          if (rrc == MATCH_ONCE)
1081            {            {
1082            const uschar *scode = ecode;            const pcre_uchar *scode = ecode;
1083            if (*scode != OP_ONCE)           /* If not at start, find it */            if (*scode != OP_ONCE)           /* If not at start, find it */
1084              {              {
1085              while (*scode == OP_ALT) scode += GET(scode, 1);              while (*scode == OP_ALT) scode += GET(scode, 1);
# Line 1039  for (;;) Line 1090  for (;;)
1090          RRETURN(rrc);          RRETURN(rrc);
1091          }          }
1092        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1093          md->mark = save_mark;
1094        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1095        }        }
1096    
# Line 1093  for (;;) Line 1145  for (;;)
1145          md->offset_vector[md->offset_end - number] =          md->offset_vector[md->offset_end - number] =
1146            (int)(eptr - md->start_subject);            (int)(eptr - md->start_subject);
1147          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1148          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1149            eptrb, RM63);            eptrb, RM63);
1150          if (rrc == MATCH_KETRPOS)          if (rrc == MATCH_KETRPOS)
1151            {            {
# Line 1165  for (;;) Line 1217  for (;;)
1217      for (;;)      for (;;)
1218        {        {
1219        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1220        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1221          eptrb, RM48);          eptrb, RM48);
1222        if (rrc == MATCH_KETRPOS)        if (rrc == MATCH_KETRPOS)
1223          {          {
# Line 1215  for (;;) Line 1267  for (;;)
1267    
1268      if (ecode[LINK_SIZE+1] == OP_CALLOUT)      if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1269        {        {
1270        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
1271          {          {
1272          pcre_callout_block cb;          PUBL(callout_block) cb;
1273          cb.version          = 2;   /* Version 2 of the callout block */          cb.version          = 2;   /* Version 2 of the callout block */
1274          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1275          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1276    #ifdef COMPILE_PCRE8
1277          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
1278    #else
1279            cb.subject          = (PCRE_SPTR16)md->start_subject;
1280    #endif
1281          cb.subject_length   = (int)(md->end_subject - md->start_subject);          cb.subject_length   = (int)(md->end_subject - md->start_subject);
1282          cb.start_match      = (int)(mstart - md->start_subject);          cb.start_match      = (int)(mstart - md->start_subject);
1283          cb.current_position = (int)(eptr - md->start_subject);          cb.current_position = (int)(eptr - md->start_subject);
# Line 1231  for (;;) Line 1287  for (;;)
1287          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1288          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1289          cb.mark             = md->nomatch_mark;          cb.mark             = md->nomatch_mark;
1290          if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1291          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1292          }          }
1293        ecode += _pcre_OP_lengths[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1294        }        }
1295    
1296      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1260  for (;;) Line 1316  for (;;)
1316    
1317          if (!condition && condcode == OP_NRREF)          if (!condition && condcode == OP_NRREF)
1318            {            {
1319            uschar *slotA = md->name_table;            pcre_uchar *slotA = md->name_table;
1320            for (i = 0; i < md->name_count; i++)            for (i = 0; i < md->name_count; i++)
1321              {              {
1322              if (GET2(slotA, 0) == recno) break;              if (GET2(slotA, 0) == recno) break;
# Line 1273  for (;;) Line 1329  for (;;)
1329    
1330            if (i < md->name_count)            if (i < md->name_count)
1331              {              {
1332              uschar *slotB = slotA;              pcre_uchar *slotB = slotA;
1333              while (slotB > md->name_table)              while (slotB > md->name_table)
1334                {                {
1335                slotB -= md->name_entry_size;                slotB -= md->name_entry_size;
1336                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1337                  {                  {
1338                  condition = GET2(slotB, 0) == md->recursive->group_num;                  condition = GET2(slotB, 0) == md->recursive->group_num;
1339                  if (condition) break;                  if (condition) break;
# Line 1293  for (;;) Line 1349  for (;;)
1349                for (i++; i < md->name_count; i++)                for (i++; i < md->name_count; i++)
1350                  {                  {
1351                  slotB += md->name_entry_size;                  slotB += md->name_entry_size;
1352                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                  if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1353                    {                    {
1354                    condition = GET2(slotB, 0) == md->recursive->group_num;                    condition = GET2(slotB, 0) == md->recursive->group_num;
1355                    if (condition) break;                    if (condition) break;
# Line 1306  for (;;) Line 1362  for (;;)
1362    
1363          /* Choose branch according to the condition */          /* Choose branch according to the condition */
1364    
1365          ecode += condition? 3 : GET(ecode, 1);          ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1366          }          }
1367        }        }
1368    
# Line 1323  for (;;) Line 1379  for (;;)
1379        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
1380          {          {
1381          int refno = offset >> 1;          int refno = offset >> 1;
1382          uschar *slotA = md->name_table;          pcre_uchar *slotA = md->name_table;
1383    
1384          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
1385            {            {
# Line 1337  for (;;) Line 1393  for (;;)
1393    
1394          if (i < md->name_count)          if (i < md->name_count)
1395            {            {
1396            uschar *slotB = slotA;            pcre_uchar *slotB = slotA;
1397            while (slotB > md->name_table)            while (slotB > md->name_table)
1398              {              {
1399              slotB -= md->name_entry_size;              slotB -= md->name_entry_size;
1400              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1401                {                {
1402                offset = GET2(slotB, 0) << 1;                offset = GET2(slotB, 0) << 1;
1403                condition = offset < offset_top &&                condition = offset < offset_top &&
# Line 1359  for (;;) Line 1415  for (;;)
1415              for (i++; i < md->name_count; i++)              for (i++; i < md->name_count; i++)
1416                {                {
1417                slotB += md->name_entry_size;                slotB += md->name_entry_size;
1418                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1419                  {                  {
1420                  offset = GET2(slotB, 0) << 1;                  offset = GET2(slotB, 0) << 1;
1421                  condition = offset < offset_top &&                  condition = offset < offset_top &&
# Line 1374  for (;;) Line 1430  for (;;)
1430    
1431        /* Choose branch according to the condition */        /* Choose branch according to the condition */
1432    
1433        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1434        }        }
1435    
1436      else if (condcode == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
# Line 1466  for (;;) Line 1522  for (;;)
1522        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1523        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1524        }        }
1525      ecode += 3;      ecode += 1 + IMM2_SIZE;
1526      break;      break;
1527    
1528    
# Line 1513  for (;;) Line 1569  for (;;)
1569    
1570      case OP_ASSERT:      case OP_ASSERT:
1571      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1572        save_mark = md->mark;
1573      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1574        {        {
1575        condassert = TRUE;        condassert = TRUE;
# Line 1528  for (;;) Line 1585  for (;;)
1585          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1586          break;          break;
1587          }          }
1588          md->mark = save_mark;
1589    
1590        /* PCRE does not allow THEN to escape beyond an assertion; it is treated        /* A COMMIT failure must fail the entire assertion, without trying any
1591        as NOMATCH. */        subsequent branches. */
1592    
1593          if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
1594    
1595          /* PCRE does not allow THEN to escape beyond an assertion; it
1596          is treated as NOMATCH. */
1597    
1598        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1599        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 1557  for (;;) Line 1620  for (;;)
1620    
1621      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1622      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1623        save_mark = md->mark;
1624      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1625        {        {
1626        condassert = TRUE;        condassert = TRUE;
# Line 1567  for (;;) Line 1631  for (;;)
1631      do      do
1632        {        {
1633        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1634          md->mark = save_mark;
1635        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1636        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1637          {          {
# Line 1593  for (;;) Line 1658  for (;;)
1658      back a number of characters, not bytes. */      back a number of characters, not bytes. */
1659    
1660      case OP_REVERSE:      case OP_REVERSE:
1661  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1662      if (utf8)      if (utf)
1663        {        {
1664        i = GET(ecode, 1);        i = GET(ecode, 1);
1665        while (i-- > 0)        while (i-- > 0)
# Line 1625  for (;;) Line 1690  for (;;)
1690      function is able to force a failure. */      function is able to force a failure. */
1691    
1692      case OP_CALLOUT:      case OP_CALLOUT:
1693      if (pcre_callout != NULL)      if (PUBL(callout) != NULL)
1694        {        {
1695        pcre_callout_block cb;        PUBL(callout_block) cb;
1696        cb.version          = 2;   /* Version 2 of the callout block */        cb.version          = 2;   /* Version 2 of the callout block */
1697        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1698        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1699    #ifdef COMPILE_PCRE8
1700        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1701    #else
1702          cb.subject          = (PCRE_SPTR16)md->start_subject;
1703    #endif
1704        cb.subject_length   = (int)(md->end_subject - md->start_subject);        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1705        cb.start_match      = (int)(mstart - md->start_subject);        cb.start_match      = (int)(mstart - md->start_subject);
1706        cb.current_position = (int)(eptr - md->start_subject);        cb.current_position = (int)(eptr - md->start_subject);
# Line 1641  for (;;) Line 1710  for (;;)
1710        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1711        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1712        cb.mark             = md->nomatch_mark;        cb.mark             = md->nomatch_mark;
1713        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1714        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1715        }        }
1716      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 1700  for (;;) Line 1769  for (;;)
1769        else        else
1770          {          {
1771          new_recursive.offset_save =          new_recursive.offset_save =
1772            (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));            (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1773          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1774          }          }
1775        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
# Line 1715  for (;;) Line 1784  for (;;)
1784        do        do
1785          {          {
1786          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1787          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1788            md, eptrb, RM6);            md, eptrb, RM6);
1789          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1790              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
# Line 1724  for (;;) Line 1793  for (;;)
1793            {            {
1794            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1795            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1796              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1797    
1798            /* Set where we got to in the subject, and reset the start in case            /* Set where we got to in the subject, and reset the start in case
1799            it was changed by \K. This *is* propagated back out of a recursion,            it was changed by \K. This *is* propagated back out of a recursion,
# Line 1735  for (;;) Line 1804  for (;;)
1804            goto RECURSION_MATCHED;        /* Exit loop; end processing */            goto RECURSION_MATCHED;        /* Exit loop; end processing */
1805            }            }
1806    
1807          /* PCRE does not allow THEN to escape beyond a recursion; it is treated          /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
1808          as NOMATCH. */          is treated as NOMATCH. */
1809    
1810          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
1811                     rrc != MATCH_COMMIT)
1812            {            {
1813            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1814            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1815              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1816            RRETURN(rrc);            RRETURN(rrc);
1817            }            }
1818    
# Line 1754  for (;;) Line 1824  for (;;)
1824        DPRINTF(("Recursion didn't match\n"));        DPRINTF(("Recursion didn't match\n"));
1825        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1826        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1827          (pcre_free)(new_recursive.offset_save);          (PUBL(free))(new_recursive.offset_save);
1828        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1829        }        }
1830    
# Line 1949  for (;;) Line 2019  for (;;)
2019          }          }
2020        if (*prev >= OP_SBRA)    /* Could match an empty string */        if (*prev >= OP_SBRA)    /* Could match an empty string */
2021          {          {
         md->match_function_type = MATCH_CBEGROUP;  
2022          RMATCH(eptr, prev, offset_top, md, eptrb, RM50);          RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2023          RRETURN(rrc);          RRETURN(rrc);
2024          }          }
# Line 1958  for (;;) Line 2027  for (;;)
2027        }        }
2028      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
2029        {        {
       if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;  
2030        RMATCH(eptr, prev, offset_top, md, eptrb, RM13);        RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2031        if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;        if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2032        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 2015  for (;;) Line 2083  for (;;)
2083    
2084      case OP_DOLLM:      case OP_DOLLM:
2085      if (eptr < md->end_subject)      if (eptr < md->end_subject)
2086        { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }        {
2087          if (!IS_NEWLINE(eptr))
2088            {
2089            if (md->partial != 0 &&
2090                eptr + 1 >= md->end_subject &&
2091                NLBLOCK->nltype == NLTYPE_FIXED &&
2092                NLBLOCK->nllen == 2 &&
2093                *eptr == NLBLOCK->nl[0])
2094              {
2095              md->hitend = TRUE;
2096              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2097              }
2098            RRETURN(MATCH_NOMATCH);
2099            }
2100          }
2101      else      else
2102        {        {
2103        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
# Line 2047  for (;;) Line 2129  for (;;)
2129      ASSERT_NL_OR_EOS:      ASSERT_NL_OR_EOS:
2130      if (eptr < md->end_subject &&      if (eptr < md->end_subject &&
2131          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2132          {
2133          if (md->partial != 0 &&
2134              eptr + 1 >= md->end_subject &&
2135              NLBLOCK->nltype == NLTYPE_FIXED &&
2136              NLBLOCK->nllen == 2 &&
2137              *eptr == NLBLOCK->nl[0])
2138            {
2139            md->hitend = TRUE;
2140            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2141            }
2142        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2143          }
2144    
2145      /* Either at end of string or \n before end. */      /* Either at end of string or \n before end. */
2146    
# Line 2066  for (;;) Line 2159  for (;;)
2159        be "non-word" characters. Remember the earliest consulted character for        be "non-word" characters. Remember the earliest consulted character for
2160        partial matching. */        partial matching. */
2161    
2162  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2163        if (utf8)        if (utf)
2164          {          {
2165          /* Get status of previous character */          /* Get status of previous character */
2166    
2167          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
2168            {            {
2169            USPTR lastptr = eptr - 1;            PCRE_PUCHAR lastptr = eptr - 1;
2170            while((*lastptr & 0xc0) == 0x80) lastptr--;            BACKCHAR(lastptr);
2171            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2172            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
2173  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2139  for (;;) Line 2232  for (;;)
2232              }              }
2233            else            else
2234  #endif  #endif
2235            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = MAX_255(eptr[-1])
2236                && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2237            }            }
2238    
2239          /* Get status of next character */          /* Get status of next character */
# Line 2162  for (;;) Line 2256  for (;;)
2256            }            }
2257          else          else
2258  #endif  #endif
2259          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          cur_is_word = MAX_255(*eptr)
2260              && ((md->ctypes[*eptr] & ctype_word) != 0);
2261          }          }
2262    
2263        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
# Line 2173  for (;;) Line 2268  for (;;)
2268        }        }
2269      break;      break;
2270    
2271      /* Match a single character type; inline for speed */      /* Match any single character type except newline; have to take care with
2272        CRLF newlines and partial matching. */
2273    
2274      case OP_ANY:      case OP_ANY:
2275      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2276        if (md->partial != 0 &&
2277            eptr + 1 >= md->end_subject &&
2278            NLBLOCK->nltype == NLTYPE_FIXED &&
2279            NLBLOCK->nllen == 2 &&
2280            *eptr == NLBLOCK->nl[0])
2281          {
2282          md->hitend = TRUE;
2283          if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2284          }
2285    
2286      /* Fall through */      /* Fall through */
2287    
2288        /* Match any single character whatsoever. */
2289    
2290      case OP_ALLANY:      case OP_ALLANY:
2291      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2292        {                            /* not be updated before SCHECK_PARTIAL. */        {                            /* not be updated before SCHECK_PARTIAL. */
# Line 2186  for (;;) Line 2294  for (;;)
2294        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2295        }        }
2296      eptr++;      eptr++;
2297      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  #ifdef SUPPORT_UTF
2298        if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2299    #endif
2300      ecode++;      ecode++;
2301      break;      break;
2302    
# Line 2211  for (;;) Line 2321  for (;;)
2321        }        }
2322      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2323      if (      if (
2324  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2325         c < 256 &&         c < 256 &&
2326  #endif  #endif
2327         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
# Line 2228  for (;;) Line 2338  for (;;)
2338        }        }
2339      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2340      if (      if (
2341  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2342         c >= 256 ||         c > 255 ||
2343  #endif  #endif
2344         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
2345         )         )
# Line 2245  for (;;) Line 2355  for (;;)
2355        }        }
2356      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2357      if (      if (
2358  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2359         c < 256 &&         c < 256 &&
2360  #endif  #endif
2361         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
# Line 2262  for (;;) Line 2372  for (;;)
2372        }        }
2373      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2374      if (      if (
2375  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2376         c >= 256 ||         c > 255 ||
2377  #endif  #endif
2378         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
2379         )         )
# Line 2279  for (;;) Line 2389  for (;;)
2389        }        }
2390      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2391      if (      if (
2392  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2393         c < 256 &&         c < 256 &&
2394  #endif  #endif
2395         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
# Line 2296  for (;;) Line 2406  for (;;)
2406        }        }
2407      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2408      if (      if (
2409  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2410         c >= 256 ||         c > 255 ||
2411  #endif  #endif
2412         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
2413         )         )
# Line 2316  for (;;) Line 2426  for (;;)
2426        {        {
2427        default: RRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2428    
2429        case 0x000d:        case CHAR_CR:
2430        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr >= md->end_subject)
2431            {
2432            SCHECK_PARTIAL();
2433            }
2434          else if (*eptr == CHAR_LF) eptr++;
2435        break;        break;
2436    
2437        case 0x000a:        case CHAR_LF:
2438        break;        break;
2439    
2440        case 0x000b:        case CHAR_VT:
2441        case 0x000c:        case CHAR_FF:
2442        case 0x0085:        case CHAR_NEL:
2443    #ifndef EBCDIC
2444        case 0x2028:        case 0x2028:
2445        case 0x2029:        case 0x2029:
2446    #endif  /* Not EBCDIC */
2447        if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);        if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2448        break;        break;
2449        }        }
# Line 2343  for (;;) Line 2459  for (;;)
2459      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2460      switch(c)      switch(c)
2461        {        {
2462          HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2463        default: break;        default: break;
       case 0x09:      /* HT */  
       case 0x20:      /* SPACE */  
       case 0xa0:      /* NBSP */  
       case 0x1680:    /* OGHAM SPACE MARK */  
       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
       case 0x2000:    /* EN QUAD */  
       case 0x2001:    /* EM QUAD */  
       case 0x2002:    /* EN SPACE */  
       case 0x2003:    /* EM SPACE */  
       case 0x2004:    /* THREE-PER-EM SPACE */  
       case 0x2005:    /* FOUR-PER-EM SPACE */  
       case 0x2006:    /* SIX-PER-EM SPACE */  
       case 0x2007:    /* FIGURE SPACE */  
       case 0x2008:    /* PUNCTUATION SPACE */  
       case 0x2009:    /* THIN SPACE */  
       case 0x200A:    /* HAIR SPACE */  
       case 0x202f:    /* NARROW NO-BREAK SPACE */  
       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
       case 0x3000:    /* IDEOGRAPHIC SPACE */  
       RRETURN(MATCH_NOMATCH);  
2464        }        }
2465      ecode++;      ecode++;
2466      break;      break;
# Line 2377  for (;;) Line 2474  for (;;)
2474      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2475      switch(c)      switch(c)
2476        {        {
2477          HSPACE_CASES: break;  /* Byte and multibyte cases */
2478        default: RRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
       case 0x09:      /* HT */  
       case 0x20:      /* SPACE */  
       case 0xa0:      /* NBSP */  
       case 0x1680:    /* OGHAM SPACE MARK */  
       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
       case 0x2000:    /* EN QUAD */  
       case 0x2001:    /* EM QUAD */  
       case 0x2002:    /* EN SPACE */  
       case 0x2003:    /* EM SPACE */  
       case 0x2004:    /* THREE-PER-EM SPACE */  
       case 0x2005:    /* FOUR-PER-EM SPACE */  
       case 0x2006:    /* SIX-PER-EM SPACE */  
       case 0x2007:    /* FIGURE SPACE */  
       case 0x2008:    /* PUNCTUATION SPACE */  
       case 0x2009:    /* THIN SPACE */  
       case 0x200A:    /* HAIR SPACE */  
       case 0x202f:    /* NARROW NO-BREAK SPACE */  
       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
       case 0x3000:    /* IDEOGRAPHIC SPACE */  
       break;  
2479        }        }
2480      ecode++;      ecode++;
2481      break;      break;
# Line 2411  for (;;) Line 2489  for (;;)
2489      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2490      switch(c)      switch(c)
2491        {        {
2492          VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2493        default: break;        default: break;
       case 0x0a:      /* LF */  
       case 0x0b:      /* VT */  
       case 0x0c:      /* FF */  
       case 0x0d:      /* CR */  
       case 0x85:      /* NEL */  
       case 0x2028:    /* LINE SEPARATOR */  
       case 0x2029:    /* PARAGRAPH SEPARATOR */  
       RRETURN(MATCH_NOMATCH);  
2494        }        }
2495      ecode++;      ecode++;
2496      break;      break;
# Line 2433  for (;;) Line 2504  for (;;)
2504      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2505      switch(c)      switch(c)
2506        {        {
2507          VSPACE_CASES: break;
2508        default: RRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
       case 0x0a:      /* LF */  
       case 0x0b:      /* VT */  
       case 0x0c:      /* FF */  
       case 0x0d:      /* CR */  
       case 0x85:      /* NEL */  
       case 0x2028:    /* LINE SEPARATOR */  
       case 0x2029:    /* PARAGRAPH SEPARATOR */  
       break;  
2509        }        }
2510      ecode++;      ecode++;
2511      break;      break;
# Line 2459  for (;;) Line 2523  for (;;)
2523        }        }
2524      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2525        {        {
2526          const pcre_uint32 *cp;
2527        const ucd_record *prop = GET_UCD(c);        const ucd_record *prop = GET_UCD(c);
2528    
2529        switch(ecode[1])        switch(ecode[1])
# Line 2475  for (;;) Line 2540  for (;;)
2540          break;          break;
2541    
2542          case PT_GC:          case PT_GC:
2543          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2544            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2545          break;          break;
2546    
# Line 2492  for (;;) Line 2557  for (;;)
2557          /* These are specials */          /* These are specials */
2558    
2559          case PT_ALNUM:          case PT_ALNUM:
2560          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2561               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2562            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2563          break;          break;
2564    
2565          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2566          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2567               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2568                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2569            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2570          break;          break;
2571    
2572          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2573          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2574               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2575               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2576                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
# Line 2513  for (;;) Line 2578  for (;;)
2578          break;          break;
2579    
2580          case PT_WORD:          case PT_WORD:
2581          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2582               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2583               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2584            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2585          break;          break;
2586    
2587            case PT_CLIST:
2588            cp = PRIV(ucd_caseless_sets) + prop->caseset;
2589            for (;;)
2590              {
2591              if (c < *cp)
2592                { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2593              if (c == *cp++)
2594                { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2595              }
2596            break;
2597    
2598          /* This should never occur */          /* This should never occur */
2599    
2600          default:          default:
# Line 2538  for (;;) Line 2614  for (;;)
2614        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2615        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2616        }        }
2617      GETCHARINCTEST(c, eptr);      else
     if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);  
     while (eptr < md->end_subject)  
2618        {        {
2619        int len = 1;        int lgb, rgb;
2620        if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }        GETCHARINCTEST(c, eptr);
2621        if (UCD_CATEGORY(c) != ucp_M) break;        lgb = UCD_GRAPHBREAK(c);
2622        eptr += len;        while (eptr < md->end_subject)
2623            {
2624            int len = 1;
2625            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2626            rgb = UCD_GRAPHBREAK(c);
2627            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2628            lgb = rgb;
2629            eptr += len;
2630            }
2631        }        }
2632        CHECK_PARTIAL();
2633      ecode++;      ecode++;
2634      break;      break;
2635  #endif  #endif  /* SUPPORT_UCP */
2636    
2637    
2638      /* Match a back reference, possibly repeatedly. Look past the end of the      /* Match a back reference, possibly repeatedly. Look past the end of the
# Line 2564  for (;;) Line 2647  for (;;)
2647      case OP_REFI:      case OP_REFI:
2648      caseless = op == OP_REFI;      caseless = op == OP_REFI;
2649      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2650      ecode += 3;      ecode += 1 + IMM2_SIZE;
2651    
2652      /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2653    
# Line 2604  for (;;) Line 2687  for (;;)
2687        case OP_CRMINRANGE:        case OP_CRMINRANGE:
2688        minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2689        min = GET2(ecode, 1);        min = GET2(ecode, 1);
2690        max = GET2(ecode, 3);        max = GET2(ecode, 1 + IMM2_SIZE);
2691        if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2692        ecode += 5;        ecode += 1 + 2 * IMM2_SIZE;
2693        break;        break;
2694    
2695        default:               /* No repeat follows */        default:               /* No repeat follows */
2696        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2697          {          {
2698            if (length == -2) eptr = md->end_subject;   /* Partial match */
2699          CHECK_PARTIAL();          CHECK_PARTIAL();
2700          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2701          }          }
# Line 2620  for (;;) Line 2704  for (;;)
2704        }        }
2705    
2706      /* Handle repeated back references. If the length of the reference is      /* Handle repeated back references. If the length of the reference is
2707      zero, just continue with the main loop. */      zero, just continue with the main loop. If the length is negative, it
2708        means the reference is unset in non-Java-compatible mode. If the minimum is
2709        zero, we can continue at the same level without recursion. For any other
2710        minimum, carrying on will result in NOMATCH. */
2711    
2712      if (length == 0) continue;      if (length == 0) continue;
2713        if (length < 0 && min == 0) continue;
2714    
2715      /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2716      the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
# Line 2633  for (;;) Line 2721  for (;;)
2721        int slength;        int slength;
2722        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2723          {          {
2724            if (slength == -2) eptr = md->end_subject;   /* Partial match */
2725          CHECK_PARTIAL();          CHECK_PARTIAL();
2726          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2727          }          }
# Line 2656  for (;;) Line 2745  for (;;)
2745          if (fi >= max) RRETURN(MATCH_NOMATCH);          if (fi >= max) RRETURN(MATCH_NOMATCH);
2746          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2747            {            {
2748              if (slength == -2) eptr = md->end_subject;   /* Partial match */
2749            CHECK_PARTIAL();            CHECK_PARTIAL();
2750            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2751            }            }
# Line 2674  for (;;) Line 2764  for (;;)
2764          int slength;          int slength;
2765          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2766            {            {
2767            CHECK_PARTIAL();            /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2768              the soft partial matching case. */
2769    
2770              if (slength == -2 && md->partial != 0 &&
2771                  md->end_subject > md->start_used_ptr)
2772                {
2773                md->hitend = TRUE;
2774                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2775                }
2776            break;            break;
2777            }            }
2778          eptr += slength;          eptr += slength;
2779          }          }
2780    
2781        while (eptr >= pp)        while (eptr >= pp)
2782          {          {
2783          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
# Line 2703  for (;;) Line 2802  for (;;)
2802      case OP_NCLASS:      case OP_NCLASS:
2803      case OP_CLASS:      case OP_CLASS:
2804        {        {
2805          /* The data variable is saved across frames, so the byte map needs to
2806          be stored there. */
2807    #define BYTE_MAP ((pcre_uint8 *)data)
2808        data = ecode + 1;                /* Save for matching */        data = ecode + 1;                /* Save for matching */
2809        ecode += 33;                     /* Advance past the item */        ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2810    
2811        switch (*ecode)        switch (*ecode)
2812          {          {
# Line 2725  for (;;) Line 2827  for (;;)
2827          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2828          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2829          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2830          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2831          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2832          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2833          break;          break;
2834    
2835          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2737  for (;;) Line 2839  for (;;)
2839    
2840        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
2841    
2842  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2843        /* UTF-8 mode */        if (utf)
       if (utf8)  
2844          {          {
2845          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2846            {            {
# Line 2754  for (;;) Line 2855  for (;;)
2855              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2856              }              }
2857            else            else
2858              {              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  
             }  
2859            }            }
2860          }          }
2861        else        else
2862  #endif  #endif
2863        /* Not UTF-8 mode */        /* Not UTF mode */
2864          {          {
2865          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2866            {            {
# Line 2771  for (;;) Line 2870  for (;;)
2870              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2871              }              }
2872            c = *eptr++;            c = *eptr++;
2873            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2874              if (c > 255)
2875                {
2876                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2877                }
2878              else
2879    #endif
2880                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2881            }            }
2882          }          }
2883    
# Line 2785  for (;;) Line 2891  for (;;)
2891    
2892        if (minimize)        if (minimize)
2893          {          {
2894  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2895          /* UTF-8 mode */          if (utf)
         if (utf8)  
2896            {            {
2897            for (fi = min;; fi++)            for (fi = min;; fi++)
2898              {              {
# Line 2805  for (;;) Line 2910  for (;;)
2910                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2911                }                }
2912              else              else
2913                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  
               }  
2914              }              }
2915            }            }
2916          else          else
2917  #endif  #endif
2918          /* Not UTF-8 mode */          /* Not UTF mode */
2919            {            {
2920            for (fi = min;; fi++)            for (fi = min;; fi++)
2921              {              {
# Line 2825  for (;;) Line 2928  for (;;)
2928                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2929                }                }
2930              c = *eptr++;              c = *eptr++;
2931              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2932                if (c > 255)
2933                  {
2934                  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2935                  }
2936                else
2937    #endif
2938                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2939              }              }
2940            }            }
2941          /* Control never gets here */          /* Control never gets here */
# Line 2837  for (;;) Line 2947  for (;;)
2947          {          {
2948          pp = eptr;          pp = eptr;
2949    
2950  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2951          /* UTF-8 mode */          if (utf)
         if (utf8)  
2952            {            {
2953            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2954              {              {
# Line 2855  for (;;) Line 2964  for (;;)
2964                if (op == OP_CLASS) break;                if (op == OP_CLASS) break;
2965                }                }
2966              else              else
2967                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
               if ((data[c/8] & (1 << (c&7))) == 0) break;  
               }  
2968              eptr += len;              eptr += len;
2969              }              }
2970            for (;;)            for (;;)
# Line 2870  for (;;) Line 2977  for (;;)
2977            }            }
2978          else          else
2979  #endif  #endif
2980            /* Not UTF-8 mode */            /* Not UTF mode */
2981            {            {
2982            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2983              {              {
# Line 2880  for (;;) Line 2987  for (;;)
2987                break;                break;
2988                }                }
2989              c = *eptr;              c = *eptr;
2990              if ((data[c/8] & (1 << (c&7))) == 0) break;  #ifndef COMPILE_PCRE8
2991                if (c > 255)
2992                  {
2993                  if (op == OP_CLASS) break;
2994                  }
2995                else
2996    #endif
2997                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2998              eptr++;              eptr++;
2999              }              }
3000            while (eptr >= pp)            while (eptr >= pp)
# Line 2893  for (;;) Line 3007  for (;;)
3007    
3008          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3009          }          }
3010    #undef BYTE_MAP
3011        }        }
3012      /* Control never gets here */      /* Control never gets here */
3013    
# Line 2901  for (;;) Line 3016  for (;;)
3016      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
3017      mode, because Unicode properties are supported in non-UTF-8 mode. */      mode, because Unicode properties are supported in non-UTF-8 mode. */
3018    
3019  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3020      case OP_XCLASS:      case OP_XCLASS:
3021        {        {
3022        data = ecode + 1 + LINK_SIZE;                /* Save for matching */        data = ecode + 1 + LINK_SIZE;                /* Save for matching */
# Line 2926  for (;;) Line 3041  for (;;)
3041          case OP_CRMINRANGE:          case OP_CRMINRANGE:
3042          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
3043          min = GET2(ecode, 1);          min = GET2(ecode, 1);
3044          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
3045          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
3046          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
3047          break;          break;
3048    
3049          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2946  for (;;) Line 3061  for (;;)
3061            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3062            }            }
3063          GETCHARINCTEST(c, eptr);          GETCHARINCTEST(c, eptr);
3064          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3065          }          }
3066    
3067        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 2970  for (;;) Line 3085  for (;;)
3085              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3086              }              }
3087            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3088            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3089            }            }
3090          /* Control never gets here */          /* Control never gets here */
3091          }          }
# Line 2988  for (;;) Line 3103  for (;;)
3103              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3104              break;              break;
3105              }              }
3106    #ifdef SUPPORT_UTF
3107            GETCHARLENTEST(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
3108            if (!_pcre_xclass(c, data)) break;  #else
3109              c = *eptr;
3110    #endif
3111              if (!PRIV(xclass)(c, data, utf)) break;
3112            eptr += len;            eptr += len;
3113            }            }
3114          for(;;)          for(;;)
# Line 2997  for (;;) Line 3116  for (;;)
3116            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3117            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3118            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3119            if (utf8) BACKCHAR(eptr);  #ifdef SUPPORT_UTF
3120              if (utf) BACKCHAR(eptr);
3121    #endif
3122            }            }
3123          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3124          }          }
# Line 3009  for (;;) Line 3130  for (;;)
3130      /* Match a single character, casefully */      /* Match a single character, casefully */
3131    
3132      case OP_CHAR:      case OP_CHAR:
3133  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3134      if (utf8)      if (utf)
3135        {        {
3136        length = 1;        length = 1;
3137        ecode++;        ecode++;
# Line 3024  for (;;) Line 3145  for (;;)
3145        }        }
3146      else      else
3147  #endif  #endif
3148        /* Not UTF mode */
     /* Non-UTF-8 mode */  
3149        {        {
3150        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
3151          {          {
# Line 3037  for (;;) Line 3157  for (;;)
3157        }        }
3158      break;      break;
3159    
3160      /* Match a single character, caselessly. If we are at the end of the      /* Match a single character, caselessly. If we are at the end of the
3161      subject, give up immediately. */      subject, give up immediately. */
3162    
3163      case OP_CHARI:      case OP_CHARI:
3164      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
3165        {        {
3166        SCHECK_PARTIAL();        SCHECK_PARTIAL();
3167        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
3168        }        }
3169    
3170  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3171      if (utf8)      if (utf)
3172        {        {
3173        length = 1;        length = 1;
3174        ecode++;        ecode++;
3175        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
3176    
3177        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
3178        we know that its other case must also be one byte long, so we can use the        we know that its other case must also be one byte long, so we can use the
3179        fast lookup table. We know that there is at least one byte left in the        fast lookup table. We know that there is at least one byte left in the
3180        subject. */        subject. */
3181    
3182        if (fc < 128)        if (fc < 128)
3183          {          {
3184          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[fc]
3185                != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3186            ecode++;
3187            eptr++;
3188          }          }
3189    
3190        /* Otherwise we must pick up the subject character. Note that we cannot        /* Otherwise we must pick up the subject character. Note that we cannot
# Line 3087  for (;;) Line 3210  for (;;)
3210          }          }
3211        }        }
3212      else      else
3213  #endif   /* SUPPORT_UTF8 */  #endif   /* SUPPORT_UTF */
3214    
3215      /* Non-UTF-8 mode */      /* Not UTF mode */
3216        {        {
3217        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);        if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3218              != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3219          eptr++;
3220        ecode += 2;        ecode += 2;
3221        }        }
3222      break;      break;
# Line 3101  for (;;) Line 3226  for (;;)
3226      case OP_EXACT:      case OP_EXACT:
3227      case OP_EXACTI:      case OP_EXACTI:
3228      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3229      ecode += 3;      ecode += 1 + IMM2_SIZE;
3230      goto REPEATCHAR;      goto REPEATCHAR;
3231    
3232      case OP_POSUPTO:      case OP_POSUPTO:
# Line 3116  for (;;) Line 3241  for (;;)
3241      min = 0;      min = 0;
3242      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3243      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3244      ecode += 3;      ecode += 1 + IMM2_SIZE;
3245      goto REPEATCHAR;      goto REPEATCHAR;
3246    
3247      case OP_POSSTAR:      case OP_POSSTAR:
# Line 3164  for (;;) Line 3289  for (;;)
3289      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. */
3290    
3291      REPEATCHAR:      REPEATCHAR:
3292  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3293      if (utf8)      if (utf)
3294        {        {
3295        length = 1;        length = 1;
3296        charptr = ecode;        charptr = ecode;
# Line 3181  for (;;) Line 3306  for (;;)
3306          unsigned int othercase;          unsigned int othercase;
3307          if (op >= OP_STARI &&     /* Caseless */          if (op >= OP_STARI &&     /* Caseless */
3308              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
3309            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = PRIV(ord2utf)(othercase, occhars);
3310          else oclength = 0;          else oclength = 0;
3311  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3312    
3313          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3314            {            {
3315            if (eptr <= md->end_subject - length &&            if (eptr <= md->end_subject - length &&
3316              memcmp(eptr, charptr, length) == 0) eptr += length;              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3317  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3318            else if (oclength > 0 &&            else if (oclength > 0 &&
3319                     eptr <= md->end_subject - oclength &&                     eptr <= md->end_subject - oclength &&
3320                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3321  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3322            else            else
3323              {              {
# Line 3211  for (;;) Line 3336  for (;;)
3336              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3337              if (fi >= max) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3338              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3339                memcmp(eptr, charptr, length) == 0) eptr += length;                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3340  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3341              else if (oclength > 0 &&              else if (oclength > 0 &&
3342                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3343                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3344  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3345              else              else
3346                {                {
# Line 3232  for (;;) Line 3357  for (;;)
3357            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3358              {              {
3359              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3360                  memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3361  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3362              else if (oclength > 0 &&              else if (oclength > 0 &&
3363                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3364                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3365  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3366              else              else
3367                {                {
# Line 3268  for (;;) Line 3393  for (;;)
3393        value of fc will always be < 128. */        value of fc will always be < 128. */
3394        }        }
3395      else      else
3396  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3397          /* When not in UTF-8 mode, load a single-byte character. */
3398          fc = *ecode++;
3399    
3400      /* When not in UTF-8 mode, load a single-byte character. */      /* The value of fc at this point is always one character, though we may
3401        or may not be in UTF mode. The code is duplicated for the caseless and
     fc = *ecode++;  
   
     /* The value of fc at this point is always less than 256, though we may or  
     may not be in UTF-8 mode. The code is duplicated for the caseless and  
3402      caseful cases, for speed, since matching characters is likely to be quite      caseful cases, for speed, since matching characters is likely to be quite
3403      common. First, ensure the minimum number of matches are present. If min =      common. First, ensure the minimum number of matches are present. If min =
3404      max, continue at the same level without recursing. Otherwise, if      max, continue at the same level without recursing. Otherwise, if
# Line 3284  for (;;) Line 3407  for (;;)
3407      maximizing, find the maximum number of characters and work backwards. */      maximizing, find the maximum number of characters and work backwards. */
3408    
3409      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3410        max, eptr));        max, (char *)eptr));
3411    
3412      if (op >= OP_STARI)  /* Caseless */      if (op >= OP_STARI)  /* Caseless */
3413        {        {
3414        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3415          /* fc must be < 128 if UTF is enabled. */
3416          foc = md->fcc[fc];
3417    #else
3418    #ifdef SUPPORT_UTF
3419    #ifdef SUPPORT_UCP
3420          if (utf && fc > 127)
3421            foc = UCD_OTHERCASE(fc);
3422    #else
3423          if (utf && fc > 127)
3424            foc = fc;
3425    #endif /* SUPPORT_UCP */
3426          else
3427    #endif /* SUPPORT_UTF */
3428            foc = TABLE_GET(fc, md->fcc, fc);
3429    #endif /* COMPILE_PCRE8 */
3430    
3431        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3432          {          {
3433          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
# Line 3296  for (;;) Line 3435  for (;;)
3435            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3436            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3437            }            }
3438          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3439            eptr++;
3440          }          }
3441        if (min == max) continue;        if (min == max) continue;
3442        if (minimize)        if (minimize)
# Line 3311  for (;;) Line 3451  for (;;)
3451              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3452              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3453              }              }
3454            if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3455              eptr++;
3456            }            }
3457          /* Control never gets here */          /* Control never gets here */
3458          }          }
# Line 3325  for (;;) Line 3466  for (;;)
3466              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3467              break;              break;
3468              }              }
3469            if (fc != md->lcc[*eptr]) break;            if (fc != *eptr && foc != *eptr) break;
3470            eptr++;            eptr++;
3471            }            }
3472    
# Line 3410  for (;;) Line 3551  for (;;)
3551        SCHECK_PARTIAL();        SCHECK_PARTIAL();
3552        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
3553        }        }
3554      ecode++;  #ifdef SUPPORT_UTF
3555      GETCHARINCTEST(c, eptr);      if (utf)
     if (op == OP_NOTI)         /* The caseless case */  
3556        {        {
3557  #ifdef SUPPORT_UTF8        register unsigned int ch, och;
3558        if (c < 256)  
3559  #endif        ecode++;
3560        c = md->lcc[c];        GETCHARINC(ch, ecode);
3561        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        GETCHARINC(c, eptr);
3562    
3563          if (op == OP_NOT)
3564            {
3565            if (ch == c) RRETURN(MATCH_NOMATCH);
3566            }
3567          else
3568            {
3569    #ifdef SUPPORT_UCP
3570            if (ch > 127)
3571              och = UCD_OTHERCASE(ch);
3572    #else
3573            if (ch > 127)
3574              och = ch;
3575    #endif /* SUPPORT_UCP */
3576            else
3577              och = TABLE_GET(ch, md->fcc, ch);
3578            if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3579            }
3580        }        }
3581      else    /* Caseful */      else
3582    #endif
3583        {        {
3584        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        register unsigned int ch = ecode[1];
3585          c = *eptr++;
3586          if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3587            RRETURN(MATCH_NOMATCH);
3588          ecode += 2;
3589        }        }
3590      break;      break;
3591    
# Line 3436  for (;;) Line 3599  for (;;)
3599      case OP_NOTEXACT:      case OP_NOTEXACT:
3600      case OP_NOTEXACTI:      case OP_NOTEXACTI:
3601      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3602      ecode += 3;      ecode += 1 + IMM2_SIZE;
3603      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3604    
3605      case OP_NOTUPTO:      case OP_NOTUPTO:
# Line 3446  for (;;) Line 3609  for (;;)
3609      min = 0;      min = 0;
3610      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3611      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3612      ecode += 3;      ecode += 1 + IMM2_SIZE;
3613      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3614    
3615      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
# Line 3478  for (;;) Line 3641  for (;;)
3641      possessive = TRUE;      possessive = TRUE;
3642      min = 0;      min = 0;
3643      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3644      ecode += 3;      ecode += 1 + IMM2_SIZE;
3645      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3646    
3647      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 3502  for (;;) Line 3665  for (;;)
3665      /* Common code for all repeated single-byte matches. */      /* Common code for all repeated single-byte matches. */
3666    
3667      REPEATNOTCHAR:      REPEATNOTCHAR:
3668      fc = *ecode++;      GETCHARINCTEST(fc, ecode);
3669    
3670      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
3671      since matching characters is likely to be quite common. First, ensure the      since matching characters is likely to be quite common. First, ensure the
# Line 3513  for (;;) Line 3676  for (;;)
3676      characters and work backwards. */      characters and work backwards. */
3677    
3678      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3679        max, eptr));        max, (char *)eptr));
3680    
3681      if (op >= OP_NOTSTARI)     /* Caseless */      if (op >= OP_NOTSTARI)     /* Caseless */
3682        {        {
3683        fc = md->lcc[fc];  #ifdef SUPPORT_UTF
3684    #ifdef SUPPORT_UCP
3685          if (utf && fc > 127)
3686            foc = UCD_OTHERCASE(fc);
3687    #else
3688          if (utf && fc > 127)
3689            foc = fc;
3690    #endif /* SUPPORT_UCP */
3691          else
3692    #endif /* SUPPORT_UTF */
3693            foc = TABLE_GET(fc, md->fcc, fc);
3694    
3695  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3696        /* UTF-8 mode */        if (utf)
       if (utf8)  
3697          {          {
3698          register unsigned int d;          register unsigned int d;
3699          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3532  for (;;) Line 3704  for (;;)
3704              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3705              }              }
3706            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3707            if (d < 256) d = md->lcc[d];            if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
           if (fc == d) RRETURN(MATCH_NOMATCH);  
3708            }            }
3709          }          }
3710        else        else
3711  #endif  #endif
3712          /* Not UTF mode */
       /* Not UTF-8 mode */  
3713          {          {
3714          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3715            {            {
# Line 3548  for (;;) Line 3718  for (;;)
3718              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3719              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3720              }              }
3721            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3722              eptr++;
3723            }            }
3724          }          }
3725    
# Line 3556  for (;;) Line 3727  for (;;)
3727    
3728        if (minimize)        if (minimize)
3729          {          {
3730  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3731          /* UTF-8 mode */          if (utf)
         if (utf8)  
3732            {            {
3733            register unsigned int d;            register unsigned int d;
3734            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3572  for (;;) Line 3742  for (;;)
3742                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3743                }                }
3744              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3745              if (d < 256) d = md->lcc[d];              if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
             if (fc == d) RRETURN(MATCH_NOMATCH);  
3746              }              }
3747            }            }
3748          else          else
3749  #endif  #endif
3750          /* Not UTF-8 mode */          /* Not UTF mode */
3751            {            {
3752            for (fi = min;; fi++)            for (fi = min;; fi++)
3753              {              {
# Line 3590  for (;;) Line 3759  for (;;)
3759                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3760                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3761                }                }
3762              if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);              if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3763                eptr++;
3764              }              }
3765            }            }
3766          /* Control never gets here */          /* Control never gets here */
# Line 3602  for (;;) Line 3772  for (;;)
3772          {          {
3773          pp = eptr;          pp = eptr;
3774    
3775  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3776          /* UTF-8 mode */          if (utf)
         if (utf8)  
3777            {            {
3778            register unsigned int d;            register unsigned int d;
3779            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3616  for (;;) Line 3785  for (;;)
3785                break;                break;
3786                }                }
3787              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3788              if (d < 256) d = md->lcc[d];              if (fc == d || (unsigned int)foc == d) break;
             if (fc == d) break;  
3789              eptr += len;              eptr += len;
3790              }              }
3791          if (possessive) continue;            if (possessive) continue;
3792          for(;;)            for(;;)
3793              {              {
3794              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3795              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 3631  for (;;) Line 3799  for (;;)
3799            }            }
3800          else          else
3801  #endif  #endif
3802          /* Not UTF-8 mode */          /* Not UTF mode */
3803            {            {
3804            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3805              {              {
# Line 3640  for (;;) Line 3808  for (;;)
3808                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3809                break;                break;
3810                }                }
3811              if (fc == md->lcc[*eptr]) break;              if (fc == *eptr || foc == *eptr) break;
3812              eptr++;              eptr++;
3813              }              }
3814            if (possessive) continue;            if (possessive) continue;
# Line 3661  for (;;) Line 3829  for (;;)
3829    
3830      else      else
3831        {        {
3832  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3833        /* UTF-8 mode */        if (utf)
       if (utf8)  
3834          {          {
3835          register unsigned int d;          register unsigned int d;
3836          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3679  for (;;) Line 3846  for (;;)
3846          }          }
3847        else        else
3848  #endif  #endif
3849        /* Not UTF-8 mode */        /* Not UTF mode */
3850          {          {
3851          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3852            {            {
# Line 3696  for (;;) Line 3863  for (;;)
3863    
3864        if (minimize)        if (minimize)
3865          {          {
3866  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3867          /* UTF-8 mode */          if (utf)
         if (utf8)  
3868            {            {
3869            register unsigned int d;            register unsigned int d;
3870            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3717  for (;;) Line 3883  for (;;)
3883            }            }
3884          else          else
3885  #endif  #endif
3886          /* Not UTF-8 mode */          /* Not UTF mode */
3887            {            {
3888            for (fi = min;; fi++)            for (fi = min;; fi++)
3889              {              {
# Line 3741  for (;;) Line 3907  for (;;)
3907          {          {
3908          pp = eptr;          pp = eptr;
3909    
3910  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3911          /* UTF-8 mode */          if (utf)
         if (utf8)  
3912            {            {
3913            register unsigned int d;            register unsigned int d;
3914            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3769  for (;;) Line 3934  for (;;)
3934            }            }
3935          else          else
3936  #endif  #endif
3937          /* Not UTF-8 mode */          /* Not UTF mode */
3938            {            {
3939            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3940              {              {
# Line 3802  for (;;) Line 3967  for (;;)
3967      case OP_TYPEEXACT:      case OP_TYPEEXACT:
3968      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3969      minimize = TRUE;      minimize = TRUE;
3970      ecode += 3;      ecode += 1 + IMM2_SIZE;
3971      goto REPEATTYPE;      goto REPEATTYPE;
3972    
3973      case OP_TYPEUPTO:      case OP_TYPEUPTO:
# Line 3810  for (;;) Line 3975  for (;;)
3975      min = 0;      min = 0;
3976      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3977      minimize = *ecode == OP_TYPEMINUPTO;      minimize = *ecode == OP_TYPEMINUPTO;
3978      ecode += 3;      ecode += 1 + IMM2_SIZE;
3979      goto REPEATTYPE;      goto REPEATTYPE;
3980    
3981      case OP_TYPEPOSSTAR:      case OP_TYPEPOSSTAR:
# Line 3838  for (;;) Line 4003  for (;;)
4003      possessive = TRUE;      possessive = TRUE;
4004      min = 0;      min = 0;
4005      max = GET2(ecode, 1);      max = GET2(ecode, 1);
4006      ecode += 3;      ecode += 1 + IMM2_SIZE;
4007      goto REPEATTYPE;      goto REPEATTYPE;
4008    
4009      case OP_TYPESTAR:      case OP_TYPESTAR:
# Line 4020  for (;;) Line 4185  for (;;)
4185                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4186              }              }
4187            break;            break;
4188    
4189              case PT_CLIST:
4190              for (i = 1; i <= min; i++)
4191                {
4192                const pcre_uint32 *cp;
4193                if (eptr >= md->end_subject)
4194                  {
4195                  SCHECK_PARTIAL();
4196                  RRETURN(MATCH_NOMATCH);
4197                  }
4198                GETCHARINCTEST(c, eptr);
4199                cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);
4200                for (;;)
4201                  {
4202                  if (c < *cp)
4203                    { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4204                  if (c == *cp++)
4205                    { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4206                  }
4207                }
4208              break;
4209    
4210            /* This should not occur */            /* This should not occur */
4211    
4212            default:            default:
# Line 4040  for (;;) Line 4226  for (;;)
4226              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4227              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4228              }              }
4229            GETCHARINCTEST(c, eptr);            else
           if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);  
           while (eptr < md->end_subject)  
4230              {              {
4231              int len = 1;              int lgb, rgb;
4232              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              GETCHARINCTEST(c, eptr);
4233              if (UCD_CATEGORY(c) != ucp_M) break;              lgb = UCD_GRAPHBREAK(c);
4234              eptr += len;             while (eptr < md->end_subject)
4235                  {
4236                  int len = 1;
4237                  if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4238                  rgb = UCD_GRAPHBREAK(c);
4239                  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
4240                  lgb = rgb;
4241                  eptr += len;
4242                  }
4243              }              }
4244              CHECK_PARTIAL();
4245            }            }
4246          }          }
4247    
# Line 4057  for (;;) Line 4250  for (;;)
4250    
4251  /* Handle all other cases when the coding is UTF-8 */  /* Handle all other cases when the coding is UTF-8 */
4252    
4253  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4254        if (utf8) switch(ctype)        if (utf) switch(ctype)
4255          {          {
4256          case OP_ANY:          case OP_ANY:
4257          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 4069  for (;;) Line 4262  for (;;)
4262              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4263              }              }
4264            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4265              if (md->partial != 0 &&
4266                  eptr + 1 >= md->end_subject &&
4267                  NLBLOCK->nltype == NLTYPE_FIXED &&
4268                  NLBLOCK->nllen == 2 &&
4269                  *eptr == NLBLOCK->nl[0])
4270                {
4271                md->hitend = TRUE;
4272                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4273                }
4274            eptr++;            eptr++;
4275            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4276            }            }
4277          break;          break;
4278    
# Line 4083  for (;;) Line 4285  for (;;)
4285              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4286              }              }
4287            eptr++;            eptr++;
4288            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4289            }            }
4290          break;          break;
4291    
# Line 4105  for (;;) Line 4307  for (;;)
4307              {              {
4308              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4309    
4310              case 0x000d:              case CHAR_CR:
4311              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
4312              break;              break;
4313    
4314              case 0x000a:              case CHAR_LF:
4315              break;              break;
4316    
4317              case 0x000b:              case CHAR_VT:
4318              case 0x000c:              case CHAR_FF:
4319              case 0x0085:              case CHAR_NEL:
4320    #ifndef EBCDIC
4321              case 0x2028:              case 0x2028:
4322              case 0x2029:              case 0x2029:
4323    #endif  /* Not EBCDIC */
4324              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4325              break;              break;
4326              }              }
# Line 4134  for (;;) Line 4338  for (;;)
4338            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4339            switch(c)            switch(c)
4340              {              {
4341                HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
4342              default: break;              default: break;
             case 0x09:      /* HT */  
             case 0x20:      /* SPACE */  
             case 0xa0:      /* NBSP */  
             case 0x1680:    /* OGHAM SPACE MARK */  
             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
             case 0x2000:    /* EN QUAD */  
             case 0x2001:    /* EM QUAD */  
             case 0x2002:    /* EN SPACE */  
             case 0x2003:    /* EM SPACE */  
             case 0x2004:    /* THREE-PER-EM SPACE */  
             case 0x2005:    /* FOUR-PER-EM SPACE */  
             case 0x2006:    /* SIX-PER-EM SPACE */  
             case 0x2007:    /* FIGURE SPACE */  
             case 0x2008:    /* PUNCTUATION SPACE */  
             case 0x2009:    /* THIN SPACE */  
             case 0x200A:    /* HAIR SPACE */  
             case 0x202f:    /* NARROW NO-BREAK SPACE */  
             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
             case 0x3000:    /* IDEOGRAPHIC SPACE */  
             RRETURN(MATCH_NOMATCH);  
4343              }              }
4344            }            }
4345          break;          break;
# Line 4170  for (;;) Line 4355  for (;;)
4355            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4356            switch(c)            switch(c)
4357              {              {
4358                HSPACE_CASES: break;  /* Byte and multibyte cases */
4359              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
             case 0x09:      /* HT */  
             case 0x20:      /* SPACE */  
             case 0xa0:      /* NBSP */  
             case 0x1680:    /* OGHAM SPACE MARK */  
             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
             case 0x2000:    /* EN QUAD */  
             case 0x2001:    /* EM QUAD */  
             case 0x2002:    /* EN SPACE */  
             case 0x2003:    /* EM SPACE */  
             case 0x2004:    /* THREE-PER-EM SPACE */  
             case 0x2005:    /* FOUR-PER-EM SPACE */  
             case 0x2006:    /* SIX-PER-EM SPACE */  
             case 0x2007:    /* FIGURE SPACE */  
             case 0x2008:    /* PUNCTUATION SPACE */  
             case 0x2009:    /* THIN SPACE */  
             case 0x200A:    /* HAIR SPACE */  
             case 0x202f:    /* NARROW NO-BREAK SPACE */  
             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
             case 0x3000:    /* IDEOGRAPHIC SPACE */  
             break;  
4360              }              }
4361            }            }
4362          break;          break;
# Line 4206  for (;;) Line 4372  for (;;)
4372            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4373            switch(c)            switch(c)
4374              {              {
4375                VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4376              default: break;              default: break;
             case 0x0a:      /* LF */  
             case 0x0b:      /* VT */  
             case 0x0c:      /* FF */  
             case 0x0d:      /* CR */  
             case 0x85:      /* NEL */  
             case 0x2028:    /* LINE SEPARATOR */  
             case 0x2029:    /* PARAGRAPH SEPARATOR */  
             RRETURN(MATCH_NOMATCH);  
4377              }              }
4378            }            }
4379          break;          break;
# Line 4230  for (;;) Line 4389  for (;;)
4389            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4390            switch(c)            switch(c)
4391              {              {
4392                VSPACE_CASES: break;
4393              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
             case 0x0a:      /* LF */  
             case 0x0b:      /* VT */  
             case 0x0c:      /* FF */  
             case 0x0d:      /* CR */  
             case 0x85:      /* NEL */  
             case 0x2028:    /* LINE SEPARATOR */  
             case 0x2029:    /* PARAGRAPH SEPARATOR */  
             break;  
4394              }              }
4395            }            }
4396          break;          break;
# Line 4265  for (;;) Line 4417  for (;;)
4417              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4418              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4419              }              }
4420            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
4421              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4422              eptr++;
4423            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4424            }            }
4425          break;          break;
# Line 4281  for (;;) Line 4434  for (;;)
4434              }              }
4435            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
4436              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4437            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            eptr++;
4438              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4439            }            }
4440          break;          break;
4441    
# Line 4293  for (;;) Line 4447  for (;;)
4447              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4448              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4449              }              }
4450            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
4451              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4452              eptr++;
4453            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4454            }            }
4455          break;          break;
# Line 4309  for (;;) Line 4464  for (;;)
4464              }              }
4465            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
4466              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4467            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            eptr++;
4468              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4469            }            }
4470          break;          break;
4471    
# Line 4321  for (;;) Line 4477  for (;;)
4477              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4478              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4479              }              }
4480            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
4481              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4482              eptr++;
4483            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4484            }            }
4485          break;          break;
# Line 4332  for (;;) Line 4489  for (;;)
4489          }  /* End switch(ctype) */          }  /* End switch(ctype) */
4490    
4491        else        else
4492  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF */
4493    
4494        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
4495        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. */
# Line 4348  for (;;) Line 4505  for (;;)
4505              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4506              }              }
4507            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4508              if (md->partial != 0 &&
4509                  eptr + 1 >= md->end_subject &&
4510                  NLBLOCK->nltype == NLTYPE_FIXED &&
4511                  NLBLOCK->nllen == 2 &&
4512                  *eptr == NLBLOCK->nl[0])
4513                {
4514                md->hitend = TRUE;
4515                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4516                }
4517            eptr++;            eptr++;
4518            }            }
4519          break;          break;
# Line 4382  for (;;) Line 4548  for (;;)
4548              {              {
4549              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4550    
4551              case 0x000d:              case CHAR_CR:
4552              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
4553              break;              break;
4554    
4555              case 0x000a:              case CHAR_LF:
4556              break;              break;
4557    
4558              case 0x000b:              case CHAR_VT:
4559              case 0x000c:              case CHAR_FF:
4560              case 0x0085:              case CHAR_NEL:
4561    #ifdef COMPILE_PCRE16
4562                case 0x2028:
4563                case 0x2029:
4564    #endif
4565              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4566              break;              break;
4567              }              }
# Line 4409  for (;;) Line 4579  for (;;)
4579            switch(*eptr++)            switch(*eptr++)
4580              {              {
4581              default: break;              default: break;
4582              case 0x09:      /* HT */              HSPACE_BYTE_CASES:
4583              case 0x20:      /* SPACE */  #ifdef COMPILE_PCRE16
4584              case 0xa0:      /* NBSP */              HSPACE_MULTIBYTE_CASES:
4585    #endif
4586              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4587              }              }
4588            }            }
# Line 4428  for (;;) Line 4599  for (;;)
4599            switch(*eptr++)            switch(*eptr++)
4600              {              {
4601              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4602              case 0x09:      /* HT */              HSPACE_BYTE_CASES:
4603              case 0x20:      /* SPACE */  #ifdef COMPILE_PCRE16
4604              case 0xa0:      /* NBSP */              HSPACE_MULTIBYTE_CASES:
4605    #endif
4606              break;              break;
4607              }              }
4608            }            }
# Line 4446  for (;;) Line 4618  for (;;)
4618              }              }
4619            switch(*eptr++)            switch(*eptr++)
4620              {              {
4621              default: break;              VSPACE_BYTE_CASES:
4622              case 0x0a:      /* LF */  #ifdef COMPILE_PCRE16
4623              case 0x0b:      /* VT */              VSPACE_MULTIBYTE_CASES:
4624              case 0x0c:      /* FF */  #endif
             case 0x0d:      /* CR */  
             case 0x85:      /* NEL */  
4625              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4626                default: break;
4627              }              }
4628            }            }
4629          break;          break;
# Line 4468  for (;;) Line 4639  for (;;)
4639            switch(*eptr++)            switch(*eptr++)
4640              {              {
4641              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4642              case 0x0a:      /* LF */              VSPACE_BYTE_CASES:
4643              case 0x0b:      /* VT */  #ifdef COMPILE_PCRE16
4644              case 0x0c:      /* FF */              VSPACE_MULTIBYTE_CASES:
4645              case 0x0d:      /* CR */  #endif
             case 0x85:      /* NEL */  
4646              break;              break;
4647              }              }
4648            }            }
# Line 4486  for (;;) Line 4656  for (;;)
4656              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4657              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4658              }              }
4659            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4660                RRETURN(MATCH_NOMATCH);
4661              eptr++;
4662            }            }
4663          break;          break;
4664    
# Line 4498  for (;;) Line 4670  for (;;)
4670              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4671              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4672              }              }
4673            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4674                RRETURN(MATCH_NOMATCH);
4675              eptr++;
4676            }            }
4677          break;          break;
4678    
# Line 4510  for (;;) Line 4684  for (;;)
4684              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4685              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4686              }              }
4687            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4688                RRETURN(MATCH_NOMATCH);
4689              eptr++;
4690            }            }
4691          break;          break;
4692    
# Line 4522  for (;;) Line 4698  for (;;)
4698              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4699              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4700              }              }
4701            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4702                RRETURN(MATCH_NOMATCH);
4703              eptr++;
4704            }            }
4705          break;          break;
4706    
# Line 4534  for (;;) Line 4712  for (;;)
4712              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4713              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4714              }              }
4715            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4716              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4717              eptr++;
4718            }            }
4719          break;          break;
4720    
# Line 4547  for (;;) Line 4726  for (;;)
4726              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4727              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4728              }              }
4729            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4730              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4731              eptr++;
4732            }            }
4733          break;          break;
4734    
# Line 4739  for (;;) Line 4919  for (;;)
4919              }              }
4920            /* Control never gets here */            /* Control never gets here */
4921    
4922            /* This should never occur */            case PT_CLIST:
4923              for (fi = min;; fi++)
4924                {
4925                const pcre_uint32 *cp;
4926                RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
4927                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4928                if (fi >= max) RRETURN(MATCH_NOMATCH);
4929                if (eptr >= md->end_subject)
4930                  {
4931                  SCHECK_PARTIAL();
4932                  RRETURN(MATCH_NOMATCH);
4933                  }
4934                GETCHARINCTEST(c, eptr);
4935                cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);
4936                for (;;)
4937                  {
4938                  if (c < *cp)
4939                    { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4940                  if (c == *cp++)
4941                    { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4942                  }
4943                }
4944              /* Control never gets here */
4945    
4946              /* This should never occur */
4947            default:            default:
4948            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
4949            }            }
# Line 4761  for (;;) Line 4964  for (;;)
4964              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4965              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4966              }              }
4967            GETCHARINCTEST(c, eptr);            else
           if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);  
           while (eptr < md->end_subject)  
4968              {              {
4969              int len = 1;              int lgb, rgb;
4970              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              GETCHARINCTEST(c, eptr);
4971              if (UCD_CATEGORY(c) != ucp_M) break;              lgb = UCD_GRAPHBREAK(c);
4972              eptr += len;              while (eptr < md->end_subject)
4973                  {
4974                  int len = 1;
4975                  if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4976                  rgb = UCD_GRAPHBREAK(c);
4977                  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
4978                  lgb = rgb;
4979                  eptr += len;
4980                  }
4981              }              }
4982              CHECK_PARTIAL();
4983            }            }
4984          }          }
4985        else        else
4986  #endif     /* SUPPORT_UCP */  #endif     /* SUPPORT_UCP */
4987    
4988  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4989        /* UTF-8 mode */        if (utf)
       if (utf8)  
4990          {          {
4991          for (fi = min;; fi++)          for (fi = min;; fi++)
4992            {            {
# Line 4794  for (;;) Line 5003  for (;;)
5003            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
5004            switch(ctype)            switch(ctype)
5005              {              {
5006              case OP_ANY:        /* This is the non-NL case */              case OP_ANY:               /* This is the non-NL case */
5007                if (md->partial != 0 &&    /* Take care with CRLF partial */
5008                    eptr >= md->end_subject &&
5009                    NLBLOCK->nltype == NLTYPE_FIXED &&
5010                    NLBLOCK->nllen == 2 &&
5011                    c == NLBLOCK->nl[0])
5012                  {
5013                  md->hitend = TRUE;
5014                  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5015                  }
5016                break;
5017    
5018              case OP_ALLANY:              case OP_ALLANY:
5019              case OP_ANYBYTE:              case OP_ANYBYTE:
5020              break;              break;
# Line 4803  for (;;) Line 5023  for (;;)
5023              switch(c)              switch(c)
5024                {                {
5025                default: RRETURN(MATCH_NOMATCH);                default: RRETURN(MATCH_NOMATCH);
5026                case 0x000d:                case CHAR_CR:
5027                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
5028                break;                break;
5029                case 0x000a:  
5030                  case CHAR_LF:
5031                break;                break;
5032    
5033                case 0x000b:                case CHAR_VT:
5034                case 0x000c:                case CHAR_FF:
5035                case 0x0085:                case CHAR_NEL:
5036    #ifndef EBCDIC
5037                case 0x2028:                case 0x2028:
5038                case 0x2029:                case 0x2029:
5039    #endif  /* Not EBCDIC */
5040                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5041                break;                break;
5042                }                }
# Line 4822  for (;;) Line 5045  for (;;)
5045              case OP_NOT_HSPACE:              case OP_NOT_HSPACE:
5046              switch(c)              switch(c)
5047                {                {
5048                  HSPACE_CASES: RRETURN(MATCH_NOMATCH);
5049                default: break;                default: break;
               case 0x09:      /* HT */  
               case 0x20:      /* SPACE */  
               case 0xa0:      /* NBSP */  
               case 0x1680:    /* OGHAM SPACE MARK */  
               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
               case 0x2000:    /* EN QUAD */  
               case 0x2001:    /* EM QUAD */  
               case 0x2002:    /* EN SPACE */  
               case 0x2003:    /* EM SPACE */  
               case 0x2004:    /* THREE-PER-EM SPACE */  
               case 0x2005:    /* FOUR-PER-EM SPACE */  
               case 0x2006:    /* SIX-PER-EM SPACE */  
               case 0x2007:    /* FIGURE SPACE */  
               case 0x2008:    /* PUNCTUATION SPACE */  
               case 0x2009:    /* THIN SPACE */  
               case 0x200A:    /* HAIR SPACE */  
               case 0x202f:    /* NARROW NO-BREAK SPACE */  
               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
               case 0x3000:    /* IDEOGRAPHIC SPACE */  
               RRETURN(MATCH_NOMATCH);  
5050                }                }
5051              break;              break;
5052    
5053              case OP_HSPACE:              case OP_HSPACE:
5054              switch(c)              switch(c)
5055                {                {
5056                  HSPACE_CASES: break;
5057                default: RRETURN(MATCH_NOMATCH);                default: RRETURN(MATCH_NOMATCH);
               case 0x09:      /* HT */  
               case 0x20:      /* SPACE */  
               case 0xa0:      /* NBSP */  
               case 0x1680:    /* OGHAM SPACE MARK */  
               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
               case 0x2000:    /* EN QUAD */  
               case 0x2001:    /* EM QUAD */  
               case 0x2002:    /* EN SPACE */  
               case 0x2003:    /* EM SPACE */  
               case 0x2004:    /* THREE-PER-EM SPACE */  
               case 0x2005:    /* FOUR-PER-EM SPACE */  
               case 0x2006:    /* SIX-PER-EM SPACE */  
               case 0x2007:    /* FIGURE SPACE */  
               case 0x2008:    /* PUNCTUATION SPACE */  
               case 0x2009:    /* THIN SPACE */  
               case 0x200A:    /* HAIR SPACE */  
               case 0x202f:    /* NARROW NO-BREAK SPACE */  
               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
               case 0x3000:    /* IDEOGRAPHIC SPACE */  
               break;  
5058                }                }
5059              break;              break;
5060    
5061              case OP_NOT_VSPACE:              case OP_NOT_VSPACE:
5062              switch(c)              switch(c)
5063                {                {
5064                  VSPACE_CASES: RRETURN(MATCH_NOMATCH);
5065                default: break;                default: break;
               case 0x0a:      /* LF */  
               case 0x0b:      /* VT */  
               case 0x0c:      /* FF */  
               case 0x0d:      /* CR */  
               case 0x85:      /* NEL */  
               case 0x2028:    /* LINE SEPARATOR */  
               case 0x2029:    /* PARAGRAPH SEPARATOR */  
               RRETURN(MATCH_NOMATCH);  
5066                }                }
5067              break;              break;
5068    
5069              case OP_VSPACE:              case OP_VSPACE:
5070              switch(c)              switch(c)
5071                {                {
5072                  VSPACE_CASES: break;
5073                default: RRETURN(MATCH_NOMATCH);                default: RRETURN(MATCH_NOMATCH);
               case 0x0a:      /* LF */  
               case 0x0b:      /* VT */  
               case 0x0c:      /* FF */  
               case 0x0d:      /* CR */  
               case 0x85:      /* NEL */  
               case 0x2028:    /* LINE SEPARATOR */  
               case 0x2029:    /* PARAGRAPH SEPARATOR */  
               break;  
5074                }                }
5075              break;              break;
5076    
# Line 4919  for (;;) Line 5090  for (;;)
5090              break;              break;
5091    
5092              case OP_WHITESPACE:              case OP_WHITESPACE:
5093              if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)              if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5094                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5095              break;              break;
5096    
# Line 4940  for (;;) Line 5111  for (;;)
5111          }          }
5112        else        else
5113  #endif  #endif
5114        /* Not UTF-8 mode */        /* Not UTF mode */
5115          {          {
5116          for (fi = min;; fi++)          for (fi = min;; fi++)
5117            {            {
# Line 4957  for (;;) Line 5128  for (;;)
5128            c = *eptr++;            c = *eptr++;
5129            switch(ctype)            switch(ctype)
5130              {              {
5131              case OP_ANY:     /* This is the non-NL case */              case OP_ANY:               /* This is the non-NL case */
5132                if (md->partial != 0 &&    /* Take care with CRLF partial */
5133                    eptr >= md->end_subject &&
5134                    NLBLOCK->nltype == NLTYPE_FIXED &&
5135                    NLBLOCK->nllen == 2 &&
5136                    c == NLBLOCK->nl[0])
5137                  {
5138                  md->hitend = TRUE;
5139                  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5140                  }
5141                break;
5142    
5143              case OP_ALLANY:              case OP_ALLANY:
5144              case OP_ANYBYTE:              case OP_ANYBYTE:
5145              break;              break;
# Line 4966  for (;;) Line 5148  for (;;)
5148              switch(c)              switch(c)
5149                {                {
5150                default: RRETURN(MATCH_NOMATCH);                default: RRETURN(MATCH_NOMATCH);
5151                case 0x000d:                case CHAR_CR:
5152                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
5153                break;                break;
5154    
5155                case 0x000a:                case CHAR_LF:
5156                break;                break;
5157    
5158                case 0x000b:                case CHAR_VT:
5159                case 0x000c:                case CHAR_FF:
5160                case 0x0085:                case CHAR_NEL:
5161    #ifdef COMPILE_PCRE16
5162                  case 0x2028:
5163                  case 0x2029:
5164    #endif
5165                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5166                break;                break;
5167                }                }
# Line 4985  for (;;) Line 5171  for (;;)
5171              switch(c)              switch(c)
5172                {                {
5173                default: break;                default: break;
5174                case 0x09:      /* HT */                HSPACE_BYTE_CASES:
5175                case 0x20:      /* SPACE */  #ifdef COMPILE_PCRE16
5176                case 0xa0:      /* NBSP */                HSPACE_MULTIBYTE_CASES:
5177    #endif
5178                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5179                }                }
5180              break;              break;
# Line 4996  for (;;) Line 5183  for (;;)
5183              switch(c)              switch(c)
5184                {                {
5185                default: RRETURN(MATCH_NOMATCH);                default: RRETURN(MATCH_NOMATCH);
5186                case 0x09:      /* HT */                HSPACE_BYTE_CASES:
5187                case 0x20:      /* SPACE */  #ifdef COMPILE_PCRE16
5188                case 0xa0:      /* NBSP */                HSPACE_MULTIBYTE_CASES:
5189    #endif
5190                break;                break;
5191                }                }
5192              break;              break;
# Line 5007  for (;;) Line 5195  for (;;)
5195              switch(c)              switch(c)
5196                {                {
5197                default: break;                default: break;
5198                case 0x0a:      /* LF */                VSPACE_BYTE_CASES:
5199                case 0x0b:      /* VT */  #ifdef COMPILE_PCRE16
5200                case 0x0c:      /* FF */                VSPACE_MULTIBYTE_CASES:
5201                case 0x0d:      /* CR */  #endif
               case 0x85:      /* NEL */  
5202                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5203                }                }
5204              break;              break;
# Line 5020  for (;;) Line 5207  for (;;)
5207              switch(c)              switch(c)
5208                {                {
5209                default: RRETURN(MATCH_NOMATCH);                default: RRETURN(MATCH_NOMATCH);
5210                case 0x0a:      /* LF */                VSPACE_BYTE_CASES:
5211                case 0x0b:      /* VT */  #ifdef COMPILE_PCRE16
5212                case 0x0c:      /* FF */                VSPACE_MULTIBYTE_CASES:
5213                case 0x0d:      /* CR */  #endif
               case 0x85:      /* NEL */  
5214                break;                break;
5215                }                }
5216              break;              break;
5217    
5218              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
5219              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5220              break;              break;
5221    
5222              case OP_DIGIT:              case OP_DIGIT:
5223              if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);              if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5224              break;              break;
5225    
5226              case OP_NOT_WHITESPACE:              case OP_NOT_WHITESPACE:
5227              if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);              if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5228              break;              break;
5229    
5230              case OP_WHITESPACE:              case OP_WHITESPACE:
5231              if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);              if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5232              break;              break;
5233    
5234              case OP_NOT_WORDCHAR:              case OP_NOT_WORDCHAR:
5235              if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);              if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5236              break;              break;
5237    
5238              case OP_WORDCHAR:              case OP_WORDCHAR:
5239              if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);              if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5240              break;              break;
5241    
5242              default:              default:
# Line 5226  for (;;) Line 5412  for (;;)
5412              eptr+= len;              eptr+= len;
5413              }              }
5414            break;            break;
5415    
5416              case PT_CLIST:
5417              for (i = min; i < max; i++)
5418                {
5419                const pcre_uint32 *cp;
5420                int len = 1;
5421                if (eptr >= md->end_subject)
5422                  {
5423                  SCHECK_PARTIAL();
5424                  break;
5425                  }
5426                GETCHARLENTEST(c, eptr, len);
5427                cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);
5428                for (;;)
5429                  {
5430                  if (c < *cp)
5431                    { if (prop_fail_result) break; else goto GOT_MAX; }
5432                  if (c == *cp++)
5433                    { if (prop_fail_result) goto GOT_MAX; else break; }
5434                  }
5435                eptr += len;
5436                }
5437              GOT_MAX:
5438              break;
5439    
5440            default:            default:
5441            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
# Line 5239  for (;;) Line 5449  for (;;)
5449            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5450            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5451            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5452            if (utf8) BACKCHAR(eptr);            if (utf) BACKCHAR(eptr);
5453            }            }
5454          }          }
5455    
# Line 5250  for (;;) Line 5460  for (;;)
5460          {          {
5461          for (i = min; i < max; i++)          for (i = min; i < max; i++)
5462            {            {
           int len = 1;  
5463            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
5464              {              {
5465              SCHECK_PARTIAL();              SCHECK_PARTIAL();
5466              break;              break;
5467              }              }
5468            if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }            else
           if (UCD_CATEGORY(c) == ucp_M) break;  
           eptr += len;  
           while (eptr < md->end_subject)  
5469              {              {
5470              len = 1;              int lgb, rgb;
5471              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              GETCHARINCTEST(c, eptr);
5472              if (UCD_CATEGORY(c) != ucp_M) break;              lgb = UCD_GRAPHBREAK(c);
5473              eptr += len;              while (eptr < md->end_subject)
5474                  {
5475                  int len = 1;
5476                  if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5477                  rgb = UCD_GRAPHBREAK(c);
5478                  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5479                  lgb = rgb;
5480                  eptr += len;
5481                  }
5482              }              }
5483              CHECK_PARTIAL();
5484            }            }
5485    
5486          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
# Line 5279  for (;;) Line 5494  for (;;)
5494            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5495            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
5496              {              {
5497              if (!utf8) c = *eptr; else              if (!utf) c = *eptr; else
5498                {                {
5499                BACKCHAR(eptr);                BACKCHAR(eptr);
5500                GETCHAR(c, eptr);                GETCHAR(c, eptr);
# Line 5293  for (;;) Line 5508  for (;;)
5508        else        else
5509  #endif   /* SUPPORT_UCP */  #endif   /* SUPPORT_UCP */
5510    
5511  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
5512        /* UTF-8 mode */        if (utf)
   
       if (utf8)  
5513          {          {
5514          switch(ctype)          switch(ctype)
5515            {            {
# Line 5311  for (;;) Line 5524  for (;;)
5524                  break;                  break;
5525                  }                  }
5526                if (IS_NEWLINE(eptr)) break;                if (IS_NEWLINE(eptr)) break;
5527                  if (md->partial != 0 &&    /* Take care with CRLF partial */
5528                      eptr + 1 >= md->end_subject &&
5529                      NLBLOCK->nltype == NLTYPE_FIXED &&
5530                      NLBLOCK->nllen == 2 &&
5531                      *eptr == NLBLOCK->nl[0])
5532                    {
5533                    md->hitend = TRUE;
5534                    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5535                    }
5536                eptr++;                eptr++;
5537                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5538                }                }
5539              }              }
5540    
# Line 5328  for (;;) Line 5550  for (;;)
5550                  break;                  break;
5551                  }                  }
5552                if (IS_NEWLINE(eptr)) break;                if (IS_NEWLINE(eptr)) break;
5553                  if (md->partial != 0 &&    /* Take care with CRLF partial */
5554                      eptr + 1 >= md->end_subject &&
5555                      NLBLOCK->nltype == NLTYPE_FIXED &&
5556                      NLBLOCK->nllen == 2 &&
5557                      *eptr == NLBLOCK->nl[0])
5558                    {
5559                    md->hitend = TRUE;
5560                    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5561                    }
5562                eptr++;                eptr++;
5563                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5564                }                }
5565              }              }
5566            break;            break;
# Line 5345  for (;;) Line 5576  for (;;)
5576                  break;                  break;
5577                  }                  }
5578                eptr++;                eptr++;
5579                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5580                }                }
5581              }              }
5582            else            else
# Line 5377  for (;;) Line 5608  for (;;)
5608                break;                break;
5609                }                }
5610              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
5611              if (c == 0x000d)              if (c == CHAR_CR)
5612                {                {
5613                if (++eptr >= md->end_subject) break;                if (++eptr >= md->end_subject) break;
5614                if (*eptr == 0x000a) eptr++;                if (*eptr == CHAR_LF) eptr++;
5615                }                }
5616              else              else
5617                {                {
5618                if (c != 0x000a &&                if (c != CHAR_LF &&
5619                    (md->bsr_anycrlf ||                    (md->bsr_anycrlf ||
5620                     (c != 0x000b && c != 0x000c &&                     (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5621                      c != 0x0085 && c != 0x2028 && c != 0x2029)))  #ifndef EBCDIC
5622                        && c != 0x2028 && c != 0x2029
5623    #endif  /* Not EBCDIC */
5624                        )))
5625                  break;                  break;
5626                eptr += len;                eptr += len;
5627                }                }
# Line 5408  for (;;) Line 5642  for (;;)
5642              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
5643              switch(c)              switch(c)
5644                {                {
5645                  HSPACE_CASES: gotspace = TRUE; break;
5646                default: gotspace = FALSE; break;                default: gotspace = FALSE; break;
               case 0x09:      /* HT */  
               case 0x20:      /* SPACE */  
               case 0xa0:      /* NBSP */  
               case 0x1680:    /* OGHAM SPACE MARK */  
               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
               case 0x2000:    /* EN QUAD */  
               case 0x2001:    /* EM QUAD */  
               case 0x2002:    /* EN SPACE */  
               case 0x2003:    /* EM SPACE */  
               case 0x2004:    /* THREE-PER-EM SPACE */  
               case 0x2005:    /* FOUR-PER-EM SPACE */  
               case 0x2006:    /* SIX-PER-EM SPACE */  
               case 0x2007:    /* FIGURE SPACE */  
               case 0x2008:    /* PUNCTUATION SPACE */  
               case 0x2009:    /* THIN SPACE */  
               case 0x200A:    /* HAIR SPACE */  
               case 0x202f:    /* NARROW NO-BREAK SPACE */  
               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
               case 0x3000:    /* IDEOGRAPHIC SPACE */  
               gotspace = TRUE;  
               break;  
5647                }                }
5648              if (gotspace == (ctype == OP_NOT_HSPACE)) break;              if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5649              eptr += len;              eptr += len;
# Line 5450  for (;;) Line 5664  for (;;)
5664              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
5665              switch(c)              switch(c)
5666                {                {
5667                  VSPACE_CASES: gotspace = TRUE; break;
5668                default: gotspace = FALSE; break;                default: gotspace = FALSE; break;
               case 0x0a:      /* LF */  
               case 0x0b:      /* VT */  
               case 0x0c:      /* FF */  
               case 0x0d:      /* CR */  
               case 0x85:      /* NEL */  
               case 0x2028:    /* LINE SEPARATOR */  
               case 0x2029:    /* PARAGRAPH SEPARATOR */  
               gotspace = TRUE;  
               break;  
5669                }                }
5670              if (gotspace == (ctype == OP_NOT_VSPACE)) break;              if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5671              eptr += len;              eptr += len;
# Line 5573  for (;;) Line 5779  for (;;)
5779            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5780            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5781            BACKCHAR(eptr);            BACKCHAR(eptr);
5782            if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&            if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_NL &&
5783                eptr[-1] == '\r') eptr--;                eptr[-1] == CHAR_CR) eptr--;
5784            }            }
5785          }          }
5786        else        else
5787  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
5788          /* Not UTF mode */
       /* Not UTF-8 mode */  
5789          {          {
5790          switch(ctype)          switch(ctype)
5791            {            {
# Line 5593  for (;;) Line 5798  for (;;)
5798                break;                break;
5799                }                }
5800              if (IS_NEWLINE(eptr)) break;              if (IS_NEWLINE(eptr)) break;
5801                if (md->partial != 0 &&    /* Take care with CRLF partial */
5802                    eptr + 1 >= md->end_subject &&
5803                    NLBLOCK->nltype == NLTYPE_FIXED &&
5804                    NLBLOCK->nllen == 2 &&
5805                    *eptr == NLBLOCK->nl[0])
5806                  {
5807                  md->hitend = TRUE;
5808                  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5809                  }
5810              eptr++;              eptr++;
5811              }              }
5812            break;            break;
# Line 5617  for (;;) Line 5831  for (;;)
5831                break;                break;
5832                }                }
5833              c = *eptr;              c = *eptr;
5834              if (c == 0x000d)              if (c == CHAR_CR)
5835                {                {
5836                if (++eptr >= md->end_subject) break;                if (++eptr >= md->end_subject) break;
5837                if (*eptr == 0x000a) eptr++;                if (*eptr == CHAR_LF) eptr++;
5838                }                }
5839              else              else
5840                {                {
5841                if (c != 0x000a &&                if (c != CHAR_LF && (md->bsr_anycrlf ||
5842                    (md->bsr_anycrlf ||                   (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5843                      (c != 0x000b && c != 0x000c && c != 0x0085)))  #ifdef COMPILE_PCRE16
5844                  break;                   && c != 0x2028 && c != 0x2029
5845    #endif
5846                     ))) break;
5847                eptr++;                eptr++;
5848                }                }
5849              }              }
# Line 5641  for (;;) Line 5857  for (;;)
5857                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5858                break;                break;
5859                }                }
5860              c = *eptr;              switch(*eptr)
5861              if (c == 0x09 || c == 0x20 || c == 0xa0) break;                {
5862              eptr++;                default: eptr++; break;
5863                  HSPACE_BYTE_CASES:
5864    #ifdef COMPILE_PCRE16
5865                  HSPACE_MULTIBYTE_CASES:
5866    #endif
5867                  goto ENDLOOP00;
5868                  }
5869              }              }
5870              ENDLOOP00:
5871            break;            break;
5872    
5873            case OP_HSPACE:            case OP_HSPACE:
# Line 5655  for (;;) Line 5878  for (;;)
5878                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5879                break;                break;
5880                }                }
5881              c = *eptr;              switch(*eptr)
5882              if (c != 0x09 && c != 0x20 && c != 0xa0) break;                {
5883              eptr++;                default: goto ENDLOOP01;
5884                  HSPACE_BYTE_CASES:
5885    #ifdef COMPILE_PCRE16
5886                  HSPACE_MULTIBYTE_CASES:
5887    #endif
5888                  eptr++; break;
5889                  }
5890              }              }
5891              ENDLOOP01:
5892            break;            break;
5893    
5894            case OP_NOT_VSPACE:            case OP_NOT_VSPACE:
# Line 5669  for (;;) Line 5899  for (;;)
5899                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5900                break;                break;
5901                }                }
5902              c = *eptr;              switch(*eptr)
5903              if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)                {
5904                break;                default: eptr++; break;
5905              eptr++;                VSPACE_BYTE_CASES:
5906    #ifdef COMPILE_PCRE16
5907                  VSPACE_MULTIBYTE_CASES:
5908    #endif
5909                  goto ENDLOOP02;
5910                  }
5911              }              }
5912              ENDLOOP02:
5913            break;            break;
5914    
5915            case OP_VSPACE:            case OP_VSPACE:
# Line 5684  for (;;) Line 5920  for (;;)
5920                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5921                break;                break;
5922                }                }
5923              c = *eptr;              switch(*eptr)
5924              if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)                {
5925                break;                default: goto ENDLOOP03;
5926              eptr++;                VSPACE_BYTE_CASES:
5927    #ifdef COMPILE_PCRE16
5928                  VSPACE_MULTIBYTE_CASES:
5929    #endif
5930                  eptr++; break;
5931                  }
5932              }              }
5933              ENDLOOP03:
5934            break;            break;
5935    
5936            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
# Line 5699  for (;;) Line 5941  for (;;)
5941                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5942                break;                break;
5943                }                }
5944              if ((md->ctypes[*eptr] & ctype_digit) != 0) break;              if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
5945              eptr++;              eptr++;
5946              }              }
5947            break;            break;
# Line 5712  for (;;) Line 5954  for (;;)
5954                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5955                break;                break;
5956                }                }
5957              if ((md->ctypes[*eptr] & ctype_digit) == 0) break;              if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
5958              eptr++;              eptr++;
5959              }              }
5960            break;            break;
# Line 5725  for (;;) Line 5967  for (;;)
5967                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5968                break;                break;
5969                }                }
5970              if ((md->ctypes[*eptr] & ctype_space) != 0) break;              if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
5971              eptr++;              eptr++;
5972              }              }
5973            break;            break;
# Line 5738  for (;;) Line 5980  for (;;)
5980                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5981                break;                break;
5982                }                }
5983              if ((md->ctypes[*eptr] & ctype_space) == 0) break;              if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
5984              eptr++;              eptr++;
5985              }              }
5986            break;            break;
# Line 5751  for (;;) Line 5993  for (;;)
5993                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5994                break;                break;
5995                }                }
5996              if ((md->ctypes[*eptr] & ctype_word) != 0) break;              if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
5997              eptr++;              eptr++;
5998              }              }
5999            break;            break;
# Line 5764  for (;;) Line 6006  for (;;)
6006                SCHECK_PARTIAL();                SCHECK_PARTIAL();
6007                break;                break;
6008                }                }
6009              if ((md->ctypes[*eptr] & ctype_word) == 0) break;              if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6010              eptr++;              eptr++;
6011              }              }
6012            break;            break;
# Line 5785  for (;;) Line 6027  for (;;)
6027            RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6028            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6029            eptr--;            eptr--;
6030            if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&            if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
6031                eptr[-1] == '\r') eptr--;                eptr[-1] == CHAR_CR) eptr--;
6032            }            }
6033          }          }
6034    
# Line 5827  switch (frame->Xwhere) Line 6069  switch (frame->Xwhere)
6069    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6070    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6071    LBL(65) LBL(66)    LBL(65) LBL(66)
6072  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6073    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(21)
6074    #endif
6075    #ifdef SUPPORT_UTF
6076      LBL(16) LBL(18) LBL(20)
6077      LBL(22) LBL(23) LBL(28) LBL(30)
6078    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
6079  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6080    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6081    LBL(59) LBL(60) LBL(61) LBL(62)    LBL(59) LBL(60) LBL(61) LBL(62)
6082  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
6083  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
6084    default:    default:
6085    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6086    
6087    printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
6088    
6089    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
6090    }    }
6091  #undef LBL  #undef LBL
# Line 5898  Undefine all the macros that were define Line 6147  Undefine all the macros that were define
6147  ***************************************************************************/  ***************************************************************************/
6148    
6149    
6150    #ifdef NO_RECURSE
6151    /*************************************************
6152    *          Release allocated heap frames         *
6153    *************************************************/
6154    
6155    /* Release every heap frame chained after the base frame via Xnextframe. The
6156    base frame itself is on the machine stack, and so must not be freed.
6157    
6158    Argument: the address of the base frame (only the frames after it are freed)
6159    Returns:  nothing
6160    */
6161    
6162    static void
6163    release_match_heapframes (heapframe *frame_base)
6164    {
6165    heapframe *nextframe = frame_base->Xnextframe;  /* Skip the base frame */
6166    while (nextframe != NULL)
6167      {
6168      heapframe *oldframe = nextframe;
6169      nextframe = nextframe->Xnextframe;   /* Read the link before freeing */
6170      (PUBL(stack_free))(oldframe);
6171      }
6172    }
6173    #endif
6174    
6175    
6176  /*************************************************  /*************************************************
6177  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
# Line 5923  Returns:          > 0 => success; value Line 6197  Returns:          > 0 => success; value
6197                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
6198  */  */
6199    
6200    #ifdef COMPILE_PCRE8
6201  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6202  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6203    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6204    int offsetcount)    int offsetcount)
6205    #else
6206    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6207    pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6208      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6209      int offsetcount)
6210    #endif
6211  {  {
6212  int rc, ocount, arg_offset_max;  int rc, ocount, arg_offset_max;
 int first_byte = -1;  
 int req_byte = -1;  
 int req_byte2 = -1;  
6213  int newline;  int newline;
6214  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
6215  BOOL anchored;  BOOL anchored;
6216  BOOL startline;  BOOL startline;
6217  BOOL firstline;  BOOL firstline;
6218  BOOL first_byte_caseless = FALSE;  BOOL utf;
6219  BOOL req_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
6220  BOOL utf8;  BOOL has_req_char = FALSE;
6221    pcre_uchar first_char = 0;
6222    pcre_uchar first_char2 = 0;
6223    pcre_uchar req_char = 0;
6224    pcre_uchar req_char2 = 0;
6225  match_data match_block;  match_data match_block;
6226  match_data *md = &match_block;  match_data *md = &match_block;
6227  const uschar *tables;  const pcre_uint8 *tables;
6228  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
6229  USPTR start_match = (USPTR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6230  USPTR end_subject;  PCRE_PUCHAR end_subject;
6231  USPTR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
6232  USPTR req_byte_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6233    
 pcre_study_data internal_study;  
6234  const pcre_study_data *study;  const pcre_study_data *study;
6235    const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6236    
6237  real_pcre internal_re;  #ifdef NO_RECURSE
6238  const real_pcre *external_re = (const real_pcre *)argument_re;  heapframe frame_zero;
6239  const real_pcre *re = external_re;  frame_zero.Xprevframe = NULL;            /* Marks the top level */
6240    frame_zero.Xnextframe = NULL;            /* None are allocated yet */
6241    md->match_frames_base = &frame_zero;
6242    #endif
6243    
6244    /* Check for the special magic call that measures the size of the stack used
6245    per recursive call of match(). Without the funny casting for sizeof, a Windows
6246    compiler gave this error: "unary minus operator applied to unsigned type,
6247    result still unsigned". Hopefully the cast fixes that. */
6248    
6249    if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6250        start_offset == -999)
6251    #ifdef NO_RECURSE
6252      return -((int)sizeof(heapframe));
6253    #else
6254      return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6255    #endif
6256    
6257  /* Plausibility checks */  /* Plausibility checks */
6258    
6259  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6260  if (re == NULL || subject == NULL ||  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6261     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;    return PCRE_ERROR_NULL;
6262  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6263  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6264    
6265    /* Check that the first field in the block is the magic number. If it is not,
6266    return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6267    REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6268    means that the pattern was likely compiled with a different endianness. */
6269    
6270    if (re->magic_number != MAGIC_NUMBER)
6271      return re->magic_number == REVERSED_MAGIC_NUMBER?
6272        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6273    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6274    
6275  /* These two settings are used in the code for checking a UTF-8 string that  /* These two settings are used in the code for checking a UTF-8 string that
6276  follows immediately afterwards. Other values in the md block are used only  follows immediately afterwards. Other values in the md block are used only
6277  during "normal" pcre_exec() processing, not when the JIT support is in use,  during "normal" pcre_exec() processing, not when the JIT support is in use,
6278  so they are set up later. */  so they are set up later. */
6279    
6280  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6281    utf = md->utf = (re->options & PCRE_UTF8) != 0;
6282  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6283                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6284    
6285  /* Check a UTF-8 string if required. Pass back the character offset and error  /* Check a UTF-8 string if required. Pass back the character offset and error
6286  code for an invalid string if a results vector is available. */  code for an invalid string if a results vector is available. */
6287    
6288  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6289  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6290    {    {
6291    int erroroffset;    int erroroffset;
6292    int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);    int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6293    if (errorcode != 0)    if (errorcode != 0)
6294      {      {
6295      if (offsetcount >= 2)      if (offsetcount >= 2)
# Line 5988  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 6297  if (utf8 && (options & PCRE_NO_UTF8_CHEC
6297        offsets[0] = erroroffset;        offsets[0] = erroroffset;
6298        offsets[1] = errorcode;        offsets[1] = errorcode;
6299        }        }
6300    #ifdef COMPILE_PCRE16
6301        return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6302          PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6303    #else
6304      return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?      return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6305        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6306    #endif
6307      }      }
6308    
6309    /* Check that a start_offset points to the start of a UTF-8 character. */    /* Check that a start_offset points to the start of a UTF character. */
6310    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
6311        (((USPTR)subject)[start_offset] & 0xc0) == 0x80)        NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6312      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
6313    }    }
6314  #endif  #endif
# Line 6002  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 6316  if (utf8 && (options & PCRE_NO_UTF8_CHEC
6316  /* If the pattern was successfully studied with JIT support, run the JIT  /* If the pattern was successfully studied with JIT support, run the JIT
6317  executable instead of the rest of this function. Most options must be set at  executable instead of the rest of this function. Most options must be set at
6318  compile time for the JIT code to be usable. Fall back to the normal code path if  compile time for the JIT code to be usable. Fall back to the normal code path if
6319  an unsupported flag is set. In particular, JIT does not support partial  an unsupported flag is set. */
 matching. */  
6320    
6321  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
6322  if (extra_data != NULL  if (extra_data != NULL
6323      && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0      && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6324                                 PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6325      && extra_data->executable_jit != NULL      && extra_data->executable_jit != NULL
     && (extra_data->flags & PCRE_EXTRA_TABLES) == 0  
6326      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6327                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
6328    return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,                      PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
6329      start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)    {
6330      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);    rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,
6331           start_offset, options, offsets, offsetcount);
6332    
6333      /* PCRE_ERROR_NULL means that the selected normal or partial matching
6334      mode is not compiled. In this case we simply fall back to the interpreter. */
6335    
6336      if (rc != PCRE_ERROR_NULL) return rc;
6337      }
6338  #endif  #endif
6339    
6340  /* Carry on with non-JIT matching. This information is for finding all the  /* Carry on with non-JIT matching. This information is for finding all the
6341  numbers associated with a given name, for condition testing. */  numbers associated with a given name, for condition testing. */
6342    
6343  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (pcre_uchar *)re + re->name_table_offset;
6344  md->name_count = re->name_count;  md->name_count = re->name_count;
6345  md->name_entry_size = re->name_entry_size;  md->name_entry_size = re->name_entry_size;
6346    
# Line 6034  md->callout_data = NULL; Line 6354  md->callout_data = NULL;
6354    
6355  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
6356    
6357  tables = external_re->tables;  tables = re->tables;
6358    
6359  if (extra_data != NULL)  if (extra_data != NULL)
6360    {    {
# Line 6054  if (extra_data != NULL) Line 6374  if (extra_data != NULL)
6374  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
6375  in other programs later. */  in other programs later. */
6376    
6377  if (tables == NULL) tables = _pcre_default_tables;  if (tables == NULL) tables = PRIV(default_tables);
   
 /* Check that the first field in the block is the magic number. If it is not,  
 test for a regex that was compiled on a host of opposite endianness. If this is  
 the case, flipped values are put in internal_re and internal_study if there was  
 study data too. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   {  
   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);  
   if (re == NULL) return PCRE_ERROR_BADMAGIC;  
   if (study != NULL) study = &internal_study;  
   }  
6378    
6379  /* Set up other data */  /* Set up other data */
6380    
# Line 6076  firstline = (re->options & PCRE_FIRSTLIN Line 6384  firstline = (re->options & PCRE_FIRSTLIN
6384    
6385  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
6386    
6387  md->start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6388    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
6389    
6390  md->start_subject = (USPTR)subject;  md->start_subject = (PCRE_PUCHAR)subject;
6391  md->start_offset = start_offset;  md->start_offset = start_offset;
6392  md->end_subject = md->start_subject + length;  md->end_subject = md->start_subject + length;
6393  end_subject = md->end_subject;  end_subject = md->end_subject;
# Line 6104  md->recursive = NULL; Line 6412  md->recursive = NULL;
6412  md->hasthen = (re->flags & PCRE_HASTHEN) != 0;  md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6413    
6414  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
6415    md->fcc = tables + fcc_offset;
6416  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
6417