/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 779 by ph10, Fri Dec 2 10:39:32 2011 UTC revision 926 by ph10, Wed Feb 22 15:01:32 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 113  Returns:     nothing Line 113  Returns:     nothing
113  */  */
114    
115  static void  static void
116  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
117  {  {
118  unsigned int c;  unsigned int c;
119  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 140  Arguments: Line 140  Arguments:
140    md          points to match data block    md          points to match data block
141    caseless    TRUE if caseless    caseless    TRUE if caseless
142    
143  Returns:      < 0 if not matched, otherwise the number of subject bytes matched  Returns:      >= 0 the number of subject bytes matched
144                  -1 no match
145                  -2 partial match; always given if at end subject
146  */  */
147    
148  static int  static int
149  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
150    BOOL caseless)    BOOL caseless)
151  {  {
152  USPTR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
153  register USPTR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
154    
155  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
156  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 163  pchars(p, length, FALSE, md); Line 165  pchars(p, length, FALSE, md);
165  printf("\n");  printf("\n");
166  #endif  #endif
167    
168  /* Always fail if reference not set (and not JavaScript compatible). */  /* Always fail if reference not set (and not JavaScript compatible - in that
169    case the length is passed as zero). */
170    
171  if (length < 0) return -1;  if (length < 0) return -1;
172    
# Line 173  ASCII characters. */ Line 176  ASCII characters. */
176    
177  if (caseless)  if (caseless)
178    {    {
179  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
180  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
181    if (md->utf8)    if (md->utf)
182      {      {
183      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
184      bytes matched may differ, because there are some characters whose upper and      bytes matched may differ, because there are some characters whose upper and
# Line 185  if (caseless) Line 188  if (caseless)
188      the latter. It is important, therefore, to check the length along the      the latter. It is important, therefore, to check the length along the
189      reference, not along the subject (earlier code did this wrong). */      reference, not along the subject (earlier code did this wrong). */
190    
191      USPTR endptr = p + length;      PCRE_PUCHAR endptr = p + length;
192      while (p < endptr)      while (p < endptr)
193        {        {
194        int c, d;        int c, d;
195        if (eptr >= md->end_subject) return -1;        if (eptr >= md->end_subject) return -2;   /* Partial match */
196        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
197        GETCHARINC(d, p);        GETCHARINC(d, p);
198        if (c != d && c != UCD_OTHERCASE(d)) return -1;        if (c != d && c != UCD_OTHERCASE(d)) return -1;
# Line 202  if (caseless) Line 205  if (caseless)
205    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
206    is no UCP support. */    is no UCP support. */
207      {      {
     if (eptr + length > md->end_subject) return -1;  
208      while (length-- > 0)      while (length-- > 0)
209        { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }        {
210          if (eptr >= md->end_subject) return -2;   /* Partial match */
211          if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
212          p++;
213          eptr++;
214          }
215      }      }
216    }    }
217    
# Line 213  are in UTF-8 mode. */ Line 220  are in UTF-8 mode. */
220    
221  else  else
222    {    {
223    if (eptr + length > md->end_subject) return -1;    while (length-- > 0)
224    while (length-- > 0) if (*p++ != *eptr++) return -1;      {
225        if (eptr >= md->end_subject) return -2;   /* Partial match */
226        if (*p++ != *eptr++) return -1;
227        }
228    }    }
229    
230  return (int)(eptr - eptr_start);  return (int)(eptr - eptr_start);
# Line 307  argument of match(), which never changes Line 317  argument of match(), which never changes
317    
318  #define RMATCH(ra,rb,rc,rd,re,rw)\  #define RMATCH(ra,rb,rc,rd,re,rw)\
319    {\    {\
320    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
321    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
322    frame->Xwhere = rw; \    frame->Xwhere = rw; \
323    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
# Line 328  argument of match(), which never changes Line 338  argument of match(), which never changes
338    {\    {\
339    heapframe *oldframe = frame;\    heapframe *oldframe = frame;\
340    frame = oldframe->Xprevframe;\    frame = oldframe->Xprevframe;\
341    (pcre_stack_free)(oldframe);\    if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\
342    if (frame != NULL)\    if (frame != NULL)\
343      {\      {\
344      rrc = ra;\      rrc = ra;\
# Line 345  typedef struct heapframe { Line 355  typedef struct heapframe {
355    
356    /* Function arguments that may change */    /* Function arguments that may change */
357    
358    USPTR Xeptr;    PCRE_PUCHAR Xeptr;
359    const uschar *Xecode;    const pcre_uchar *Xecode;
360    USPTR Xmstart;    PCRE_PUCHAR Xmstart;
361    int Xoffset_top;    int Xoffset_top;
362    eptrblock *Xeptrb;    eptrblock *Xeptrb;
363    unsigned int Xrdepth;    unsigned int Xrdepth;
364    
365    /* Function local variables */    /* Function local variables */
366    
367    USPTR Xcallpat;    PCRE_PUCHAR Xcallpat;
368  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
369    USPTR Xcharptr;    PCRE_PUCHAR Xcharptr;
370  #endif  #endif
371    USPTR Xdata;    PCRE_PUCHAR Xdata;
372    USPTR Xnext;    PCRE_PUCHAR Xnext;
373    USPTR Xpp;    PCRE_PUCHAR Xpp;
374    USPTR Xprev;    PCRE_PUCHAR Xprev;
375    USPTR Xsaved_eptr;    PCRE_PUCHAR Xsaved_eptr;
376    
377    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
378    
# Line 375  typedef struct heapframe { Line 385  typedef struct heapframe {
385    int Xprop_value;    int Xprop_value;
386    int Xprop_fail_result;    int Xprop_fail_result;
387    int Xoclength;    int Xoclength;
388    uschar Xocchars[8];    pcre_uchar Xocchars[6];
389  #endif  #endif
390    
391    int Xcodelink;    int Xcodelink;
# Line 440  the subject. */ Line 450  the subject. */
450    
451    
452  /* Performance note: It might be tempting to extract commonly used fields from  /* Performance note: It might be tempting to extract commonly used fields from
453  the md structure (e.g. utf8, end_subject) into individual variables to improve  the md structure (e.g. utf, end_subject) into individual variables to improve
454  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
455  made performance worse.  made performance worse.
456    
# Line 463  Returns:       MATCH_MATCH if matched Line 473  Returns:       MATCH_MATCH if matched
473  */  */
474    
475  static int  static int
476  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
477    int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)    PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
478      unsigned int rdepth)
479  {  {
480  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
481  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 473  so they can be ordinary variables in all Line 484  so they can be ordinary variables in all
484  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
485  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
486  register unsigned int c;   /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
487  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf;         /* Local copy of UTF flag for speed */
488    
489  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
490  BOOL caseless;  BOOL caseless;
491  int condcode;  int condcode;
492    
493  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
494  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame". We set up the top-level
495  heap storage. Set up the top-level frame here; others are obtained from the  frame on the stack here; subsequent instantiations are obtained from the heap
496  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  whenever RMATCH() does a "recursion". See the macro definitions above. Putting
497    the top-level on the stack rather than malloc-ing them all gives a performance
498    boost in many cases where there is not much "recursion". */
499    
500  #ifdef NO_RECURSE  #ifdef NO_RECURSE
501  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));  heapframe frame_zero;
502  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  heapframe *frame = &frame_zero;
503  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
504    
505  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 513  HEAP_RECURSE: Line 526  HEAP_RECURSE:
526    
527  /* Ditto for the local variables */  /* Ditto for the local variables */
528    
529  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
530  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
531  #endif  #endif
532  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
# Line 571  declarations can be cut out in a block. Line 584  declarations can be cut out in a block.
584  below are for variables that do not have to be preserved over a recursive call  below are for variables that do not have to be preserved over a recursive call
585  to RMATCH(). */  to RMATCH(). */
586    
587  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
588  const uschar *charptr;  const pcre_uchar *charptr;
589  #endif  #endif
590  const uschar *callpat;  const pcre_uchar *callpat;
591  const uschar *data;  const pcre_uchar *data;
592  const uschar *next;  const pcre_uchar *next;
593  USPTR         pp;  PCRE_PUCHAR       pp;
594  const uschar *prev;  const pcre_uchar *prev;
595  USPTR         saved_eptr;  PCRE_PUCHAR       saved_eptr;
596    
597  recursion_info new_recursive;  recursion_info new_recursive;
598    
# Line 592  int prop_type; Line 605  int prop_type;
605  int prop_value;  int prop_value;
606  int prop_fail_result;  int prop_fail_result;
607  int oclength;  int oclength;
608  uschar occhars[8];  pcre_uchar occhars[6];
609  #endif  #endif
610    
611  int codelink;  int codelink;
# Line 608  int save_offset1, save_offset2, save_off Line 621  int save_offset1, save_offset2, save_off
621  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
622    
623  eptrblock newptrb;  eptrblock newptrb;
624    
625    /* There is a special fudge for calling match() in a way that causes it to
626    measure the size of its basic stack frame when the stack is being used for
627    recursion. The second argument (ecode) being NULL triggers this behaviour. It
628    cannot normally ever be NULL. The return is the negated value of the frame
629    size. */
630    
631    if (ecode == NULL)
632      {
633      if (rdepth == 0)
634        return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
635      else
636        {
637        int len = (char *)&rdepth - (char *)eptr;
638        return (len > 0)? -len : len;
639        }
640      }
641  #endif     /* NO_RECURSE */  #endif     /* NO_RECURSE */
642    
643  /* To save space on the stack and in the heap frame, I have doubled up on some  /* To save space on the stack and in the heap frame, I have doubled up on some
# Line 620  the alternative names that are used. */ Line 650  the alternative names that are used. */
650  #define code_offset   codelink  #define code_offset   codelink
651  #define condassert    condition  #define condassert    condition
652  #define matched_once  prev_is_word  #define matched_once  prev_is_word
653    #define foc           number
654    #define save_mark     data
655    
656  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
657  variables. */  variables. */
# Line 645  defined). However, RMATCH isn't like a f Line 677  defined). However, RMATCH isn't like a f
677  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
678  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
679    
680  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
681  utf8 = md->utf8;       /* Local copy of the flag */  utf = md->utf;       /* Local copy of the flag */
682  #else  #else
683  utf8 = FALSE;  utf = FALSE;
684  #endif  #endif
685    
686  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
# Line 689  for (;;) Line 721  for (;;)
721      case OP_MARK:      case OP_MARK:
722      md->nomatch_mark = ecode + 2;      md->nomatch_mark = ecode + 2;
723      md->mark = NULL;    /* In case previously set by assertion */      md->mark = NULL;    /* In case previously set by assertion */
724      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
725        eptrb, RM55);        eptrb, RM55);
726      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
727           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
# Line 702  for (;;) Line 734  for (;;)
734      unaltered. */      unaltered. */
735    
736      else if (rrc == MATCH_SKIP_ARG &&      else if (rrc == MATCH_SKIP_ARG &&
737          strcmp((char *)(ecode + 2), (char *)(md->start_match_ptr)) == 0)          STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
738        {        {
739        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
740        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
# Line 715  for (;;) Line 747  for (;;)
747      /* COMMIT overrides PRUNE, SKIP, and THEN */      /* COMMIT overrides PRUNE, SKIP, and THEN */
748    
749      case OP_COMMIT:      case OP_COMMIT:
750      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
751        eptrb, RM52);        eptrb, RM52);
752      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
753          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
# Line 726  for (;;) Line 758  for (;;)
758      /* PRUNE overrides THEN */      /* PRUNE overrides THEN */
759    
760      case OP_PRUNE:      case OP_PRUNE:
761      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
762        eptrb, RM51);        eptrb, RM51);
763      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
764      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
# Line 734  for (;;) Line 766  for (;;)
766      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
767      md->nomatch_mark = ecode + 2;      md->nomatch_mark = ecode + 2;
768      md->mark = NULL;    /* In case previously set by assertion */      md->mark = NULL;    /* In case previously set by assertion */
769      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
770        eptrb, RM56);        eptrb, RM56);
771      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
772           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
# Line 744  for (;;) Line 776  for (;;)
776      /* SKIP overrides PRUNE and THEN */      /* SKIP overrides PRUNE and THEN */
777    
778      case OP_SKIP:      case OP_SKIP:
779      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
780        eptrb, RM53);        eptrb, RM53);
781      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
782        RRETURN(rrc);        RRETURN(rrc);
# Line 752  for (;;) Line 784  for (;;)
784      RRETURN(MATCH_SKIP);      RRETURN(MATCH_SKIP);
785    
786      /* Note that, for Perl compatibility, SKIP with an argument does NOT set      /* Note that, for Perl compatibility, SKIP with an argument does NOT set
787      nomatch_mark. There is a flag that disables this opcode when re-matching a      nomatch_mark. There is a flag that disables this opcode when re-matching a
788      pattern that ended with a SKIP for which there was not a matching MARK. */      pattern that ended with a SKIP for which there was not a matching MARK. */
789    
790      case OP_SKIP_ARG:      case OP_SKIP_ARG:
791      if (md->ignore_skip_arg)      if (md->ignore_skip_arg)
792        {        {
793        ecode += _pcre_OP_lengths[*ecode] + ecode[1];        ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
794        break;        break;
795        }        }
796      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
797        eptrb, RM57);        eptrb, RM57);
798      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
799        RRETURN(rrc);        RRETURN(rrc);
800    
801      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
802      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
803      caught by a matching MARK, or get to the top, where it causes a rematch      caught by a matching MARK, or get to the top, where it causes a rematch
804      with the md->ignore_skip_arg flag set. */      with the md->ignore_skip_arg flag set. */
805    
806      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
# Line 779  for (;;) Line 811  for (;;)
811      match pointer to do this. */      match pointer to do this. */
812    
813      case OP_THEN:      case OP_THEN:
814      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
815        eptrb, RM54);        eptrb, RM54);
816      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
817      md->start_match_ptr = ecode;      md->start_match_ptr = ecode;
# Line 788  for (;;) Line 820  for (;;)
820      case OP_THEN_ARG:      case OP_THEN_ARG:
821      md->nomatch_mark = ecode + 2;      md->nomatch_mark = ecode + 2;
822      md->mark = NULL;    /* In case previously set by assertion */      md->mark = NULL;    /* In case previously set by assertion */
823      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
824        md, eptrb, RM58);        md, eptrb, RM58);
825      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
826           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
# Line 812  for (;;) Line 844  for (;;)
844      case OP_ONCE_NC:      case OP_ONCE_NC:
845      prev = ecode;      prev = ecode;
846      saved_eptr = eptr;      saved_eptr = eptr;
847        save_mark = md->mark;
848      do      do
849        {        {
850        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
# Line 830  for (;;) Line 863  for (;;)
863    
864        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
865        ecode += GET(ecode,1);        ecode += GET(ecode,1);
866          md->mark = save_mark;
867        }        }
868      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
869    
# Line 909  for (;;) Line 943  for (;;)
943        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
944        save_offset3 = md->offset_vector[md->offset_end - number];        save_offset3 = md->offset_vector[md->offset_end - number];
945        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
946          save_mark = md->mark;
947    
948        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
949        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
# Line 917  for (;;) Line 952  for (;;)
952        for (;;)        for (;;)
953          {          {
954          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
955          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
956            eptrb, RM1);            eptrb, RM1);
957          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
958    
# Line 945  for (;;) Line 980  for (;;)
980          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
981          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
982          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
983            md->mark = save_mark;
984          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
985          }          }
986    
# Line 1004  for (;;) Line 1040  for (;;)
1040    
1041        else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)        else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1042          {          {
1043          ecode += _pcre_OP_lengths[*ecode];          ecode += PRIV(OP_lengths)[*ecode];
1044          goto TAIL_RECURSE;          goto TAIL_RECURSE;
1045          }          }
1046    
1047        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1048    
1049        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,        save_mark = md->mark;
1050          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1051          RM2);          RM2);
1052    
1053        /* See comment in the code for capturing groups above about handling        /* See comment in the code for capturing groups above about handling
# Line 1028  for (;;) Line 1065  for (;;)
1065          {          {
1066          if (rrc == MATCH_ONCE)          if (rrc == MATCH_ONCE)
1067            {            {
1068            const uschar *scode = ecode;            const pcre_uchar *scode = ecode;
1069            if (*scode != OP_ONCE)           /* If not at start, find it */            if (*scode != OP_ONCE)           /* If not at start, find it */
1070              {              {
1071              while (*scode == OP_ALT) scode += GET(scode, 1);              while (*scode == OP_ALT) scode += GET(scode, 1);
# Line 1039  for (;;) Line 1076  for (;;)
1076          RRETURN(rrc);          RRETURN(rrc);
1077          }          }
1078        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1079          md->mark = save_mark;
1080        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1081        }        }
1082    
# Line 1093  for (;;) Line 1131  for (;;)
1131          md->offset_vector[md->offset_end - number] =          md->offset_vector[md->offset_end - number] =
1132            (int)(eptr - md->start_subject);            (int)(eptr - md->start_subject);
1133          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1134          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1135            eptrb, RM63);            eptrb, RM63);
1136          if (rrc == MATCH_KETRPOS)          if (rrc == MATCH_KETRPOS)
1137            {            {
# Line 1165  for (;;) Line 1203  for (;;)
1203      for (;;)      for (;;)
1204        {        {
1205        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1206        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1207          eptrb, RM48);          eptrb, RM48);
1208        if (rrc == MATCH_KETRPOS)        if (rrc == MATCH_KETRPOS)
1209          {          {
# Line 1215  for (;;) Line 1253  for (;;)
1253    
1254      if (ecode[LINK_SIZE+1] == OP_CALLOUT)      if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1255        {        {
1256        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
1257          {          {
1258          pcre_callout_block cb;          PUBL(callout_block) cb;
1259          cb.version          = 2;   /* Version 2 of the callout block */          cb.version          = 2;   /* Version 2 of the callout block */
1260          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1261          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1262    #ifdef COMPILE_PCRE8
1263          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
1264    #else
1265            cb.subject          = (PCRE_SPTR16)md->start_subject;
1266    #endif
1267          cb.subject_length   = (int)(md->end_subject - md->start_subject);          cb.subject_length   = (int)(md->end_subject - md->start_subject);
1268          cb.start_match      = (int)(mstart - md->start_subject);          cb.start_match      = (int)(mstart - md->start_subject);
1269          cb.current_position = (int)(eptr - md->start_subject);          cb.current_position = (int)(eptr - md->start_subject);
# Line 1231  for (;;) Line 1273  for (;;)
1273          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1274          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1275          cb.mark             = md->nomatch_mark;          cb.mark             = md->nomatch_mark;
1276          if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1277          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1278          }          }
1279        ecode += _pcre_OP_lengths[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1280        }        }
1281    
1282      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1260  for (;;) Line 1302  for (;;)
1302    
1303          if (!condition && condcode == OP_NRREF)          if (!condition && condcode == OP_NRREF)
1304            {            {
1305            uschar *slotA = md->name_table;            pcre_uchar *slotA = md->name_table;
1306            for (i = 0; i < md->name_count; i++)            for (i = 0; i < md->name_count; i++)
1307              {              {
1308              if (GET2(slotA, 0) == recno) break;              if (GET2(slotA, 0) == recno) break;
# Line 1273  for (;;) Line 1315  for (;;)
1315    
1316            if (i < md->name_count)            if (i < md->name_count)
1317              {              {
1318              uschar *slotB = slotA;              pcre_uchar *slotB = slotA;
1319              while (slotB > md->name_table)              while (slotB > md->name_table)
1320                {                {
1321                slotB -= md->name_entry_size;                slotB -= md->name_entry_size;
1322                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1323                  {                  {
1324                  condition = GET2(slotB, 0) == md->recursive->group_num;                  condition = GET2(slotB, 0) == md->recursive->group_num;
1325                  if (condition) break;                  if (condition) break;
# Line 1293  for (;;) Line 1335  for (;;)
1335                for (i++; i < md->name_count; i++)                for (i++; i < md->name_count; i++)
1336                  {                  {
1337                  slotB += md->name_entry_size;                  slotB += md->name_entry_size;
1338                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                  if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1339                    {                    {
1340                    condition = GET2(slotB, 0) == md->recursive->group_num;                    condition = GET2(slotB, 0) == md->recursive->group_num;
1341                    if (condition) break;                    if (condition) break;
# Line 1306  for (;;) Line 1348  for (;;)
1348    
1349          /* Choose branch according to the condition */          /* Choose branch according to the condition */
1350    
1351          ecode += condition? 3 : GET(ecode, 1);          ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1352          }          }
1353        }        }
1354    
# Line 1323  for (;;) Line 1365  for (;;)
1365        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
1366          {          {
1367          int refno = offset >> 1;          int refno = offset >> 1;
1368          uschar *slotA = md->name_table;          pcre_uchar *slotA = md->name_table;
1369    
1370          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
1371            {            {
# Line 1337  for (;;) Line 1379  for (;;)
1379    
1380          if (i < md->name_count)          if (i < md->name_count)
1381            {            {
1382            uschar *slotB = slotA;            pcre_uchar *slotB = slotA;
1383            while (slotB > md->name_table)            while (slotB > md->name_table)
1384              {              {
1385              slotB -= md->name_entry_size;              slotB -= md->name_entry_size;
1386              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1387                {                {
1388                offset = GET2(slotB, 0) << 1;                offset = GET2(slotB, 0) << 1;
1389                condition = offset < offset_top &&                condition = offset < offset_top &&
# Line 1359  for (;;) Line 1401  for (;;)
1401              for (i++; i < md->name_count; i++)              for (i++; i < md->name_count; i++)
1402                {                {
1403                slotB += md->name_entry_size;                slotB += md->name_entry_size;
1404                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1405                  {                  {
1406                  offset = GET2(slotB, 0) << 1;                  offset = GET2(slotB, 0) << 1;
1407                  condition = offset < offset_top &&                  condition = offset < offset_top &&
# Line 1374  for (;;) Line 1416  for (;;)
1416    
1417        /* Choose branch according to the condition */        /* Choose branch according to the condition */
1418    
1419        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1420        }        }
1421    
1422      else if (condcode == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
# Line 1466  for (;;) Line 1508  for (;;)
1508        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1509        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1510        }        }
1511      ecode += 3;      ecode += 1 + IMM2_SIZE;
1512      break;      break;
1513    
1514    
# Line 1513  for (;;) Line 1555  for (;;)
1555    
1556      case OP_ASSERT:      case OP_ASSERT:
1557      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1558        save_mark = md->mark;
1559      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1560        {        {
1561        condassert = TRUE;        condassert = TRUE;
# Line 1534  for (;;) Line 1577  for (;;)
1577    
1578        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1579        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1580          md->mark = save_mark;
1581        }        }
1582      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1583    
# Line 1557  for (;;) Line 1601  for (;;)
1601    
1602      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1603      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1604        save_mark = md->mark;
1605      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1606        {        {
1607        condassert = TRUE;        condassert = TRUE;
# Line 1567  for (;;) Line 1612  for (;;)
1612      do      do
1613        {        {
1614        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1615          md->mark = save_mark;
1616        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1617        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1618          {          {
# Line 1593  for (;;) Line 1639  for (;;)
1639      back a number of characters, not bytes. */      back a number of characters, not bytes. */
1640    
1641      case OP_REVERSE:      case OP_REVERSE:
1642  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1643      if (utf8)      if (utf)
1644        {        {
1645        i = GET(ecode, 1);        i = GET(ecode, 1);
1646        while (i-- > 0)        while (i-- > 0)
# Line 1625  for (;;) Line 1671  for (;;)
1671      function is able to force a failure. */      function is able to force a failure. */
1672    
1673      case OP_CALLOUT:      case OP_CALLOUT:
1674      if (pcre_callout != NULL)      if (PUBL(callout) != NULL)
1675        {        {
1676        pcre_callout_block cb;        PUBL(callout_block) cb;
1677        cb.version          = 2;   /* Version 2 of the callout block */        cb.version          = 2;   /* Version 2 of the callout block */
1678        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1679        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1680    #ifdef COMPILE_PCRE8
1681        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1682    #else
1683          cb.subject          = (PCRE_SPTR16)md->start_subject;
1684    #endif
1685        cb.subject_length   = (int)(md->end_subject - md->start_subject);        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1686        cb.start_match      = (int)(mstart - md->start_subject);        cb.start_match      = (int)(mstart - md->start_subject);
1687        cb.current_position = (int)(eptr - md->start_subject);        cb.current_position = (int)(eptr - md->start_subject);
# Line 1641  for (;;) Line 1691  for (;;)
1691        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1692        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1693        cb.mark             = md->nomatch_mark;        cb.mark             = md->nomatch_mark;
1694        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1695        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1696        }        }
1697      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 1700  for (;;) Line 1750  for (;;)
1750        else        else
1751          {          {
1752          new_recursive.offset_save =          new_recursive.offset_save =
1753            (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));            (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1754          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1755          }          }
1756        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
# Line 1715  for (;;) Line 1765  for (;;)
1765        do        do
1766          {          {
1767          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1768          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1769            md, eptrb, RM6);            md, eptrb, RM6);
1770          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1771              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
# Line 1724  for (;;) Line 1774  for (;;)
1774            {            {
1775            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1776            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1777              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1778    
1779            /* Set where we got to in the subject, and reset the start in case            /* Set where we got to in the subject, and reset the start in case
1780            it was changed by \K. This *is* propagated back out of a recursion,            it was changed by \K. This *is* propagated back out of a recursion,
# Line 1742  for (;;) Line 1792  for (;;)
1792            {            {
1793            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1794            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1795              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1796            RRETURN(rrc);            RRETURN(rrc);
1797            }            }
1798    
# Line 1754  for (;;) Line 1804  for (;;)
1804        DPRINTF(("Recursion didn't match\n"));        DPRINTF(("Recursion didn't match\n"));
1805        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1806        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1807          (pcre_free)(new_recursive.offset_save);          (PUBL(free))(new_recursive.offset_save);
1808        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1809        }        }
1810    
# Line 2015  for (;;) Line 2065  for (;;)
2065    
2066      case OP_DOLLM:      case OP_DOLLM:
2067      if (eptr < md->end_subject)      if (eptr < md->end_subject)
2068        { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }        {
2069          if (!IS_NEWLINE(eptr))
2070            {
2071            if (md->partial != 0 &&
2072                eptr + 1 >= md->end_subject &&
2073                NLBLOCK->nltype == NLTYPE_FIXED &&
2074                NLBLOCK->nllen == 2 &&
2075                *eptr == NLBLOCK->nl[0])
2076              {
2077              md->hitend = TRUE;
2078              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2079              }
2080            RRETURN(MATCH_NOMATCH);
2081            }
2082          }
2083      else      else
2084        {        {
2085        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
# Line 2047  for (;;) Line 2111  for (;;)
2111      ASSERT_NL_OR_EOS:      ASSERT_NL_OR_EOS:
2112      if (eptr < md->end_subject &&      if (eptr < md->end_subject &&
2113          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2114          {
2115          if (md->partial != 0 &&
2116              eptr + 1 >= md->end_subject &&
2117              NLBLOCK->nltype == NLTYPE_FIXED &&
2118              NLBLOCK->nllen == 2 &&
2119              *eptr == NLBLOCK->nl[0])
2120            {
2121            md->hitend = TRUE;
2122            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2123            }
2124        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2125          }
2126    
2127      /* Either at end of string or \n before end. */      /* Either at end of string or \n before end. */
2128    
# Line 2066  for (;;) Line 2141  for (;;)
2141        be "non-word" characters. Remember the earliest consulted character for        be "non-word" characters. Remember the earliest consulted character for
2142        partial matching. */        partial matching. */
2143    
2144  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2145        if (utf8)        if (utf)
2146          {          {
2147          /* Get status of previous character */          /* Get status of previous character */
2148    
2149          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
2150            {            {
2151            USPTR lastptr = eptr - 1;            PCRE_PUCHAR lastptr = eptr - 1;
2152            while((*lastptr & 0xc0) == 0x80) lastptr--;            BACKCHAR(lastptr);
2153            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2154            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
2155  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2139  for (;;) Line 2214  for (;;)
2214              }              }
2215            else            else
2216  #endif  #endif
2217            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = MAX_255(eptr[-1])
2218                && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2219            }            }
2220    
2221          /* Get status of next character */          /* Get status of next character */
# Line 2162  for (;;) Line 2238  for (;;)
2238            }            }
2239          else          else
2240  #endif  #endif
2241          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          cur_is_word = MAX_255(*eptr)
2242              && ((md->ctypes[*eptr] & ctype_word) != 0);
2243          }          }
2244    
2245        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
# Line 2173  for (;;) Line 2250  for (;;)
2250        }        }
2251      break;      break;
2252    
2253      /* Match a single character type; inline for speed */      /* Match any single character type except newline; have to take care with
2254        CRLF newlines and partial matching. */
2255    
2256      case OP_ANY:      case OP_ANY:
2257      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2258        if (md->partial != 0 &&
2259            eptr + 1 >= md->end_subject &&
2260            NLBLOCK->nltype == NLTYPE_FIXED &&
2261            NLBLOCK->nllen == 2 &&
2262            *eptr == NLBLOCK->nl[0])
2263          {
2264          md->hitend = TRUE;
2265          if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2266          }
2267    
2268      /* Fall through */      /* Fall through */
2269    
2270        /* Match any single character whatsoever. */
2271    
2272      case OP_ALLANY:      case OP_ALLANY:
2273      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2274        {                            /* not be updated before SCHECK_PARTIAL. */        {                            /* not be updated before SCHECK_PARTIAL. */
# Line 2186  for (;;) Line 2276  for (;;)
2276        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2277        }        }
2278      eptr++;      eptr++;
2279      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  #ifdef SUPPORT_UTF
2280        if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2281    #endif
2282      ecode++;      ecode++;
2283      break;      break;
2284    
# Line 2211  for (;;) Line 2303  for (;;)
2303        }        }
2304      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2305      if (      if (
2306  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2307         c < 256 &&         c < 256 &&
2308  #endif  #endif
2309         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
# Line 2228  for (;;) Line 2320  for (;;)
2320        }        }
2321      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2322      if (      if (
2323  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2324         c >= 256 ||         c > 255 ||
2325  #endif  #endif
2326         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
2327         )         )
# Line 2245  for (;;) Line 2337  for (;;)
2337        }        }
2338      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2339      if (      if (
2340  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2341         c < 256 &&         c < 256 &&
2342  #endif  #endif
2343         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
# Line 2262  for (;;) Line 2354  for (;;)
2354        }        }
2355      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2356      if (      if (
2357  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2358         c >= 256 ||         c > 255 ||
2359  #endif  #endif
2360         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
2361         )         )
# Line 2279  for (;;) Line 2371  for (;;)
2371        }        }
2372      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2373      if (      if (
2374  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2375         c < 256 &&         c < 256 &&
2376  #endif  #endif
2377         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
# Line 2296  for (;;) Line 2388  for (;;)
2388        }        }
2389      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2390      if (      if (
2391  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2392         c >= 256 ||         c > 255 ||
2393  #endif  #endif
2394         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
2395         )         )
# Line 2317  for (;;) Line 2409  for (;;)
2409        default: RRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2410    
2411        case 0x000d:        case 0x000d:
2412        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr >= md->end_subject)
2413            {
2414            SCHECK_PARTIAL();
2415            }
2416          else if (*eptr == 0x0a) eptr++;
2417        break;        break;
2418    
2419        case 0x000a:        case 0x000a:
# Line 2475  for (;;) Line 2571  for (;;)
2571          break;          break;
2572    
2573          case PT_GC:          case PT_GC:
2574          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2575            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2576          break;          break;
2577    
# Line 2492  for (;;) Line 2588  for (;;)
2588          /* These are specials */          /* These are specials */
2589    
2590          case PT_ALNUM:          case PT_ALNUM:
2591          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2592               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2593            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2594          break;          break;
2595    
2596          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2597          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2598               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2599                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2600            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2601          break;          break;
2602    
2603          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2604          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2605               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2606               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2607                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
# Line 2513  for (;;) Line 2609  for (;;)
2609          break;          break;
2610    
2611          case PT_WORD:          case PT_WORD:
2612          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2613               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2614               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2615            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2616          break;          break;
# Line 2543  for (;;) Line 2639  for (;;)
2639      while (eptr < md->end_subject)      while (eptr < md->end_subject)
2640        {        {
2641        int len = 1;        int len = 1;
2642        if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }        if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2643        if (UCD_CATEGORY(c) != ucp_M) break;        if (UCD_CATEGORY(c) != ucp_M) break;
2644        eptr += len;        eptr += len;
2645        }        }
2646        CHECK_PARTIAL();
2647      ecode++;      ecode++;
2648      break;      break;
2649  #endif  #endif
# Line 2564  for (;;) Line 2661  for (;;)
2661      case OP_REFI:      case OP_REFI:
2662      caseless = op == OP_REFI;      caseless = op == OP_REFI;
2663      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2664      ecode += 3;      ecode += 1 + IMM2_SIZE;
2665    
2666      /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2667    
# Line 2604  for (;;) Line 2701  for (;;)
2701        case OP_CRMINRANGE:        case OP_CRMINRANGE:
2702        minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2703        min = GET2(ecode, 1);        min = GET2(ecode, 1);
2704        max = GET2(ecode, 3);        max = GET2(ecode, 1 + IMM2_SIZE);
2705        if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2706        ecode += 5;        ecode += 1 + 2 * IMM2_SIZE;
2707        break;        break;
2708    
2709        default:               /* No repeat follows */        default:               /* No repeat follows */
2710        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2711          {          {
2712            if (length == -2) eptr = md->end_subject;   /* Partial match */
2713          CHECK_PARTIAL();          CHECK_PARTIAL();
2714          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2715          }          }
# Line 2620  for (;;) Line 2718  for (;;)
2718        }        }
2719    
2720      /* Handle repeated back references. If the length of the reference is      /* Handle repeated back references. If the length of the reference is
2721      zero, just continue with the main loop. */      zero, just continue with the main loop. If the length is negative, it
2722        means the reference is unset in non-Java-compatible mode. If the minimum is
2723        zero, we can continue at the same level without recursion. For any other
2724        minimum, carrying on will result in NOMATCH. */
2725    
2726      if (length == 0) continue;      if (length == 0) continue;
2727        if (length < 0 && min == 0) continue;
2728    
2729      /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2730      the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
# Line 2633  for (;;) Line 2735  for (;;)
2735        int slength;        int slength;
2736        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2737          {          {
2738            if (slength == -2) eptr = md->end_subject;   /* Partial match */
2739          CHECK_PARTIAL();          CHECK_PARTIAL();
2740          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2741          }          }
# Line 2656  for (;;) Line 2759  for (;;)
2759          if (fi >= max) RRETURN(MATCH_NOMATCH);          if (fi >= max) RRETURN(MATCH_NOMATCH);
2760          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2761            {            {
2762              if (slength == -2) eptr = md->end_subject;   /* Partial match */
2763            CHECK_PARTIAL();            CHECK_PARTIAL();
2764            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2765            }            }
# Line 2674  for (;;) Line 2778  for (;;)
2778          int slength;          int slength;
2779          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2780            {            {
2781            CHECK_PARTIAL();            /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2782              the soft partial matching case. */
2783    
2784              if (slength == -2 && md->partial != 0 &&
2785                  md->end_subject > md->start_used_ptr)
2786                {
2787                md->hitend = TRUE;
2788                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2789                }
2790            break;            break;
2791            }            }
2792          eptr += slength;          eptr += slength;
2793          }          }
2794    
2795        while (eptr >= pp)        while (eptr >= pp)
2796          {          {
2797          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
# Line 2703  for (;;) Line 2816  for (;;)
2816      case OP_NCLASS:      case OP_NCLASS:
2817      case OP_CLASS:      case OP_CLASS:
2818        {        {
2819          /* The data variable is saved across frames, so the byte map needs to
2820          be stored there. */
2821    #define BYTE_MAP ((pcre_uint8 *)data)
2822        data = ecode + 1;                /* Save for matching */        data = ecode + 1;                /* Save for matching */
2823        ecode += 33;                     /* Advance past the item */        ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2824    
2825        switch (*ecode)        switch (*ecode)
2826          {          {
# Line 2725  for (;;) Line 2841  for (;;)
2841          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2842          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2843          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2844          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2845          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2846          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2847          break;          break;
2848    
2849          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2737  for (;;) Line 2853  for (;;)
2853    
2854        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
2855    
2856  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2857        /* UTF-8 mode */        if (utf)
       if (utf8)  
2858          {          {
2859          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2860            {            {
# Line 2754  for (;;) Line 2869  for (;;)
2869              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2870              }              }
2871            else            else
2872              {              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  
             }  
2873            }            }
2874          }          }
2875        else        else
2876  #endif  #endif
2877        /* Not UTF-8 mode */        /* Not UTF mode */
2878          {          {
2879          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2880            {            {
# Line 2771  for (;;) Line 2884  for (;;)
2884              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2885              }              }
2886            c = *eptr++;            c = *eptr++;
2887            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2888              if (c > 255)
2889                {
2890                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2891                }
2892              else
2893    #endif
2894                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2895            }            }
2896          }          }
2897    
# Line 2785  for (;;) Line 2905  for (;;)
2905    
2906        if (minimize)        if (minimize)
2907          {          {
2908  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2909          /* UTF-8 mode */          if (utf)
         if (utf8)  
2910            {            {
2911            for (fi = min;; fi++)            for (fi = min;; fi++)
2912              {              {
# Line 2805  for (;;) Line 2924  for (;;)
2924                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2925                }                }
2926              else              else
2927                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  
               }  
2928              }              }
2929            }            }
2930          else          else
2931  #endif  #endif
2932          /* Not UTF-8 mode */          /* Not UTF mode */
2933            {            {
2934            for (fi = min;; fi++)            for (fi = min;; fi++)
2935              {              {
# Line 2825  for (;;) Line 2942  for (;;)
2942                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2943                }                }
2944              c = *eptr++;              c = *eptr++;
2945              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2946                if (c > 255)
2947                  {
2948                  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2949                  }
2950                else
2951    #endif
2952                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2953              }              }
2954            }            }
2955          /* Control never gets here */          /* Control never gets here */
# Line 2837  for (;;) Line 2961  for (;;)
2961          {          {
2962          pp = eptr;          pp = eptr;
2963    
2964  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2965          /* UTF-8 mode */          if (utf)
         if (utf8)  
2966            {            {
2967            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2968              {              {
# Line 2855  for (;;) Line 2978  for (;;)
2978                if (op == OP_CLASS) break;                if (op == OP_CLASS) break;
2979                }                }
2980              else              else
2981                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
               if ((data[c/8] & (1 << (c&7))) == 0) break;  
               }  
2982              eptr += len;              eptr += len;
2983              }              }
2984            for (;;)            for (;;)
# Line 2870  for (;;) Line 2991  for (;;)
2991            }            }
2992          else          else
2993  #endif  #endif
2994            /* Not UTF-8 mode */            /* Not UTF mode */
2995            {            {
2996            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2997              {              {
# Line 2880  for (;;) Line 3001  for (;;)
3001                break;                break;
3002                }                }
3003              c = *eptr;              c = *eptr;
3004              if ((data[c/8] & (1 << (c&7))) == 0) break;  #ifndef COMPILE_PCRE8
3005                if (c > 255)
3006                  {
3007                  if (op == OP_CLASS) break;
3008                  }
3009                else
3010    #endif
3011                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3012              eptr++;              eptr++;
3013              }              }
3014            while (eptr >= pp)            while (eptr >= pp)
# Line 2893  for (;;) Line 3021  for (;;)
3021    
3022          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3023          }          }
3024    #undef BYTE_MAP
3025        }        }
3026      /* Control never gets here */      /* Control never gets here */
3027    
# Line 2901  for (;;) Line 3030  for (;;)
3030      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
3031      mode, because Unicode properties are supported in non-UTF-8 mode. */      mode, because Unicode properties are supported in non-UTF-8 mode. */
3032    
3033  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3034      case OP_XCLASS:      case OP_XCLASS:
3035        {        {
3036        data = ecode + 1 + LINK_SIZE;                /* Save for matching */        data = ecode + 1 + LINK_SIZE;                /* Save for matching */
# Line 2926  for (;;) Line 3055  for (;;)
3055          case OP_CRMINRANGE:          case OP_CRMINRANGE:
3056          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
3057          min = GET2(ecode, 1);          min = GET2(ecode, 1);
3058          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
3059          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
3060          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
3061          break;          break;
3062    
3063          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2946  for (;;) Line 3075  for (;;)
3075            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3076            }            }
3077          GETCHARINCTEST(c, eptr);          GETCHARINCTEST(c, eptr);
3078          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3079          }          }
3080    
3081        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 2970  for (;;) Line 3099  for (;;)
3099              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3100              }              }
3101            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3102            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3103            }            }
3104          /* Control never gets here */          /* Control never gets here */
3105          }          }
# Line 2988  for (;;) Line 3117  for (;;)
3117              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3118              break;              break;
3119              }              }
3120    #ifdef SUPPORT_UTF
3121            GETCHARLENTEST(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
3122            if (!_pcre_xclass(c, data)) break;  #else
3123              c = *eptr;
3124    #endif
3125              if (!PRIV(xclass)(c, data, utf)) break;
3126            eptr += len;            eptr += len;
3127            }            }
3128          for(;;)          for(;;)
# Line 2997  for (;;) Line 3130  for (;;)
3130            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3131            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3132            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3133            if (utf8) BACKCHAR(eptr);  #ifdef SUPPORT_UTF
3134              if (utf) BACKCHAR(eptr);
3135    #endif
3136            }            }
3137          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3138          }          }
# Line 3009  for (;;) Line 3144  for (;;)
3144      /* Match a single character, casefully */      /* Match a single character, casefully */
3145    
3146      case OP_CHAR:      case OP_CHAR:
3147  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3148      if (utf8)      if (utf)
3149        {        {
3150        length = 1;        length = 1;
3151        ecode++;        ecode++;
# Line 3024  for (;;) Line 3159  for (;;)
3159        }        }
3160      else      else
3161  #endif  #endif
3162        /* Not UTF mode */
     /* Non-UTF-8 mode */  
3163        {        {
3164        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
3165          {          {
# Line 3037  for (;;) Line 3171  for (;;)
3171        }        }
3172      break;      break;
3173    
3174      /* Match a single character, caselessly. If we are at the end of the      /* Match a single character, caselessly. If we are at the end of the
3175      subject, give up immediately. */      subject, give up immediately. */
3176    
3177      case OP_CHARI:      case OP_CHARI:
3178      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
3179        {        {
3180        SCHECK_PARTIAL();        SCHECK_PARTIAL();
3181        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
3182        }        }
3183    
3184  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3185      if (utf8)      if (utf)
3186        {        {
3187        length = 1;        length = 1;
3188        ecode++;        ecode++;
3189        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
3190    
3191        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
3192        we know that its other case must also be one byte long, so we can use the        we know that its other case must also be one byte long, so we can use the
3193        fast lookup table. We know that there is at least one byte left in the        fast lookup table. We know that there is at least one byte left in the
3194        subject. */        subject. */
3195    
3196        if (fc < 128)        if (fc < 128)
3197          {          {
3198          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[fc]
3199                != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3200            ecode++;
3201            eptr++;
3202          }          }
3203    
3204        /* Otherwise we must pick up the subject character. Note that we cannot        /* Otherwise we must pick up the subject character. Note that we cannot
# Line 3087  for (;;) Line 3224  for (;;)
3224          }          }
3225        }        }
3226      else      else
3227  #endif   /* SUPPORT_UTF8 */  #endif   /* SUPPORT_UTF */
3228    
3229      /* Non-UTF-8 mode */      /* Not UTF mode */
3230        {        {
3231        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);        if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3232              != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3233          eptr++;
3234        ecode += 2;        ecode += 2;
3235        }        }
3236      break;      break;
# Line 3101  for (;;) Line 3240  for (;;)
3240      case OP_EXACT:      case OP_EXACT:
3241      case OP_EXACTI:      case OP_EXACTI:
3242      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3243      ecode += 3;      ecode += 1 + IMM2_SIZE;
3244      goto REPEATCHAR;      goto REPEATCHAR;
3245    
3246      case OP_POSUPTO:      case OP_POSUPTO:
# Line 3116  for (;;) Line 3255  for (;;)
3255      min = 0;      min = 0;
3256      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3257      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3258      ecode += 3;      ecode += 1 + IMM2_SIZE;
3259      goto REPEATCHAR;      goto REPEATCHAR;
3260    
3261      case OP_POSSTAR:      case OP_POSSTAR:
# Line 3164  for (;;) Line 3303  for (;;)
3303      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. */
3304    
3305      REPEATCHAR:      REPEATCHAR:
3306  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3307      if (utf8)      if (utf)
3308        {        {
3309        length = 1;        length = 1;
3310        charptr = ecode;        charptr = ecode;
# Line 3181  for (;;) Line 3320  for (;;)
3320          unsigned int othercase;          unsigned int othercase;
3321          if (op >= OP_STARI &&     /* Caseless */          if (op >= OP_STARI &&     /* Caseless */
3322              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
3323            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = PRIV(ord2utf)(othercase, occhars);
3324          else oclength = 0;          else oclength = 0;
3325  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3326    
3327          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3328            {            {
3329            if (eptr <= md->end_subject - length &&            if (eptr <= md->end_subject - length &&
3330              memcmp(eptr, charptr, length) == 0) eptr += length;              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3331  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3332            else if (oclength > 0 &&            else if (oclength > 0 &&
3333                     eptr <= md->end_subject - oclength &&                     eptr <= md->end_subject - oclength &&
3334                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3335  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3336            else            else
3337              {              {
# Line 3211  for (;;) Line 3350  for (;;)
3350              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3351              if (fi >= max) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3352              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3353                memcmp(eptr, charptr, length) == 0) eptr += length;                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3354  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3355              else if (oclength > 0 &&              else if (oclength > 0 &&
3356                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3357                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3358  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3359              else              else
3360                {                {
# Line 3232  for (;;) Line 3371  for (;;)
3371            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3372              {              {
3373              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3374                  memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3375  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3376              else if (oclength > 0 &&              else if (oclength > 0 &&
3377                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3378                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3379  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3380              else              else
3381                {                {
# Line 3268  for (;;) Line 3407  for (;;)
3407        value of fc will always be < 128. */        value of fc will always be < 128. */
3408        }        }
3409      else      else
3410  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3411          /* When not in UTF-8 mode, load a single-byte character. */
3412      /* When not in UTF-8 mode, load a single-byte character. */        fc = *ecode++;
3413    
3414      fc = *ecode++;      /* The value of fc at this point is always one character, though we may
3415        or may not be in UTF mode. The code is duplicated for the caseless and
     /* The value of fc at this point is always less than 256, though we may or  
     may not be in UTF-8 mode. The code is duplicated for the caseless and  
3416      caseful cases, for speed, since matching characters is likely to be quite      caseful cases, for speed, since matching characters is likely to be quite
3417      common. First, ensure the minimum number of matches are present. If min =      common. First, ensure the minimum number of matches are present. If min =
3418      max, continue at the same level without recursing. Otherwise, if      max, continue at the same level without recursing. Otherwise, if
# Line 3288  for (;;) Line 3425  for (;;)
3425    
3426      if (op >= OP_STARI)  /* Caseless */      if (op >= OP_STARI)  /* Caseless */
3427        {        {
3428        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3429          /* fc must be < 128 if UTF is enabled. */
3430          foc = md->fcc[fc];
3431    #else
3432    #ifdef SUPPORT_UTF
3433    #ifdef SUPPORT_UCP
3434          if (utf && fc > 127)
3435            foc = UCD_OTHERCASE(fc);
3436    #else
3437          if (utf && fc > 127)
3438            foc = fc;
3439    #endif /* SUPPORT_UCP */
3440          else
3441    #endif /* SUPPORT_UTF */
3442            foc = TABLE_GET(fc, md->fcc, fc);
3443    #endif /* COMPILE_PCRE8 */
3444    
3445        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3446          {          {
3447          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
# Line 3296  for (;;) Line 3449  for (;;)
3449            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3450            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3451            }            }
3452          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3453            eptr++;
3454          }          }
3455        if (min == max) continue;        if (min == max) continue;
3456        if (minimize)        if (minimize)
# Line 3311  for (;;) Line 3465  for (;;)
3465              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3466              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3467              }              }
3468            if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3469              eptr++;
3470            }            }
3471          /* Control never gets here */          /* Control never gets here */
3472          }          }
# Line 3325  for (;;) Line 3480  for (;;)
3480              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3481              break;              break;
3482              }              }
3483            if (fc != md->lcc[*eptr]) break;            if (fc != *eptr && foc != *eptr) break;
3484            eptr++;            eptr++;
3485            }            }
3486    
# Line 3410  for (;;) Line 3565  for (;;)
3565        SCHECK_PARTIAL();        SCHECK_PARTIAL();
3566        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
3567        }        }
3568      ecode++;  #ifdef SUPPORT_UTF
3569      GETCHARINCTEST(c, eptr);      if (utf)
     if (op == OP_NOTI)         /* The caseless case */  
3570        {        {
3571  #ifdef SUPPORT_UTF8        register unsigned int ch, och;
3572        if (c < 256)  
3573  #endif        ecode++;
3574        c = md->lcc[c];        GETCHARINC(ch, ecode);
3575        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        GETCHARINC(c, eptr);
3576    
3577          if (op == OP_NOT)
3578            {
3579            if (ch == c) RRETURN(MATCH_NOMATCH);
3580            }
3581          else
3582            {
3583    #ifdef SUPPORT_UCP
3584            if (ch > 127)
3585              och = UCD_OTHERCASE(ch);
3586    #else
3587            if (ch > 127)
3588              och = ch;
3589    #endif /* SUPPORT_UCP */
3590            else
3591              och = TABLE_GET(ch, md->fcc, ch);
3592            if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3593            }
3594        }        }
3595      else    /* Caseful */      else
3596    #endif
3597        {        {
3598        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        register unsigned int ch = ecode[1];
3599          c = *eptr++;
3600          if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3601            RRETURN(MATCH_NOMATCH);
3602          ecode += 2;
3603        }        }
3604      break;      break;
3605    
# Line 3436  for (;;) Line 3613  for (;;)
3613      case OP_NOTEXACT:      case OP_NOTEXACT:
3614      case OP_NOTEXACTI:      case OP_NOTEXACTI:
3615      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3616      ecode += 3;      ecode += 1 + IMM2_SIZE;
3617      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3618    
3619      case OP_NOTUPTO:      case OP_NOTUPTO:
# Line 3446  for (;;) Line 3623  for (;;)
3623      min = 0;      min = 0;
3624      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3625      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3626      ecode += 3;      ecode += 1 + IMM2_SIZE;
3627      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3628    
3629      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
# Line 3478  for (;;) Line 3655  for (;;)
3655      possessive = TRUE;      possessive = TRUE;
3656      min = 0;      min = 0;
3657      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3658      ecode += 3;      ecode += 1 + IMM2_SIZE;
3659      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3660    
3661      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 3502  for (;;) Line 3679  for (;;)
3679      /* Common code for all repeated single-byte matches. */      /* Common code for all repeated single-byte matches. */
3680    
3681      REPEATNOTCHAR:      REPEATNOTCHAR:
3682      fc = *ecode++;      GETCHARINCTEST(fc, ecode);
3683    
3684      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
3685      since matching characters is likely to be quite common. First, ensure the      since matching characters is likely to be quite common. First, ensure the
# Line 3517  for (;;) Line 3694  for (;;)
3694    
3695      if (op >= OP_NOTSTARI)     /* Caseless */      if (op >= OP_NOTSTARI)     /* Caseless */
3696        {        {
3697        fc = md->lcc[fc];  #ifdef SUPPORT_UTF
3698    #ifdef SUPPORT_UCP
3699          if (utf && fc > 127)
3700            foc = UCD_OTHERCASE(fc);
3701    #else
3702          if (utf && fc > 127)
3703            foc = fc;
3704    #endif /* SUPPORT_UCP */
3705          else
3706    #endif /* SUPPORT_UTF */
3707            foc = TABLE_GET(fc, md->fcc, fc);
3708    
3709  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3710        /* UTF-8 mode */        if (utf)
       if (utf8)  
3711          {          {
3712          register unsigned int d;          register unsigned int d;
3713          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3532  for (;;) Line 3718  for (;;)
3718              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3719              }              }
3720            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3721            if (d < 256) d = md->lcc[d];            if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
           if (fc == d) RRETURN(MATCH_NOMATCH);  
3722            }            }
3723          }          }
3724        else        else
3725  #endif  #endif
3726          /* Not UTF mode */
       /* Not UTF-8 mode */  
3727          {          {
3728          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3729            {            {
# Line 3548  for (;;) Line 3732  for (;;)
3732              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3733              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3734              }              }
3735            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3736              eptr++;
3737            }            }
3738          }          }
3739    
# Line 3556  for (;;) Line 3741  for (;;)
3741    
3742        if (minimize)        if (minimize)
3743          {          {
3744  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3745          /* UTF-8 mode */          if (utf)
         if (utf8)  
3746            {            {
3747            register unsigned int d;            register unsigned int d;
3748            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3572  for (;;) Line 3756  for (;;)
3756                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3757                }                }
3758              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3759              if (d < 256) d = md->lcc[d];              if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
             if (fc == d) RRETURN(MATCH_NOMATCH);  
3760              }              }
3761            }            }
3762          else          else
3763  #endif  #endif
3764          /* Not UTF-8 mode */          /* Not UTF mode */
3765            {            {
3766            for (fi = min;; fi++)            for (fi = min;; fi++)
3767              {              {
# Line 3590  for (;;) Line 3773  for (;;)
3773                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3774                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3775                }                }
3776              if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);              if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3777                eptr++;
3778              }              }
3779            }            }
3780          /* Control never gets here */          /* Control never gets here */
# Line 3602  for (;;) Line 3786  for (;;)
3786          {          {
3787          pp = eptr;          pp = eptr;
3788    
3789  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3790          /* UTF-8 mode */          if (utf)
         if (utf8)  
3791            {            {
3792            register unsigned int d;            register unsigned int d;
3793            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3616  for (;;) Line 3799  for (;;)
3799                break;                break;
3800                }                }
3801              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3802              if (d < 256) d = md->lcc[d];              if (fc == d || (unsigned int)foc == d) break;
             if (fc == d) break;  
3803              eptr += len;              eptr += len;
3804              }              }
3805          if (possessive) continue;            if (possessive) continue;
3806          for(;;)            for(;;)
3807              {              {
3808              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3809              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 3631  for (;;) Line 3813  for (;;)
3813            }            }
3814          else          else
3815  #endif  #endif
3816          /* Not UTF-8 mode */          /* Not UTF mode */
3817            {            {
3818            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3819              {              {
# Line 3640  for (;;) Line 3822  for (;;)
3822                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3823                break;                break;
3824                }                }
3825              if (fc == md->lcc[*eptr]) break;              if (fc == *eptr || foc == *eptr) break;
3826              eptr++;              eptr++;
3827              }              }
3828            if (possessive) continue;            if (possessive) continue;
# Line 3661  for (;;) Line 3843  for (;;)
3843    
3844      else      else
3845        {        {
3846  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3847        /* UTF-8 mode */        if (utf)
       if (utf8)  
3848          {          {
3849          register unsigned int d;          register unsigned int d;
3850          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3679  for (;;) Line 3860  for (;;)
3860          }          }
3861        else        else
3862  #endif  #endif
3863        /* Not UTF-8 mode */        /* Not UTF mode */
3864          {          {
3865          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3866            {            {
# Line 3696  for (;;) Line 3877  for (;;)
3877    
3878        if (minimize)        if (minimize)
3879          {          {
3880  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3881          /* UTF-8 mode */          if (utf)
         if (utf8)  
3882            {            {
3883            register unsigned int d;            register unsigned int d;
3884            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3717  for (;;) Line 3897  for (;;)
3897            }            }
3898          else          else
3899  #endif  #endif
3900          /* Not UTF-8 mode */          /* Not UTF mode */
3901            {            {
3902            for (fi = min;; fi++)            for (fi = min;; fi++)
3903              {              {
# Line 3741  for (;;) Line 3921  for (;;)
3921          {          {
3922          pp = eptr;          pp = eptr;
3923    
3924  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3925          /* UTF-8 mode */          if (utf)
         if (utf8)  
3926            {            {
3927            register unsigned int d;            register unsigned int d;
3928            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3769  for (;;) Line 3948  for (;;)
3948            }            }
3949          else          else
3950  #endif  #endif
3951          /* Not UTF-8 mode */          /* Not UTF mode */
3952            {            {
3953            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3954              {              {
# Line 3802  for (;;) Line 3981  for (;;)
3981      case OP_TYPEEXACT:      case OP_TYPEEXACT:
3982      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3983      minimize = TRUE;      minimize = TRUE;
3984      ecode += 3;      ecode += 1 + IMM2_SIZE;
3985      goto REPEATTYPE;      goto REPEATTYPE;
3986    
3987      case OP_TYPEUPTO:      case OP_TYPEUPTO:
# Line 3810  for (;;) Line 3989  for (;;)
3989      min = 0;      min = 0;
3990      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3991      minimize = *ecode == OP_TYPEMINUPTO;      minimize = *ecode == OP_TYPEMINUPTO;
3992      ecode += 3;      ecode += 1 + IMM2_SIZE;
3993      goto REPEATTYPE;      goto REPEATTYPE;
3994    
3995      case OP_TYPEPOSSTAR:      case OP_TYPEPOSSTAR:
# Line 3838  for (;;) Line 4017  for (;;)
4017      possessive = TRUE;      possessive = TRUE;
4018      min = 0;      min = 0;
4019      max = GET2(ecode, 1);      max = GET2(ecode, 1);
4020      ecode += 3;      ecode += 1 + IMM2_SIZE;
4021      goto REPEATTYPE;      goto REPEATTYPE;
4022    
4023      case OP_TYPESTAR:      case OP_TYPESTAR:
# Line 4045  for (;;) Line 4224  for (;;)
4224            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4225              {              {
4226              int len = 1;              int len = 1;
4227              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4228              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
4229              eptr += len;              eptr += len;
4230              }              }
4231              CHECK_PARTIAL();
4232            }            }
4233          }          }
4234    
# Line 4057  for (;;) Line 4237  for (;;)
4237    
4238  /* Handle all other cases when the coding is UTF-8 */  /* Handle all other cases when the coding is UTF-8 */
4239    
4240  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4241        if (utf8) switch(ctype)        if (utf) switch(ctype)
4242          {          {
4243          case OP_ANY:          case OP_ANY:
4244          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 4069  for (;;) Line 4249  for (;;)
4249              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4250              }              }
4251            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4252              if (md->partial != 0 &&
4253                  eptr + 1 >= md->end_subject &&
4254                  NLBLOCK->nltype == NLTYPE_FIXED &&
4255                  NLBLOCK->nllen == 2 &&
4256                  *eptr == NLBLOCK->nl[0])
4257                {
4258                md->hitend = TRUE;
4259                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4260                }
4261            eptr++;            eptr++;
4262            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4263            }            }
4264          break;          break;
4265    
# Line 4083  for (;;) Line 4272  for (;;)
4272              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4273              }              }
4274            eptr++;            eptr++;
4275            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4276            }            }
4277          break;          break;
4278    
# Line 4265  for (;;) Line 4454  for (;;)
4454              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4455              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4456              }              }
4457            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
4458              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4459              eptr++;
4460            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4461            }            }
4462          break;          break;
# Line 4281  for (;;) Line 4471  for (;;)
4471              }              }
4472            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
4473              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4474            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            eptr++;
4475              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4476            }            }
4477          break;          break;
4478    
# Line 4293  for (;;) Line 4484  for (;;)
4484              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4485              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4486              }              }
4487            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
4488              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4489              eptr++;
4490            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4491            }            }
4492          break;          break;
# Line 4309  for (;;) Line 4501  for (;;)
4501              }              }
4502            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
4503              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4504            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            eptr++;
4505              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4506            }            }
4507          break;          break;
4508    
# Line 4321  for (;;) Line 4514  for (;;)
4514              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4515              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4516              }              }
4517            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
4518              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4519              eptr++;
4520            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4521            }            }
4522          break;          break;
# Line 4332  for (;;) Line 4526  for (;;)
4526          }  /* End switch(ctype) */          }  /* End switch(ctype) */
4527    
4528        else        else
4529  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF */
4530    
4531        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
4532        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. */
# Line 4348  for (;;) Line 4542  for (;;)
4542              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4543              }              }
4544            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4545              if (md->partial != 0 &&
4546                  eptr + 1 >= md->end_subject &&
4547                  NLBLOCK->nltype == NLTYPE_FIXED &&
4548                  NLBLOCK->nllen == 2 &&
4549                  *eptr == NLBLOCK->nl[0])
4550                {
4551                md->hitend = TRUE;
4552                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4553                }
4554            eptr++;            eptr++;
4555            }            }
4556          break;          break;
# Line 4392  for (;;) Line 4595  for (;;)
4595              case 0x000b:              case 0x000b:
4596              case 0x000c:              case 0x000c:
4597              case 0x0085:              case 0x0085:
4598    #ifdef COMPILE_PCRE16
4599                case 0x2028:
4600                case 0x2029:
4601    #endif
4602              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4603              break;              break;
4604              }              }
# Line 4412  for (;;) Line 4619  for (;;)
4619              case 0x09:      /* HT */              case 0x09:      /* HT */
4620              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4621              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
4622    #ifdef COMPILE_PCRE16
4623                case 0x1680:    /* OGHAM SPACE MARK */
4624                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4625                case 0x2000:    /* EN QUAD */
4626                case 0x2001:    /* EM QUAD */
4627                case 0x2002:    /* EN SPACE */
4628                case 0x2003:    /* EM SPACE */
4629                case 0x2004:    /* THREE-PER-EM SPACE */
4630                case 0x2005:    /* FOUR-PER-EM SPACE */
4631                case 0x2006:    /* SIX-PER-EM SPACE */
4632                case 0x2007:    /* FIGURE SPACE */
4633                case 0x2008:    /* PUNCTUATION SPACE */
4634                case 0x2009:    /* THIN SPACE */
4635                case 0x200A:    /* HAIR SPACE */
4636                case 0x202f:    /* NARROW NO-BREAK SPACE */
4637                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4638                case 0x3000:    /* IDEOGRAPHIC SPACE */
4639    #endif
4640              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4641              }              }
4642            }            }
# Line 4431  for (;;) Line 4656  for (;;)
4656              case 0x09:      /* HT */              case 0x09:      /* HT */
4657              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4658              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
4659    #ifdef COMPILE_PCRE16
4660                case 0x1680:    /* OGHAM SPACE MARK */
4661                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4662                case 0x2000:    /* EN QUAD */
4663                case 0x2001:    /* EM QUAD */
4664                case 0x2002:    /* EN SPACE */
4665                case 0x2003:    /* EM SPACE */
4666                case 0x2004:    /* THREE-PER-EM SPACE */
4667                case 0x2005:    /* FOUR-PER-EM SPACE */
4668                case 0x2006:    /* SIX-PER-EM SPACE */
4669                case 0x2007:    /* FIGURE SPACE */
4670                case 0x2008:    /* PUNCTUATION SPACE */
4671                case 0x2009:    /* THIN SPACE */
4672                case 0x200A:    /* HAIR SPACE */
4673                case 0x202f:    /* NARROW NO-BREAK SPACE */
4674                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4675                case 0x3000:    /* IDEOGRAPHIC SPACE */
4676    #endif
4677              break;              break;
4678              }              }
4679            }            }
# Line 4452  for (;;) Line 4695  for (;;)
4695              case 0x0c:      /* FF */              case 0x0c:      /* FF */
4696              case 0x0d:      /* CR */              case 0x0d:      /* CR */
4697              case 0x85:      /* NEL */              case 0x85:      /* NEL */
4698    #ifdef COMPILE_PCRE16
4699                case 0x2028:    /* LINE SEPARATOR */
4700                case 0x2029:    /* PARAGRAPH SEPARATOR */
4701    #endif
4702              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4703              }              }
4704            }            }
# Line 4473  for (;;) Line 4720  for (;;)
4720              case 0x0c:      /* FF */              case 0x0c:      /* FF */
4721              case 0x0d:      /* CR */              case 0x0d:      /* CR */
4722              case 0x85:      /* NEL */              case 0x85:      /* NEL */
4723    #ifdef COMPILE_PCRE16
4724                case 0x2028:    /* LINE SEPARATOR */
4725                case 0x2029:    /* PARAGRAPH SEPARATOR */
4726    #endif
4727              break;              break;
4728              }              }
4729            }            }
# Line 4486  for (;;) Line 4737  for (;;)
4737              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4738              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4739              }              }
4740            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4741                RRETURN(MATCH_NOMATCH);
4742              eptr++;
4743            }            }
4744          break;          break;
4745    
# Line 4498  for (;;) Line 4751  for (;;)
4751              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4752              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4753              }              }
4754            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4755                RRETURN(MATCH_NOMATCH);
4756              eptr++;
4757            }            }
4758          break;          break;
4759    
# Line 4510  for (;;) Line 4765  for (;;)
4765              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4766              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4767              }              }
4768            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4769                RRETURN(MATCH_NOMATCH);
4770              eptr++;
4771            }            }
4772          break;          break;
4773    
# Line 4522  for (;;) Line 4779  for (;;)
4779              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4780              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4781              }              }
4782            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4783                RRETURN(MATCH_NOMATCH);
4784              eptr++;
4785            }            }
4786          break;          break;
4787    
# Line 4534  for (;;) Line 4793  for (;;)
4793              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4794              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4795              }              }
4796            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4797              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4798              eptr++;
4799            }            }
4800          break;          break;
4801    
# Line 4547  for (;;) Line 4807  for (;;)
4807              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4808              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4809              }              }
4810            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4811              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4812              eptr++;
4813            }            }
4814          break;          break;
4815    
# Line 4766  for (;;) Line 5027  for (;;)
5027            while (eptr < md->end_subject)            while (eptr < md->end_subject)
5028              {              {
5029              int len = 1;              int len = 1;
5030              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5031              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
5032              eptr += len;              eptr += len;
5033              }              }
5034              CHECK_PARTIAL();
5035            }            }
5036          }          }
5037        else        else
5038  #endif     /* SUPPORT_UCP */  #endif     /* SUPPORT_UCP */
5039    
5040  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
5041        /* UTF-8 mode */        if (utf)
       if (utf8)  
5042          {          {
5043          for (fi = min;; fi++)          for (fi = min;; fi++)
5044            {            {
# Line 4794  for (;;) Line 5055  for (;;)
5055            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
5056            switch(ctype)            switch(ctype)
5057              {              {
5058              case OP_ANY:        /* This is the non-NL case */              case OP_ANY:               /* This is the non-NL case */
5059                if (md->partial != 0 &&    /* Take care with CRLF partial */
5060                    eptr >= md->end_subject &&
5061                    NLBLOCK->nltype == NLTYPE_FIXED &&
5062                    NLBLOCK->nllen == 2 &&
5063                    c == NLBLOCK->nl[0])
5064                  {
5065                  md->hitend = TRUE;
5066                  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5067                  }
5068                break;
5069    
5070              case OP_ALLANY:              case OP_ALLANY:
5071              case OP_ANYBYTE:              case OP_ANYBYTE:
5072              break;              break;
# Line 4919  for (;;) Line 5191  for (;;)
5191              break;              break;
5192    
5193              case OP_WHITESPACE:              case OP_WHITESPACE:
5194              if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)              if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5195                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5196              break;              break;
5197    
# Line 4940  for (;;) Line 5212  for (;;)
5212          }          }
5213        else        else
5214  #endif  #endif
5215        /* Not UTF-8 mode */        /* Not UTF mode */
5216          {          {
5217          for (fi = min;; fi++)          for (fi = min;; fi++)
5218            {            {
# Line 4957  for (;;) Line 5229  for (;;)
5229            c = *eptr++;            c = *eptr++;
5230            switch(ctype)            switch(ctype)
5231              {              {
5232              case OP_ANY:     /* This is the non-NL case */              case OP_ANY:               /* This is the non-NL case */
5233                if (md->partial != 0 &&    /* Take care with CRLF partial */
5234                    eptr >= md->end_subject &&
5235                    NLBLOCK->nltype == NLTYPE_FIXED &&
5236                    NLBLOCK->nllen == 2 &&
5237                    c == NLBLOCK->nl[0])
5238                  {
5239                  md->hitend = TRUE;
5240                  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5241                  }
5242                break;
5243    
5244              case OP_ALLANY:              case OP_ALLANY:
5245              case OP_ANYBYTE:              case OP_ANYBYTE:
5246              break;              break;
# Line 4976  for (;;) Line 5259  for (;;)
5259                case 0x000b:                case 0x000b:
5260                case 0x000c:                case 0x000c:
5261                case 0x0085:                case 0x0085:
5262    #ifdef COMPILE_PCRE16
5263                  case 0x2028:
5264                  case 0x2029:
5265    #endif
5266                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5267                break;                break;
5268                }                }
# Line 4988  for (;;) Line 5275  for (;;)
5275                case 0x09:      /* HT */                case 0x09:      /* HT */
5276                case 0x20:      /* SPACE */                case 0x20:      /* SPACE */
5277                case 0xa0:      /* NBSP */                case 0xa0:      /* NBSP */
5278    #ifdef COMPILE_PCRE16
5279                  case 0x1680:    /* OGHAM SPACE MARK */
5280                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5281                  case 0x2000:    /* EN QUAD */
5282                  case 0x2001:    /* EM QUAD */
5283                  case 0x2002:    /* EN SPACE */
5284                  case 0x2003:    /* EM SPACE */
5285                  case 0x2004:    /* THREE-PER-EM SPACE */
5286                  case 0x2005:    /* FOUR-PER-EM SPACE */
5287                  case 0x2006:    /* SIX-PER-EM SPACE */
5288                  case 0x2007:    /* FIGURE SPACE */
5289                  case 0x2008:    /* PUNCTUATION SPACE */
5290                  case 0x2009:    /* THIN SPACE */
5291                  case 0x200A:    /* HAIR SPACE */
5292                  case 0x202f:    /* NARROW NO-BREAK SPACE */
5293                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5294                  case 0x3000:    /* IDEOGRAPHIC SPACE */
5295    #endif
5296                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5297                }                }
5298              break;              break;
# Line 4999  for (;;) Line 5304  for (;;)
5304                case 0x09:      /* HT */                case 0x09:      /* HT */
5305                case 0x20:      /* SPACE */                case 0x20:      /* SPACE */
5306                case 0xa0:      /* NBSP */                case 0xa0:      /* NBSP */
5307    #ifdef COMPILE_PCRE16
5308                  case 0x1680:    /* OGHAM SPACE MARK */
5309                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5310                  case 0x2000:    /* EN QUAD */
5311                  case 0x2001:    /* EM QUAD */
5312                  case 0x2002:    /* EN SPACE */
5313                  case 0x2003:    /* EM SPACE */
5314                  case 0x2004:    /* THREE-PER-EM SPACE */
5315                  case 0x2005:    /* FOUR-PER-EM SPACE */
5316                  case 0x2006:    /* SIX-PER-EM SPACE */
5317                  case 0x2007:    /* FIGURE SPACE */
5318                  case 0x2008:    /* PUNCTUATION SPACE */
5319                  case 0x2009:    /* THIN SPACE */
5320                  case 0x200A:    /* HAIR SPACE */
5321                  case 0x202f:    /* NARROW NO-BREAK SPACE */
5322                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5323                  case 0x3000:    /* IDEOGRAPHIC SPACE */
5324    #endif
5325                break;                break;
5326                }                }
5327              break;              break;
# Line 5012  for (;;) Line 5335  for (;;)
5335                case 0x0c:      /* FF */                case 0x0c:      /* FF */
5336                case 0x0d:      /* CR */                case 0x0d:      /* CR */
5337                case 0x85:      /* NEL */                case 0x85:      /* NEL */
5338    #ifdef COMPILE_PCRE16
5339                  case 0x2028:    /* LINE SEPARATOR */
5340                  case 0x2029:    /* PARAGRAPH SEPARATOR */
5341    #endif
5342                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5343                }                }
5344              break;              break;
# Line 5025  for (;;) Line 5352  for (;;)
5352                case 0x0c:      /* FF */                case 0x0c:      /* FF */
5353                case 0x0d:      /* CR */                case 0x0d:      /* CR */
5354                case 0x85:      /* NEL */                case 0x85:      /* NEL */
5355    #ifdef COMPILE_PCRE16
5356                  case 0x2028:    /* LINE SEPARATOR */
5357                  case 0x2029:    /* PARAGRAPH SEPARATOR */
5358    #endif
5359                break;                break;
5360                }                }
5361              break;              break;
5362    
5363              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
5364              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5365              break;              break;
5366    
5367              case OP_DIGIT:              case OP_DIGIT:
5368              if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);              if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5369              break;              break;
5370    
5371              case OP_NOT_WHITESPACE:              case OP_NOT_WHITESPACE:
5372              if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);              if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5373              break;              break;
5374    
5375              case OP_WHITESPACE:              case OP_WHITESPACE:
5376              if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);              if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5377              break;              break;
5378    
5379              case OP_NOT_WORDCHAR:              case OP_NOT_WORDCHAR:
5380              if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);              if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5381              break;              break;
5382    
5383              case OP_WORDCHAR:              case OP_WORDCHAR:
5384              if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);              if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5385              break;              break;
5386    
5387              default:              default:
# Line 5239  for (;;) Line 5570  for (;;)
5570            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5571            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5572            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5573            if (utf8) BACKCHAR(eptr);            if (utf) BACKCHAR(eptr);
5574            }            }
5575          }          }
5576    
# Line 5256  for (;;) Line 5587  for (;;)
5587              SCHECK_PARTIAL();              SCHECK_PARTIAL();
5588              break;              break;
5589              }              }
5590            if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5591            if (UCD_CATEGORY(c) == ucp_M) break;            if (UCD_CATEGORY(c) == ucp_M) break;
5592            eptr += len;            eptr += len;
5593            while (eptr < md->end_subject)            while (eptr < md->end_subject)
5594              {              {
5595              len = 1;              len = 1;
5596              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5597              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
5598              eptr += len;              eptr += len;
5599              }              }
5600              CHECK_PARTIAL();
5601            }            }
5602    
5603          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
# Line 5279  for (;;) Line 5611  for (;;)
5611            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5612            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
5613              {              {
5614              if (!utf8) c = *eptr; else              if (!utf) c = *eptr; else
5615                {                {
5616                BACKCHAR(eptr);                BACKCHAR(eptr);
5617                GETCHAR(c, eptr);                GETCHAR(c, eptr);
# Line 5293  for (;;) Line 5625  for (;;)
5625        else        else
5626  #endif   /* SUPPORT_UCP */  #endif   /* SUPPORT_UCP */
5627    
5628  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
5629        /* UTF-8 mode */        if (utf)
   
       if (utf8)  
5630          {          {
5631          switch(ctype)          switch(ctype)
5632            {            {
# Line 5311  for (;;) Line 5641  for (;;)
5641                  break;                  break;
5642                  }                  }
5643                if (IS_NEWLINE(eptr)) break;                if (IS_NEWLINE(eptr)) break;
5644                  if (md->partial != 0 &&    /* Take care with CRLF partial */
5645                      eptr + 1 >= md->end_subject &&
5646                      NLBLOCK->nltype == NLTYPE_FIXED &&
5647                      NLBLOCK->nllen == 2 &&
5648                      *eptr == NLBLOCK->nl[0])
5649                    {
5650                    md->hitend = TRUE;
5651                    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5652                    }
5653                eptr++;                eptr++;
5654                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5655                }                }
5656              }              }
5657    
# Line 5328  for (;;) Line 5667  for (;;)
5667                  break;                  break;
5668                  }                  }
5669                if (IS_NEWLINE(eptr)) break;                if (IS_NEWLINE(eptr)) break;
5670                  if (md->partial != 0 &&    /* Take care with CRLF partial */
5671                      eptr + 1 >= md->end_subject &&
5672                      NLBLOCK->nltype == NLTYPE_FIXED &&
5673                      NLBLOCK->nllen == 2 &&
5674                      *eptr == NLBLOCK->nl[0])
5675                    {
5676                    md->hitend = TRUE;
5677                    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5678                    }
5679                eptr++;                eptr++;
5680                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5681                }                }
5682              }              }
5683            break;            break;
# Line 5345  for (;;) Line 5693  for (;;)
5693                  break;                  break;
5694                  }                  }
5695                eptr++;                eptr++;
5696                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5697                }                }
5698              }              }
5699            else            else
# Line 5578  for (;;) Line 5926  for (;;)
5926            }            }
5927          }          }
5928        else        else
5929  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
5930          /* Not UTF mode */
       /* Not UTF-8 mode */  
5931          {          {
5932          switch(ctype)          switch(ctype)
5933            {            {
# Line 5593  for (;;) Line 5940  for (;;)
5940                break;                break;
5941                }                }
5942              if (IS_NEWLINE(eptr)) break;              if (IS_NEWLINE(eptr)) break;
5943                if (md->partial != 0 &&    /* Take care with CRLF partial */
5944                    eptr + 1 >= md->end_subject &&
5945                    NLBLOCK->nltype == NLTYPE_FIXED &&
5946                    NLBLOCK->nllen == 2 &&
5947                    *eptr == NLBLOCK->nl[0])
5948                  {
5949                  md->hitend = TRUE;
5950                  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5951                  }
5952              eptr++;              eptr++;
5953              }              }
5954            break;            break;
# Line 5624  for (;;) Line 5980  for (;;)
5980                }                }
5981              else              else
5982                {                {
5983                if (c != 0x000a &&                if (c != 0x000a && (md->bsr_anycrlf ||
5984                    (md->bsr_anycrlf ||                  (c != 0x000b && c != 0x000c && c != 0x0085
5985                      (c != 0x000b && c != 0x000c && c != 0x0085)))  #ifdef COMPILE_PCRE16
5986                  break;                  && c != 0x2028 && c != 0x2029
5987    #endif
5988                    ))) break;
5989                eptr++;                eptr++;
5990                }                }
5991              }              }
# Line 5642  for (;;) Line 6000  for (;;)
6000                break;                break;
6001                }                }
6002              c = *eptr;              c = *eptr;
6003              if (c == 0x09 || c == 0x20 || c == 0xa0) break;              if (c == 0x09 || c == 0x20 || c == 0xa0
6004    #ifdef COMPILE_PCRE16
6005                  || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)
6006                  || c == 0x202f || c == 0x205f || c == 0x3000
6007    #endif
6008                  ) break;
6009              eptr++;              eptr++;
6010              }              }
6011            break;            break;
# Line 5656  for (;;) Line 6019  for (;;)
6019                break;                break;
6020                }                }
6021              c = *eptr;              c = *eptr;
6022              if (c != 0x09 && c != 0x20 && c != 0xa0) break;              if (c != 0x09 && c != 0x20 && c != 0xa0
6023    #ifdef COMPILE_PCRE16
6024                  && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)
6025                  && c != 0x202f && c != 0x205f && c != 0x3000
6026    #endif
6027                  ) break;
6028              eptr++;              eptr++;
6029              }              }
6030            break;            break;
# Line 5670  for (;;) Line 6038  for (;;)
6038                break;                break;
6039                }                }
6040              c = *eptr;              c = *eptr;
6041              if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)              if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85
6042                break;  #ifdef COMPILE_PCRE16
6043                  || c == 0x2028 || c == 0x2029
6044    #endif
6045                  ) break;
6046              eptr++;              eptr++;
6047              }              }
6048            break;            break;
# Line 5685  for (;;) Line 6056  for (;;)
6056                break;                break;
6057                }                }
6058              c = *eptr;              c = *eptr;
6059              if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)              if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85
6060                break;  #ifdef COMPILE_PCRE16
6061                  && c != 0x2028 && c != 0x2029
6062    #endif
6063                  ) break;
6064              eptr++;              eptr++;
6065              }              }
6066            break;            break;
# Line 5699  for (;;) Line 6073  for (;;)
6073                SCHECK_PARTIAL();                SCHECK_PARTIAL();
6074                break;                break;
6075                }                }
6076              if ((md->ctypes[*eptr] & ctype_digit) != 0) break;              if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
6077              eptr++;              eptr++;
6078              }              }
6079            break;            break;
# Line 5712  for (;;) Line 6086  for (;;)
6086                SCHECK_PARTIAL();                SCHECK_PARTIAL();
6087                break;                break;
6088                }                }
6089              if ((md->ctypes[*eptr] & ctype_digit) == 0) break;              if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
6090              eptr++;              eptr++;
6091              }              }
6092            break;            break;
# Line 5725  for (;;) Line 6099  for (;;)
6099                SCHECK_PARTIAL();                SCHECK_PARTIAL();
6100                break;                break;
6101                }                }
6102              if ((md->ctypes[*eptr] & ctype_space) != 0) break;              if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6103              eptr++;              eptr++;
6104              }              }
6105            break;            break;
# Line 5738  for (;;) Line 6112  for (;;)
6112                SCHECK_PARTIAL();                SCHECK_PARTIAL();
6113                break;                break;
6114                }                }
6115              if ((md->ctypes[*eptr] & ctype_space) == 0) break;              if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6116              eptr++;              eptr++;
6117              }              }
6118            break;            break;
# Line 5751  for (;;) Line 6125  for (;;)
6125                SCHECK_PARTIAL();                SCHECK_PARTIAL();
6126                break;                break;
6127                }                }
6128              if ((md->ctypes[*eptr] & ctype_word) != 0) break;              if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6129              eptr++;              eptr++;
6130              }              }
6131            break;            break;
# Line 5764  for (;;) Line 6138  for (;;)
6138                SCHECK_PARTIAL();                SCHECK_PARTIAL();
6139                break;                break;
6140                }                }
6141              if ((md->ctypes[*eptr] & ctype_word) == 0) break;              if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6142              eptr++;              eptr++;
6143              }              }
6144            break;            break;
# Line 5827  switch (frame->Xwhere) Line 6201  switch (frame->Xwhere)
6201    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6202    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6203    LBL(65) LBL(66)    LBL(65) LBL(66)
6204  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6205    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(21)
6206    #endif
6207    #ifdef SUPPORT_UTF
6208      LBL(16) LBL(18) LBL(20)
6209      LBL(22) LBL(23) LBL(28) LBL(30)
6210    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
6211  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6212    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6213    LBL(59) LBL(60) LBL(61) LBL(62)    LBL(59) LBL(60) LBL(61) LBL(62)
6214  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
6215  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
6216    default:    default:
6217    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6218    
6219    printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
6220    
6221    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
6222    }    }
6223  #undef LBL  #undef LBL
# Line 5923  Returns:          > 0 => success; value Line 6304  Returns:          > 0 => success; value
6304                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
6305  */  */
6306    
6307    #ifdef COMPILE_PCRE8
6308  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6309  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6310    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6311    int offsetcount)    int offsetcount)
6312    #else
6313    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6314    pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6315      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6316      int offsetcount)
6317    #endif
6318  {  {
6319  int rc, ocount, arg_offset_max;  int rc, ocount, arg_offset_max;
 int first_byte = -1;  
 int req_byte = -1;  
 int req_byte2 = -1;  
6320  int newline;  int newline;
6321  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
6322  BOOL anchored;  BOOL anchored;
6323  BOOL startline;  BOOL startline;
6324  BOOL firstline;  BOOL firstline;
6325  BOOL first_byte_caseless = FALSE;  BOOL utf;
6326  BOOL req_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
6327  BOOL utf8;  BOOL has_req_char = FALSE;
6328    pcre_uchar first_char = 0;
6329    pcre_uchar first_char2 = 0;
6330    pcre_uchar req_char = 0;
6331    pcre_uchar req_char2 = 0;
6332  match_data match_block;  match_data match_block;
6333  match_data *md = &match_block;  match_data *md = &match_block;
6334  const uschar *tables;  const pcre_uint8 *tables;
6335  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
6336  USPTR start_match = (USPTR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6337  USPTR end_subject;  PCRE_PUCHAR end_subject;
6338  USPTR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
6339  USPTR req_byte_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6340    
 pcre_study_data internal_study;  
6341  const pcre_study_data *study;  const pcre_study_data *study;
6342    const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6343    
6344  real_pcre internal_re;  /* Check for the special magic call that measures the size of the stack used
6345  const real_pcre *external_re = (const real_pcre *)argument_re;  per recursive call of match(). Without the funny casting for sizeof, a Windows
6346  const real_pcre *re = external_re;  compiler gave this error: "unary minus operator applied to unsigned type,
6347    result still unsigned". Hopefully the cast fixes that. */
6348    
6349    if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6350        start_offset == -999)
6351    #ifdef NO_RECURSE
6352      return -((int)sizeof(heapframe));
6353    #else
6354      return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6355    #endif
6356    
6357  /* Plausibility checks */  /* Plausibility checks */
6358    
6359  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6360  if (re == NULL || subject == NULL ||  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6361     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;    return PCRE_ERROR_NULL;
6362  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6363  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6364    
6365    /* Check that the first field in the block is the magic number. If it is not,
6366    return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6367    REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6368    means that the pattern is likely compiled with different endianness. */
6369    
6370    if (re->magic_number != MAGIC_NUMBER)
6371      return re->magic_number == REVERSED_MAGIC_NUMBER?
6372        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6373    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6374    
6375  /* These two settings are used in the code for checking a UTF-8 string that  /* These two settings are used in the code for checking a UTF-8 string that
6376  follows immediately afterwards. Other values in the md block are used only  follows immediately afterwards. Other values in the md block are used only
6377  during "normal" pcre_exec() processing, not when the JIT support is in use,  during "normal" pcre_exec() processing, not when the JIT support is in use,
6378  so they are set up later. */  so they are set up later. */
6379    
6380  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6381    utf = md->utf = (re->options & PCRE_UTF8) != 0;
6382  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6383                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6384    
6385  /* Check a UTF-8 string if required. Pass back the character offset and error  /* Check a UTF-8 string if required. Pass back the character offset and error
6386  code for an invalid string if a results vector is available. */  code for an invalid string if a results vector is available. */
6387    
6388  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6389  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6390    {    {
6391    int erroroffset;    int erroroffset;
6392    int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);    int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6393    if (errorcode != 0)    if (errorcode != 0)
6394      {      {
6395      if (offsetcount >= 2)      if (offsetcount >= 2)
# Line 5988  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 6397  if (utf8 && (options & PCRE_NO_UTF8_CHEC
6397        offsets[0] = erroroffset;        offsets[0] = erroroffset;
6398        offsets[1] = errorcode;        offsets[1] = errorcode;
6399        }        }
6400    #ifdef COMPILE_PCRE16
6401        return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6402          PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6403    #else
6404      return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?      return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6405        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6406    #endif
6407      }      }
6408    
6409    /* Check that a start_offset points to the start of a UTF-8 character. */    /* Check that a start_offset points to the start of a UTF character. */
6410    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
6411        (((USPTR)subject)[start_offset] & 0xc0) == 0x80)        NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6412      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
6413    }    }
6414  #endif  #endif
# Line 6002  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 6416  if (utf8 && (options & PCRE_NO_UTF8_CHEC
6416  /* If the pattern was successfully studied with JIT support, run the JIT  /* If the pattern was successfully studied with JIT support, run the JIT
6417  executable instead of the rest of this function. Most options must be set at  executable instead of the rest of this function. Most options must be set at
6418  compile time for the JIT code to be usable. Fallback to the normal code path if  compile time for the JIT code to be usable. Fallback to the normal code path if
6419  an unsupported flag is set. In particular, JIT does not support partial  an unsupported flag is set. */
 matching. */  
6420    
6421  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
6422  if (extra_data != NULL  if (extra_data != NULL
6423      && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0      && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6424                                 PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6425      && extra_data->executable_jit != NULL      && extra_data->executable_jit != NULL
     && (extra_data->flags & PCRE_EXTRA_TABLES) == 0  
6426      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6427                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
6428    return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,                      PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
6429      start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)    {
6430      rc = PRIV(jit_exec)(re, extra_data->executable_jit,
6431        (const pcre_uchar *)subject, length, start_offset, options,
6432        ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
6433      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
6434    
6435      /* PCRE_ERROR_NULL means that the selected normal or partial matching
6436      mode is not compiled. In this case we simply fall back to the interpreter. */
6437    
6438      if (rc != PCRE_ERROR_NULL) return rc;
6439      }
6440  #endif  #endif
6441    
6442  /* Carry on with non-JIT matching. This information is for finding all the  /* Carry on with non-JIT matching. This information is for finding all the
6443  numbers associated with a given name, for condition testing. */  numbers associated with a given name, for condition testing. */
6444    
6445  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (pcre_uchar *)re + re->name_table_offset;
6446  md->name_count = re->name_count;  md->name_count = re->name_count;
6447  md->name_entry_size = re->name_entry_size;  md->name_entry_size = re->name_entry_size;
6448    
# Line 6034  md->callout_data = NULL; Line 6456  md->callout_data = NULL;
6456    
6457  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
6458    
6459  tables = external_re->tables;  tables = re->tables;
6460    
6461  if (extra_data != NULL)  if (extra_data != NULL)
6462    {    {
# Line 6054  if (extra_data != NULL) Line 6476  if (extra_data != NULL)
6476  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
6477  in other programs later. */  in other programs later. */
6478    
6479  if (tables == NULL) tables = _pcre_default_tables;  if (tables == NULL) tables = PRIV(default_tables);
   
 /* Check that the first field in the block is the magic number. If it is not,  
 test for a regex that was compiled on a host of opposite endianness. If this is  
 the case, flipped values are put in internal_re and internal_study if there was  
 study data too. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   {  
   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);  
   if (re == NULL) return PCRE_ERROR_BADMAGIC;  
   if (study != NULL) study = &internal_study;  
   }  
6480    
6481  /* Set up other data */  /* Set up other data */
6482    
# Line 6076  firstline = (re->options & PCRE_FIRSTLIN Line 6486  firstline = (re->options & PCRE_FIRSTLIN
6486    
6487  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
6488    
6489  md->start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6490    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
6491    
6492  md->start_subject = (USPTR)subject;  md->start_subject = (PCRE_PUCHAR)subject;
6493  md->start_offset = start_offset;  md->start_offset = start_offset;
6494  md->end_subject = md->start_subject + length;  md->end_subject = md->start_subject + length;
6495  end_subject = md->end_subject;  end_subject = md->end_subject;
# Line 6104  md->recursive = NULL; Line 6514  md->recursive = NULL;
6514  md->hasthen = (re->flags & PCRE_HASTHEN) != 0;  md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6515    
6516  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
6517    md->fcc = tables + fcc_offset;
6518  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
6519    
6520  /* Handle different \R options. */  /* Handle different \R options. */
# Line 6190  arg_offset_max = (2*ocount)/3; Line 6601  arg_offset_max = (2*ocount)/3;
6601  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
6602    {    {
6603    ocount = re->top_backref * 3 + 3;    ocount = re->top_backref * 3 + 3;
6604    md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6605    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6606    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
6607    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
# Line 6217  if (md->offset_vector != NULL) Line 6628  if (md->offset_vector != NULL)
6628    md->offset_vector[0] = md->offset_vector[1] = -1;    md->offset_vector[0] = md->offset_vector[1] = -1;
6629    }    }
6630    
6631  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_char value is
6632  never set for an anchored regular expression, but the anchoring may be forced  never set for an anchored regular expression, but the anchoring may be forced
6633  at run time, so we have to test for anchoring. The first char may be unset for  at run time, so we have to test for anchoring. The first char may be unset for
6634  an unanchored pattern, of course. If there's no first char and the pattern was  an unanchored pattern, of course. If there's no first char and the pattern was
# Line 6227  if (!anchored) Line 6638  if (!anchored)
6638    {    {
6639    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
6640      {      {
6641      first_byte = re->first_byte & 255;      has_first_char = TRUE;
6642      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = (pcre_uchar)(re->first_char);
6643        first_byte = md->lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
6644          {
6645          first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6646    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6647          if (utf && first_char > 127)
6648            first_char2 = UCD_OTHERCASE(first_char);
6649    #endif
6650          }
6651      }      }
6652    else    else
6653      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 6242  character" set. */ Line 6660  character" set. */
6660    
6661  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
6662    {    {
6663    req_byte = re->req_byte & 255;    has_req_char = TRUE;
6664    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = (pcre_uchar)(re->req_char);
6665    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
6666        {
6667        req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6668    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6669        if (utf && req_char > 127)
6670          req_char2 = UCD_OTHERCASE(req_char);
6671    #endif
6672        }
6673    }    }
6674    
6675    
   
   
6676  /* ==========================================================================*/  /* ==========================================================================*/
6677    
6678  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
# Line 6257  the loop runs just once. */ Line 6680  the loop runs just once. */
6680    
6681  for(;;)  for(;;)
6682    {    {
6683    USPTR save_end_subject = end_subject;    PCRE_PUCHAR save_end_subject = end_subject;
6684    USPTR new_start_match;    PCRE_PUCHAR new_start_match;
6685    
6686    /* If firstline is TRUE, the start of the match is constrained to the first    /* If firstline is TRUE, the start of the match is constrained to the first
6687    line of a multiline string. That is, the match must be before or at the first    line of a multiline string. That is, the match must be before or at the first
# Line 6268  for(;;) Line 6691  for(;;)
6691    
6692    if (firstline)    if (firstline)
6693      {      {
6694      USPTR t = start_match;      PCRE_PUCHAR t = start_match;
6695  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6696      if (utf8)      if (utf)
6697        {        {
6698        while (t < md->end_subject && !IS_NEWLINE(t))        while (t < md->end_subject && !IS_NEWLINE(t))
6699          {          {
6700          t++;          t++;
6701          while (t < end_subject && (*t & 0xc0) == 0x80) t++;          ACROSSCHAR(t < end_subject, *t, t++);
6702          }          }
6703        }        }
6704      else      else
# Line 6292  for(;;) Line 6715  for(;;)
6715    
6716    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6717      {      {
6718      /* Advance to a unique first byte if there is one. */      /* Advance to a unique first char if there is one. */
6719    
6720      if (first_byte >= 0)      if (has_first_char)
6721        {        {
6722        if (first_byte_caseless)        if (first_char != first_char2)
6723          while (start_match < end_subject && md->lcc[*start_match] != first_byte)          while (start_match < end_subject &&
6724                *start_match != first_char && *start_match != first_char2)
6725            start_match++;            start_match++;
6726        else        else
6727          while (start_match < end_subject && *start_match != first_byte)          while (start_match < end_subject && *start_match != first_char)
6728            start_match++;            start_match++;
6729        }        }
6730    
# Line 6310  for(;;) Line 6734  for(;;)
6734        {        {
6735        if (start_match > md->start_subject + start_offset)        if (start_match > md->start_subject + start_offset)
6736          {          {
6737  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6738          if (utf8)          if (utf)
6739            {            {
6740            while (start_match < end_subject && !WAS_NEWLINE(start_match))            while (start_match < end_subject && !WAS_NEWLINE(start_match))
6741              {              {
6742              start_match++;              start_match++;
6743              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)              ACROSSCHAR(start_match < end_subject, *start_match,
6744                start_match++;                start_match++);
6745              }              }
6746            }            }
6747          else          else
# Line 6344  for(;;) Line 6768  for(;;)
6768        while (start_match < end_subject)        while (start_match < end_subject)
6769          {          {
6770          register unsigned int c = *start_match;          register unsigned int c = *start_match;
6771    #ifndef COMPILE_PCRE8
6772            if (c > 255) c = 255;
6773    #endif
6774          if ((start_bits[c/8] & (1 << (c&7))) == 0)          if ((start_bits[c/8] & (1 << (c&7))) == 0)
6775            {            {
6776            start_match++;            start_match++;
6777  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6778            if (utf8)            /* In non 8-bit mode, the iteration will stop for
6779              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)            characters > 255 at the beginning or not stop at all. */
6780                start_match++;            if (utf)
6781                ACROSSCHAR(start_match < end_subject, *start_match,
6782                  start_match++);
6783  #endif  #endif
6784            }            }
6785          else break;          else break;
# Line 6379  for(;;) Line 6808  for(;;)
6808        break;        break;
6809        }        }
6810    
6811      /* If req_byte is set, we know that that character must appear in the      /* If req_char is set, we know that that character must appear in the
6812      subject for the match to succeed. If the first character is set, req_byte      subject for the match to succeed. If the first character is set, req_char
6813      must be later in the subject; otherwise the test starts at the match point.      must be later in the subject; otherwise the test starts at the match point.
6814      This optimization can save a huge amount of backtracking in patterns with      This optimization can save a huge amount of backtracking in patterns with
6815      nested unlimited repeats that aren't going to match. Writing separate code      nested unlimited repeats that aren't going to match. Writing separate code
# Line 6393  for(;;) Line 6822  for(;;)
6822      32-megabyte string... so we don't do this when the string is sufficiently      32-megabyte string... so we don't do this when the string is sufficiently
6823      long. */      long. */
6824    
6825      if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)      if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6826        {        {
6827        register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);        register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6828    
6829        /* We don't need to repeat the search if we haven't yet reached the        /* We don't need to repeat the search if we haven't yet reached the
6830        place we found it at last time. */        place we found it at last time. */
6831    
6832        if (p > req_byte_ptr)        if (p > req_char_ptr)
6833          {          {
6834          if (req_byte_caseless)          if (req_char != req_char2)
6835            {            {
6836            while (p < end_subject)            while (p < end_subject)
6837              {              {
6838              register int pp = *p++;              register int pp = *p++;
6839              if (pp == req_byte || pp == req_byte2) { p--; break; }              if (pp == req_char || pp == req_char2) { p--; break; }
6840              }              }
6841            }            }
6842          else          else
6843            {            {
6844            while (p < end_subject)            while (p < end_subject)
6845              {              {
6846              if (*p++ == req_byte) { p--; break; }              if (*p++ == req_char) { p--; break; }
6847              }              }
6848            }            }
6849    
# Line 6431  for(;;) Line 6860  for(;;)
6860          found it, so that we don't search again next time round the loop if          found it, so that we don't search again next time round the loop if
6861          the start hasn't passed this character yet. */          the start hasn't passed this character yet. */
6862    
6863          req_byte_ptr = p;          req_char_ptr = p;
6864          }          }
6865        }        }
6866      }      }
# Line 6456  for(;;) Line 6885  for(;;)
6885    switch(rc)    switch(rc)
6886      {      {
6887      /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched      /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6888      the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP      the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6889      entirely. The only way we can do that is to re-do the match at the same      entirely. The only way we can do that is to re-do the match at the same
6890      point, with a flag to force SKIP with an argument to be ignored. Just      point, with a flag to force SKIP with an argument to be ignored. Just
6891      treating this case as NOMATCH does not work because it does not check other      treating this case as NOMATCH does not work because it does not check other
6892      alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */      alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6893    
6894      case MATCH_SKIP_ARG:      case MATCH_SKIP_ARG:
6895      new_start_match = start_match;      new_start_match = start_match;
6896      md->ignore_skip_arg = TRUE;      md->ignore_skip_arg = TRUE;
6897      break;      break;
6898    
6899      /* SKIP passes back the next starting point explicitly, but if it is the      /* SKIP passes back the next starting point explicitly, but if it is the
6900      same as the match we have just done, treat it as NOMATCH. */      same as the match we have just done, treat it as NOMATCH. */
# Line 6486  for(;;) Line 6915  for(;;)
6915      case MATCH_THEN:      case MATCH_THEN:
6916      md->ignore_skip_arg = FALSE;      md->ignore_skip_arg = FALSE;
6917      new_start_match = start_match + 1;      new_start_match = start_match + 1;
6918  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6919      if (utf8)      if (utf)
6920        while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)        ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6921          new_start_match++;          new_start_match++);
6922  #endif  #endif
6923      break;      break;
6924    
# Line 6527  for(;;) Line 6956  for(;;)
6956    
6957    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does
6958    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
6959    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. In
6960      normal matching start_match will always be greater than the first position at
6961      this stage, but a failed *SKIP can cause a return at the same point, which is
6962      why the first test exists. */
6963    
6964    if (start_match[-1] == CHAR_CR &&    if (start_match > (PCRE_PUCHAR)subject + start_offset &&
6965          start_match[-1] == CHAR_CR &&
6966        start_match < end_subject &&        start_match < end_subject &&
6967        *start_match == CHAR_NL &&        *start_match == CHAR_NL &&
6968        (re->flags & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
# Line 6575  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 7008  if (rc == MATCH_MATCH || rc == MATCH_ACC
7008        }        }
7009      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
7010      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
7011      (pcre_free)(md->offset_vector);      (PUBL(free))(md->offset_vector);
7012      }      }
7013    
7014    /* Set the return code to the number of captured strings, or 0 if there were    /* Set the return code to the number of captured strings, or 0 if there were
# Line 6614  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 7047  if (rc == MATCH_MATCH || rc == MATCH_ACC
7047      }      }
7048    
7049    /* Return MARK data if requested */    /* Return MARK data if requested */
7050    
7051    if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)    if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7052      *(extra_data->mark) = (unsigned char *)(md->mark);      *(extra_data->mark) = (pcre_uchar *)md->mark;
7053    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
7054    return rc;    return rc;
7055    }    }
# Line 6627  attempt has failed at all permitted star Line 7060  attempt has failed at all permitted star
7060  if (using_temporary_offsets)  if (using_temporary_offsets)
7061    {    {
7062    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
7063    (pcre_free)(md->offset_vector);    (PUBL(free))(md->offset_vector);
7064    }    }
7065    
7066  /* For anything other than nomatch or partial match, just return the code. */  /* For anything other than nomatch or partial match, just return the code. */
# Line 6646  if (start_partial != NULL) Line 7079  if (start_partial != NULL)
7079    md->mark = NULL;    md->mark = NULL;
7080    if (offsetcount > 1)    if (offsetcount > 1)
7081      {      {
7082      offsets[0] = (int)(start_partial - (USPTR)subject);      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7083      offsets[1] = (int)(end_subject - (USPTR)subject);      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7084      }      }
7085    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
7086    }    }
# Line 6663  else Line 7096  else
7096  /* Return the MARK data if it has been requested. */  /* Return the MARK data if it has been requested. */
7097    
7098  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7099    *(extra_data->mark) = (unsigned char *)(md->nomatch_mark);    *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
7100  return rc;  return rc;
7101  }  }
7102    

Legend:
Removed from v.779  
changed lines
  Added in v.926

  ViewVC Help
Powered by ViewVC 1.1.5