/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 788 by ph10, Tue Dec 6 15:38:01 2011 UTC revision 916 by ph10, Wed Feb 15 09:50:53 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 113  Returns:     nothing Line 113  Returns:     nothing
113  */  */
114    
115  static void  static void
116  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
117  {  {
118  unsigned int c;  unsigned int c;
119  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 140  Arguments: Line 140  Arguments:
140    md          points to match data block    md          points to match data block
141    caseless    TRUE if caseless    caseless    TRUE if caseless
142    
143  Returns:      < 0 if not matched, otherwise the number of subject bytes matched  Returns:      >= 0 the number of subject bytes matched
144                  -1 no match
145                  -2 partial match; always given if at end subject
146  */  */
147    
148  static int  static int
149  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
150    BOOL caseless)    BOOL caseless)
151  {  {
152  USPTR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
153  register USPTR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
154    
155  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
156  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 163  pchars(p, length, FALSE, md); Line 165  pchars(p, length, FALSE, md);
165  printf("\n");  printf("\n");
166  #endif  #endif
167    
168  /* Always fail if reference not set (and not JavaScript compatible). */  /* Always fail if reference not set (and not JavaScript compatible - in that
169    case the length is passed as zero). */
170    
171  if (length < 0) return -1;  if (length < 0) return -1;
172    
# Line 173  ASCII characters. */ Line 176  ASCII characters. */
176    
177  if (caseless)  if (caseless)
178    {    {
179  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
180  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
181    if (md->utf8)    if (md->utf)
182      {      {
183      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
184      bytes matched may differ, because there are some characters whose upper and      bytes matched may differ, because there are some characters whose upper and
# Line 185  if (caseless) Line 188  if (caseless)
188      the latter. It is important, therefore, to check the length along the      the latter. It is important, therefore, to check the length along the
189      reference, not along the subject (earlier code did this wrong). */      reference, not along the subject (earlier code did this wrong). */
190    
191      USPTR endptr = p + length;      PCRE_PUCHAR endptr = p + length;
192      while (p < endptr)      while (p < endptr)
193        {        {
194        int c, d;        int c, d;
195        if (eptr >= md->end_subject) return -1;        if (eptr >= md->end_subject) return -2;   /* Partial match */
196        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
197        GETCHARINC(d, p);        GETCHARINC(d, p);
198        if (c != d && c != UCD_OTHERCASE(d)) return -1;        if (c != d && c != UCD_OTHERCASE(d)) return -1;
# Line 202  if (caseless) Line 205  if (caseless)
205    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
206    is no UCP support. */    is no UCP support. */
207      {      {
     if (eptr + length > md->end_subject) return -1;  
208      while (length-- > 0)      while (length-- > 0)
209        { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }        {
210          if (eptr >= md->end_subject) return -2;   /* Partial match */
211          if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
212          p++;
213          eptr++;
214          }
215      }      }
216    }    }
217    
# Line 213  are in UTF-8 mode. */ Line 220  are in UTF-8 mode. */
220    
221  else  else
222    {    {
223    if (eptr + length > md->end_subject) return -1;    while (length-- > 0)
224    while (length-- > 0) if (*p++ != *eptr++) return -1;      {
225        if (eptr >= md->end_subject) return -2;   /* Partial match */
226        if (*p++ != *eptr++) return -1;
227        }
228    }    }
229    
230  return (int)(eptr - eptr_start);  return (int)(eptr - eptr_start);
# Line 307  argument of match(), which never changes Line 317  argument of match(), which never changes
317    
318  #define RMATCH(ra,rb,rc,rd,re,rw)\  #define RMATCH(ra,rb,rc,rd,re,rw)\
319    {\    {\
320    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
321    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
322    frame->Xwhere = rw; \    frame->Xwhere = rw; \
323    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
# Line 328  argument of match(), which never changes Line 338  argument of match(), which never changes
338    {\    {\
339    heapframe *oldframe = frame;\    heapframe *oldframe = frame;\
340    frame = oldframe->Xprevframe;\    frame = oldframe->Xprevframe;\
341    (pcre_stack_free)(oldframe);\    if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\
342    if (frame != NULL)\    if (frame != NULL)\
343      {\      {\
344      rrc = ra;\      rrc = ra;\
# Line 345  typedef struct heapframe { Line 355  typedef struct heapframe {
355    
356    /* Function arguments that may change */    /* Function arguments that may change */
357    
358    USPTR Xeptr;    PCRE_PUCHAR Xeptr;
359    const uschar *Xecode;    const pcre_uchar *Xecode;
360    USPTR Xmstart;    PCRE_PUCHAR Xmstart;
361    int Xoffset_top;    int Xoffset_top;
362    eptrblock *Xeptrb;    eptrblock *Xeptrb;
363    unsigned int Xrdepth;    unsigned int Xrdepth;
364    
365    /* Function local variables */    /* Function local variables */
366    
367    USPTR Xcallpat;    PCRE_PUCHAR Xcallpat;
368  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
369    USPTR Xcharptr;    PCRE_PUCHAR Xcharptr;
370  #endif  #endif
371    USPTR Xdata;    PCRE_PUCHAR Xdata;
372    USPTR Xnext;    PCRE_PUCHAR Xnext;
373    USPTR Xpp;    PCRE_PUCHAR Xpp;
374    USPTR Xprev;    PCRE_PUCHAR Xprev;
375    USPTR Xsaved_eptr;    PCRE_PUCHAR Xsaved_eptr;
376    
377    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
378    
# Line 375  typedef struct heapframe { Line 385  typedef struct heapframe {
385    int Xprop_value;    int Xprop_value;
386    int Xprop_fail_result;    int Xprop_fail_result;
387    int Xoclength;    int Xoclength;
388    uschar Xocchars[8];    pcre_uchar Xocchars[6];
389  #endif  #endif
390    
391    int Xcodelink;    int Xcodelink;
# Line 440  the subject. */ Line 450  the subject. */
450    
451    
452  /* Performance note: It might be tempting to extract commonly used fields from  /* Performance note: It might be tempting to extract commonly used fields from
453  the md structure (e.g. utf8, end_subject) into individual variables to improve  the md structure (e.g. utf, end_subject) into individual variables to improve
454  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
455  made performance worse.  made performance worse.
456    
# Line 463  Returns:       MATCH_MATCH if matched Line 473  Returns:       MATCH_MATCH if matched
473  */  */
474    
475  static int  static int
476  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
477    int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)    PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
478      unsigned int rdepth)
479  {  {
480  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
481  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 473  so they can be ordinary variables in all Line 484  so they can be ordinary variables in all
484  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
485  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
486  register unsigned int c;   /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
487  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf;         /* Local copy of UTF flag for speed */
488    
489  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
490  BOOL caseless;  BOOL caseless;
491  int condcode;  int condcode;
492    
493  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
494  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame". We set up the top-level
495  heap storage. Set up the top-level frame here; others are obtained from the  frame on the stack here; subsequent instantiations are obtained from the heap
496  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  whenever RMATCH() does a "recursion". See the macro definitions above. Putting
497    the top-level on the stack rather than malloc-ing them all gives a performance
498    boost in many cases where there is not much "recursion". */
499    
500  #ifdef NO_RECURSE  #ifdef NO_RECURSE
501  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));  heapframe frame_zero;
502  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  heapframe *frame = &frame_zero;
503  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
504    
505  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 513  HEAP_RECURSE: Line 526  HEAP_RECURSE:
526    
527  /* Ditto for the local variables */  /* Ditto for the local variables */
528    
529  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
530  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
531  #endif  #endif
532  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
# Line 571  declarations can be cut out in a block. Line 584  declarations can be cut out in a block.
584  below are for variables that do not have to be preserved over a recursive call  below are for variables that do not have to be preserved over a recursive call
585  to RMATCH(). */  to RMATCH(). */
586    
587  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
588  const uschar *charptr;  const pcre_uchar *charptr;
589  #endif  #endif
590  const uschar *callpat;  const pcre_uchar *callpat;
591  const uschar *data;  const pcre_uchar *data;
592  const uschar *next;  const pcre_uchar *next;
593  USPTR         pp;  PCRE_PUCHAR       pp;
594  const uschar *prev;  const pcre_uchar *prev;
595  USPTR         saved_eptr;  PCRE_PUCHAR       saved_eptr;
596    
597  recursion_info new_recursive;  recursion_info new_recursive;
598    
# Line 592  int prop_type; Line 605  int prop_type;
605  int prop_value;  int prop_value;
606  int prop_fail_result;  int prop_fail_result;
607  int oclength;  int oclength;
608  uschar occhars[8];  pcre_uchar occhars[6];
609  #endif  #endif
610    
611  int codelink;  int codelink;
# Line 608  int save_offset1, save_offset2, save_off Line 621  int save_offset1, save_offset2, save_off
621  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
622    
623  eptrblock newptrb;  eptrblock newptrb;
624    
625    /* There is a special fudge for calling match() in a way that causes it to
626    measure the size of its basic stack frame when the stack is being used for
627    recursion. The second argument (ecode) being NULL triggers this behaviour. It
628    cannot normally ever be NULL. The return is the negated value of the frame
629    size. */
630    
631    if (ecode == NULL)
632      {
633      if (rdepth == 0)
634        return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
635      else
636        {
637        int len = (char *)&rdepth - (char *)eptr;
638        return (len > 0)? -len : len;
639        }
640      }
641  #endif     /* NO_RECURSE */  #endif     /* NO_RECURSE */
642    
643  /* To save space on the stack and in the heap frame, I have doubled up on some  /* To save space on the stack and in the heap frame, I have doubled up on some
# Line 620  the alternative names that are used. */ Line 650  the alternative names that are used. */
650  #define code_offset   codelink  #define code_offset   codelink
651  #define condassert    condition  #define condassert    condition
652  #define matched_once  prev_is_word  #define matched_once  prev_is_word
653    #define foc           number
654    #define save_mark     data
655    
656  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
657  variables. */  variables. */
# Line 645  defined). However, RMATCH isn't like a f Line 677  defined). However, RMATCH isn't like a f
677  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
678  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
679    
680  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
681  utf8 = md->utf8;       /* Local copy of the flag */  utf = md->utf;       /* Local copy of the flag */
682  #else  #else
683  utf8 = FALSE;  utf = FALSE;
684  #endif  #endif
685    
686  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
# Line 689  for (;;) Line 721  for (;;)
721      case OP_MARK:      case OP_MARK:
722      md->nomatch_mark = ecode + 2;      md->nomatch_mark = ecode + 2;
723      md->mark = NULL;    /* In case previously set by assertion */      md->mark = NULL;    /* In case previously set by assertion */
724      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
725        eptrb, RM55);        eptrb, RM55);
726      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
727           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
# Line 702  for (;;) Line 734  for (;;)
734      unaltered. */      unaltered. */
735    
736      else if (rrc == MATCH_SKIP_ARG &&      else if (rrc == MATCH_SKIP_ARG &&
737          strcmp((char *)(ecode + 2), (char *)(md->start_match_ptr)) == 0)          STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
738        {        {
739        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
740        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
# Line 715  for (;;) Line 747  for (;;)
747      /* COMMIT overrides PRUNE, SKIP, and THEN */      /* COMMIT overrides PRUNE, SKIP, and THEN */
748    
749      case OP_COMMIT:      case OP_COMMIT:
750      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
751        eptrb, RM52);        eptrb, RM52);
752      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
753          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
# Line 726  for (;;) Line 758  for (;;)
758      /* PRUNE overrides THEN */      /* PRUNE overrides THEN */
759    
760      case OP_PRUNE:      case OP_PRUNE:
761      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
762        eptrb, RM51);        eptrb, RM51);
763      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
764      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
# Line 734  for (;;) Line 766  for (;;)
766      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
767      md->nomatch_mark = ecode + 2;      md->nomatch_mark = ecode + 2;
768      md->mark = NULL;    /* In case previously set by assertion */      md->mark = NULL;    /* In case previously set by assertion */
769      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
770        eptrb, RM56);        eptrb, RM56);
771      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
772           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
# Line 744  for (;;) Line 776  for (;;)
776      /* SKIP overrides PRUNE and THEN */      /* SKIP overrides PRUNE and THEN */
777    
778      case OP_SKIP:      case OP_SKIP:
779      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
780        eptrb, RM53);        eptrb, RM53);
781      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
782        RRETURN(rrc);        RRETURN(rrc);
# Line 758  for (;;) Line 790  for (;;)
790      case OP_SKIP_ARG:      case OP_SKIP_ARG:
791      if (md->ignore_skip_arg)      if (md->ignore_skip_arg)
792        {        {
793        ecode += _pcre_OP_lengths[*ecode] + ecode[1];        ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
794        break;        break;
795        }        }
796      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
797        eptrb, RM57);        eptrb, RM57);
798      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
799        RRETURN(rrc);        RRETURN(rrc);
# Line 779  for (;;) Line 811  for (;;)
811      match pointer to do this. */      match pointer to do this. */
812    
813      case OP_THEN:      case OP_THEN:
814      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
815        eptrb, RM54);        eptrb, RM54);
816      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
817      md->start_match_ptr = ecode;      md->start_match_ptr = ecode;
# Line 788  for (;;) Line 820  for (;;)
820      case OP_THEN_ARG:      case OP_THEN_ARG:
821      md->nomatch_mark = ecode + 2;      md->nomatch_mark = ecode + 2;
822      md->mark = NULL;    /* In case previously set by assertion */      md->mark = NULL;    /* In case previously set by assertion */
823      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
824        md, eptrb, RM58);        md, eptrb, RM58);
825      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
826           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
# Line 812  for (;;) Line 844  for (;;)
844      case OP_ONCE_NC:      case OP_ONCE_NC:
845      prev = ecode;      prev = ecode;
846      saved_eptr = eptr;      saved_eptr = eptr;
847        save_mark = md->mark;
848      do      do
849        {        {
850        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
# Line 830  for (;;) Line 863  for (;;)
863    
864        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
865        ecode += GET(ecode,1);        ecode += GET(ecode,1);
866          md->mark = save_mark;
867        }        }
868      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
869    
# Line 909  for (;;) Line 943  for (;;)
943        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
944        save_offset3 = md->offset_vector[md->offset_end - number];        save_offset3 = md->offset_vector[md->offset_end - number];
945        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
946          save_mark = md->mark;
947    
948        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
949        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
# Line 917  for (;;) Line 952  for (;;)
952        for (;;)        for (;;)
953          {          {
954          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
955          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
956            eptrb, RM1);            eptrb, RM1);
957          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
958    
# Line 945  for (;;) Line 980  for (;;)
980          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
981          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
982          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
983            md->mark = save_mark;
984          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
985          }          }
986    
# Line 1004  for (;;) Line 1040  for (;;)
1040    
1041        else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)        else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1042          {          {
1043          ecode += _pcre_OP_lengths[*ecode];          ecode += PRIV(OP_lengths)[*ecode];
1044          goto TAIL_RECURSE;          goto TAIL_RECURSE;
1045          }          }
1046    
1047        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1048    
1049        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,        save_mark = md->mark;
1050          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1051          RM2);          RM2);
1052    
1053        /* See comment in the code for capturing groups above about handling        /* See comment in the code for capturing groups above about handling
# Line 1028  for (;;) Line 1065  for (;;)
1065          {          {
1066          if (rrc == MATCH_ONCE)          if (rrc == MATCH_ONCE)
1067            {            {
1068            const uschar *scode = ecode;            const pcre_uchar *scode = ecode;
1069            if (*scode != OP_ONCE)           /* If not at start, find it */            if (*scode != OP_ONCE)           /* If not at start, find it */
1070              {              {
1071              while (*scode == OP_ALT) scode += GET(scode, 1);              while (*scode == OP_ALT) scode += GET(scode, 1);
# Line 1039  for (;;) Line 1076  for (;;)
1076          RRETURN(rrc);          RRETURN(rrc);
1077          }          }
1078        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1079          md->mark = save_mark;
1080        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1081        }        }
1082    
# Line 1093  for (;;) Line 1131  for (;;)
1131          md->offset_vector[md->offset_end - number] =          md->offset_vector[md->offset_end - number] =
1132            (int)(eptr - md->start_subject);            (int)(eptr - md->start_subject);
1133          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1134          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1135            eptrb, RM63);            eptrb, RM63);
1136          if (rrc == MATCH_KETRPOS)          if (rrc == MATCH_KETRPOS)
1137            {            {
# Line 1165  for (;;) Line 1203  for (;;)
1203      for (;;)      for (;;)
1204        {        {
1205        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1206        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1207          eptrb, RM48);          eptrb, RM48);
1208        if (rrc == MATCH_KETRPOS)        if (rrc == MATCH_KETRPOS)
1209          {          {
# Line 1215  for (;;) Line 1253  for (;;)
1253    
1254      if (ecode[LINK_SIZE+1] == OP_CALLOUT)      if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1255        {        {
1256        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
1257          {          {
1258          pcre_callout_block cb;          PUBL(callout_block) cb;
1259          cb.version          = 2;   /* Version 2 of the callout block */          cb.version          = 2;   /* Version 2 of the callout block */
1260          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1261          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1262    #ifdef COMPILE_PCRE8
1263          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
1264    #else
1265            cb.subject          = (PCRE_SPTR16)md->start_subject;
1266    #endif
1267          cb.subject_length   = (int)(md->end_subject - md->start_subject);          cb.subject_length   = (int)(md->end_subject - md->start_subject);
1268          cb.start_match      = (int)(mstart - md->start_subject);          cb.start_match      = (int)(mstart - md->start_subject);
1269          cb.current_position = (int)(eptr - md->start_subject);          cb.current_position = (int)(eptr - md->start_subject);
# Line 1231  for (;;) Line 1273  for (;;)
1273          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1274          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1275          cb.mark             = md->nomatch_mark;          cb.mark             = md->nomatch_mark;
1276          if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1277          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1278          }          }
1279        ecode += _pcre_OP_lengths[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1280        }        }
1281    
1282      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1260  for (;;) Line 1302  for (;;)
1302    
1303          if (!condition && condcode == OP_NRREF)          if (!condition && condcode == OP_NRREF)
1304            {            {
1305            uschar *slotA = md->name_table;            pcre_uchar *slotA = md->name_table;
1306            for (i = 0; i < md->name_count; i++)            for (i = 0; i < md->name_count; i++)
1307              {              {
1308              if (GET2(slotA, 0) == recno) break;              if (GET2(slotA, 0) == recno) break;
# Line 1273  for (;;) Line 1315  for (;;)
1315    
1316            if (i < md->name_count)            if (i < md->name_count)
1317              {              {
1318              uschar *slotB = slotA;              pcre_uchar *slotB = slotA;
1319              while (slotB > md->name_table)              while (slotB > md->name_table)
1320                {                {
1321                slotB -= md->name_entry_size;                slotB -= md->name_entry_size;
1322                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1323                  {                  {
1324                  condition = GET2(slotB, 0) == md->recursive->group_num;                  condition = GET2(slotB, 0) == md->recursive->group_num;
1325                  if (condition) break;                  if (condition) break;
# Line 1293  for (;;) Line 1335  for (;;)
1335                for (i++; i < md->name_count; i++)                for (i++; i < md->name_count; i++)
1336                  {                  {
1337                  slotB += md->name_entry_size;                  slotB += md->name_entry_size;
1338                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                  if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1339                    {                    {
1340                    condition = GET2(slotB, 0) == md->recursive->group_num;                    condition = GET2(slotB, 0) == md->recursive->group_num;
1341                    if (condition) break;                    if (condition) break;
# Line 1306  for (;;) Line 1348  for (;;)
1348    
1349          /* Choose branch according to the condition */          /* Choose branch according to the condition */
1350    
1351          ecode += condition? 3 : GET(ecode, 1);          ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1352          }          }
1353        }        }
1354    
# Line 1323  for (;;) Line 1365  for (;;)
1365        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
1366          {          {
1367          int refno = offset >> 1;          int refno = offset >> 1;
1368          uschar *slotA = md->name_table;          pcre_uchar *slotA = md->name_table;
1369    
1370          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
1371            {            {
# Line 1337  for (;;) Line 1379  for (;;)
1379    
1380          if (i < md->name_count)          if (i < md->name_count)
1381            {            {
1382            uschar *slotB = slotA;            pcre_uchar *slotB = slotA;
1383            while (slotB > md->name_table)            while (slotB > md->name_table)
1384              {              {
1385              slotB -= md->name_entry_size;              slotB -= md->name_entry_size;
1386              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1387                {                {
1388                offset = GET2(slotB, 0) << 1;                offset = GET2(slotB, 0) << 1;
1389                condition = offset < offset_top &&                condition = offset < offset_top &&
# Line 1359  for (;;) Line 1401  for (;;)
1401              for (i++; i < md->name_count; i++)              for (i++; i < md->name_count; i++)
1402                {                {
1403                slotB += md->name_entry_size;                slotB += md->name_entry_size;
1404                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1405                  {                  {
1406                  offset = GET2(slotB, 0) << 1;                  offset = GET2(slotB, 0) << 1;
1407                  condition = offset < offset_top &&                  condition = offset < offset_top &&
# Line 1374  for (;;) Line 1416  for (;;)
1416    
1417        /* Choose branch according to the condition */        /* Choose branch according to the condition */
1418    
1419        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1420        }        }
1421    
1422      else if (condcode == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
# Line 1466  for (;;) Line 1508  for (;;)
1508        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1509        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1510        }        }
1511      ecode += 3;      ecode += 1 + IMM2_SIZE;
1512      break;      break;
1513    
1514    
# Line 1513  for (;;) Line 1555  for (;;)
1555    
1556      case OP_ASSERT:      case OP_ASSERT:
1557      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1558        save_mark = md->mark;
1559      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1560        {        {
1561        condassert = TRUE;        condassert = TRUE;
# Line 1534  for (;;) Line 1577  for (;;)
1577    
1578        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1579        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1580          md->mark = save_mark;
1581        }        }
1582      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1583    
# Line 1557  for (;;) Line 1601  for (;;)
1601    
1602      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1603      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1604        save_mark = md->mark;
1605      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1606        {        {
1607        condassert = TRUE;        condassert = TRUE;
# Line 1567  for (;;) Line 1612  for (;;)
1612      do      do
1613        {        {
1614        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1615          md->mark = save_mark;
1616        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1617        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1618          {          {
# Line 1593  for (;;) Line 1639  for (;;)
1639      back a number of characters, not bytes. */      back a number of characters, not bytes. */
1640    
1641      case OP_REVERSE:      case OP_REVERSE:
1642  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1643      if (utf8)      if (utf)
1644        {        {
1645        i = GET(ecode, 1);        i = GET(ecode, 1);
1646        while (i-- > 0)        while (i-- > 0)
# Line 1625  for (;;) Line 1671  for (;;)
1671      function is able to force a failure. */      function is able to force a failure. */
1672    
1673      case OP_CALLOUT:      case OP_CALLOUT:
1674      if (pcre_callout != NULL)      if (PUBL(callout) != NULL)
1675        {        {
1676        pcre_callout_block cb;        PUBL(callout_block) cb;
1677        cb.version          = 2;   /* Version 2 of the callout block */        cb.version          = 2;   /* Version 2 of the callout block */
1678        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1679        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1680    #ifdef COMPILE_PCRE8
1681        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1682    #else
1683          cb.subject          = (PCRE_SPTR16)md->start_subject;
1684    #endif
1685        cb.subject_length   = (int)(md->end_subject - md->start_subject);        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1686        cb.start_match      = (int)(mstart - md->start_subject);        cb.start_match      = (int)(mstart - md->start_subject);
1687        cb.current_position = (int)(eptr - md->start_subject);        cb.current_position = (int)(eptr - md->start_subject);
# Line 1641  for (;;) Line 1691  for (;;)
1691        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1692        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1693        cb.mark             = md->nomatch_mark;        cb.mark             = md->nomatch_mark;
1694        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1695        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1696        }        }
1697      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 1700  for (;;) Line 1750  for (;;)
1750        else        else
1751          {          {
1752          new_recursive.offset_save =          new_recursive.offset_save =
1753            (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));            (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1754          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1755          }          }
1756        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
# Line 1715  for (;;) Line 1765  for (;;)
1765        do        do
1766          {          {
1767          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1768          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1769            md, eptrb, RM6);            md, eptrb, RM6);
1770          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1771              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
# Line 1724  for (;;) Line 1774  for (;;)
1774            {            {
1775            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1776            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1777              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1778    
1779            /* Set where we got to in the subject, and reset the start in case            /* Set where we got to in the subject, and reset the start in case
1780            it was changed by \K. This *is* propagated back out of a recursion,            it was changed by \K. This *is* propagated back out of a recursion,
# Line 1742  for (;;) Line 1792  for (;;)
1792            {            {
1793            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1794            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1795              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1796            RRETURN(rrc);            RRETURN(rrc);
1797            }            }
1798    
# Line 1754  for (;;) Line 1804  for (;;)
1804        DPRINTF(("Recursion didn't match\n"));        DPRINTF(("Recursion didn't match\n"));
1805        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1806        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1807          (pcre_free)(new_recursive.offset_save);          (PUBL(free))(new_recursive.offset_save);
1808        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1809        }        }
1810    
# Line 2015  for (;;) Line 2065  for (;;)
2065    
2066      case OP_DOLLM:      case OP_DOLLM:
2067      if (eptr < md->end_subject)      if (eptr < md->end_subject)
2068        { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }        {
2069          if (!IS_NEWLINE(eptr))
2070            {
2071            if (eptr + 1 >= md->end_subject &&
2072                md->partial != 0 &&
2073                NLBLOCK->nltype == NLTYPE_FIXED &&
2074                NLBLOCK->nllen == 2 &&
2075                *eptr == NLBLOCK->nl[0])
2076              {
2077              md->hitend = TRUE;
2078              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2079              }
2080            RRETURN(MATCH_NOMATCH);
2081            }
2082          }
2083      else      else
2084        {        {
2085        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
# Line 2047  for (;;) Line 2111  for (;;)
2111      ASSERT_NL_OR_EOS:      ASSERT_NL_OR_EOS:
2112      if (eptr < md->end_subject &&      if (eptr < md->end_subject &&
2113          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2114          {
2115          if (eptr + 1 >= md->end_subject &&
2116              md->partial != 0 &&
2117              NLBLOCK->nltype == NLTYPE_FIXED &&
2118              NLBLOCK->nllen == 2 &&
2119              *eptr == NLBLOCK->nl[0])
2120            {
2121            md->hitend = TRUE;
2122            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2123            }
2124        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2125          }
2126    
2127      /* Either at end of string or \n before end. */      /* Either at end of string or \n before end. */
2128    
# Line 2066  for (;;) Line 2141  for (;;)
2141        be "non-word" characters. Remember the earliest consulted character for        be "non-word" characters. Remember the earliest consulted character for
2142        partial matching. */        partial matching. */
2143    
2144  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2145        if (utf8)        if (utf)
2146          {          {
2147          /* Get status of previous character */          /* Get status of previous character */
2148    
2149          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
2150            {            {
2151            USPTR lastptr = eptr - 1;            PCRE_PUCHAR lastptr = eptr - 1;
2152            while((*lastptr & 0xc0) == 0x80) lastptr--;            BACKCHAR(lastptr);
2153            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2154            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
2155  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2139  for (;;) Line 2214  for (;;)
2214              }              }
2215            else            else
2216  #endif  #endif
2217            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = MAX_255(eptr[-1])
2218                && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2219            }            }
2220    
2221          /* Get status of next character */          /* Get status of next character */
# Line 2162  for (;;) Line 2238  for (;;)
2238            }            }
2239          else          else
2240  #endif  #endif
2241          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          cur_is_word = MAX_255(*eptr)
2242              && ((md->ctypes[*eptr] & ctype_word) != 0);
2243          }          }
2244    
2245        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
# Line 2186  for (;;) Line 2263  for (;;)
2263        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2264        }        }
2265      eptr++;      eptr++;
2266      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  #ifdef SUPPORT_UTF
2267        if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2268    #endif
2269      ecode++;      ecode++;
2270      break;      break;
2271    
# Line 2211  for (;;) Line 2290  for (;;)
2290        }        }
2291      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2292      if (      if (
2293  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2294         c < 256 &&         c < 256 &&
2295  #endif  #endif
2296         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
# Line 2228  for (;;) Line 2307  for (;;)
2307        }        }
2308      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2309      if (      if (
2310  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2311         c >= 256 ||         c > 255 ||
2312  #endif  #endif
2313         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
2314         )         )
# Line 2245  for (;;) Line 2324  for (;;)
2324        }        }
2325      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2326      if (      if (
2327  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2328         c < 256 &&         c < 256 &&
2329  #endif  #endif
2330         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
# Line 2262  for (;;) Line 2341  for (;;)
2341        }        }
2342      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2343      if (      if (
2344  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2345         c >= 256 ||         c > 255 ||
2346  #endif  #endif
2347         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
2348         )         )
# Line 2279  for (;;) Line 2358  for (;;)
2358        }        }
2359      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2360      if (      if (
2361  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2362         c < 256 &&         c < 256 &&
2363  #endif  #endif
2364         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
# Line 2296  for (;;) Line 2375  for (;;)
2375        }        }
2376      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2377      if (      if (
2378  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2379         c >= 256 ||         c > 255 ||
2380  #endif  #endif
2381         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
2382         )         )
# Line 2317  for (;;) Line 2396  for (;;)
2396        default: RRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2397    
2398        case 0x000d:        case 0x000d:
2399        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr >= md->end_subject)
2400            {
2401            SCHECK_PARTIAL();
2402            }
2403          else if (*eptr == 0x0a) eptr++;
2404        break;        break;
2405    
2406        case 0x000a:        case 0x000a:
# Line 2475  for (;;) Line 2558  for (;;)
2558          break;          break;
2559    
2560          case PT_GC:          case PT_GC:
2561          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2562            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2563          break;          break;
2564    
# Line 2492  for (;;) Line 2575  for (;;)
2575          /* These are specials */          /* These are specials */
2576    
2577          case PT_ALNUM:          case PT_ALNUM:
2578          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2579               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2580            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2581          break;          break;
2582    
2583          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2584          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2585               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2586                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2587            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2588          break;          break;
2589    
2590          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2591          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2592               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2593               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2594                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
# Line 2513  for (;;) Line 2596  for (;;)
2596          break;          break;
2597    
2598          case PT_WORD:          case PT_WORD:
2599          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2600               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2601               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2602            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2603          break;          break;
# Line 2543  for (;;) Line 2626  for (;;)
2626      while (eptr < md->end_subject)      while (eptr < md->end_subject)
2627        {        {
2628        int len = 1;        int len = 1;
2629        if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }        if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2630        if (UCD_CATEGORY(c) != ucp_M) break;        if (UCD_CATEGORY(c) != ucp_M) break;
2631        eptr += len;        eptr += len;
2632        }        }
2633        CHECK_PARTIAL();
2634      ecode++;      ecode++;
2635      break;      break;
2636  #endif  #endif
# Line 2564  for (;;) Line 2648  for (;;)
2648      case OP_REFI:      case OP_REFI:
2649      caseless = op == OP_REFI;      caseless = op == OP_REFI;
2650      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2651      ecode += 3;      ecode += 1 + IMM2_SIZE;
2652    
2653      /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2654    
# Line 2604  for (;;) Line 2688  for (;;)
2688        case OP_CRMINRANGE:        case OP_CRMINRANGE:
2689        minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2690        min = GET2(ecode, 1);        min = GET2(ecode, 1);
2691        max = GET2(ecode, 3);        max = GET2(ecode, 1 + IMM2_SIZE);
2692        if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2693        ecode += 5;        ecode += 1 + 2 * IMM2_SIZE;
2694        break;        break;
2695    
2696        default:               /* No repeat follows */        default:               /* No repeat follows */
2697        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2698          {          {
2699            if (length == -2) eptr = md->end_subject;   /* Partial match */
2700          CHECK_PARTIAL();          CHECK_PARTIAL();
2701          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2702          }          }
# Line 2620  for (;;) Line 2705  for (;;)
2705        }        }
2706    
2707      /* Handle repeated back references. If the length of the reference is      /* Handle repeated back references. If the length of the reference is
2708      zero, just continue with the main loop. */      zero, just continue with the main loop. If the length is negative, it
2709        means the reference is unset in non-Java-compatible mode. If the minimum is
2710        zero, we can continue at the same level without recursion. For any other
2711        minimum, carrying on will result in NOMATCH. */
2712    
2713      if (length == 0) continue;      if (length == 0) continue;
2714        if (length < 0 && min == 0) continue;
2715    
2716      /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2717      the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
# Line 2633  for (;;) Line 2722  for (;;)
2722        int slength;        int slength;
2723        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2724          {          {
2725            if (slength == -2) eptr = md->end_subject;   /* Partial match */
2726          CHECK_PARTIAL();          CHECK_PARTIAL();
2727          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2728          }          }
# Line 2656  for (;;) Line 2746  for (;;)
2746          if (fi >= max) RRETURN(MATCH_NOMATCH);          if (fi >= max) RRETURN(MATCH_NOMATCH);
2747          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2748            {            {
2749              if (slength == -2) eptr = md->end_subject;   /* Partial match */
2750            CHECK_PARTIAL();            CHECK_PARTIAL();
2751            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2752            }            }
# Line 2674  for (;;) Line 2765  for (;;)
2765          int slength;          int slength;
2766          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2767            {            {
2768            CHECK_PARTIAL();            /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2769              the soft partial matching case. */
2770    
2771              if (slength == -2 && md->partial != 0 &&
2772                  md->end_subject > md->start_used_ptr)
2773                {
2774                md->hitend = TRUE;
2775                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2776                }
2777            break;            break;
2778            }            }
2779          eptr += slength;          eptr += slength;
2780          }          }
2781    
2782        while (eptr >= pp)        while (eptr >= pp)
2783          {          {
2784          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
# Line 2703  for (;;) Line 2803  for (;;)
2803      case OP_NCLASS:      case OP_NCLASS:
2804      case OP_CLASS:      case OP_CLASS:
2805        {        {
2806          /* The data variable is saved across frames, so the byte map needs to
2807          be stored there. */
2808    #define BYTE_MAP ((pcre_uint8 *)data)
2809        data = ecode + 1;                /* Save for matching */        data = ecode + 1;                /* Save for matching */
2810        ecode += 33;                     /* Advance past the item */        ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2811    
2812        switch (*ecode)        switch (*ecode)
2813          {          {
# Line 2725  for (;;) Line 2828  for (;;)
2828          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2829          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2830          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2831          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2832          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2833          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2834          break;          break;
2835    
2836          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2737  for (;;) Line 2840  for (;;)
2840    
2841        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
2842    
2843  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2844        /* UTF-8 mode */        if (utf)
       if (utf8)  
2845          {          {
2846          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2847            {            {
# Line 2754  for (;;) Line 2856  for (;;)
2856              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2857              }              }
2858            else            else
2859              {              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  
             }  
2860            }            }
2861          }          }
2862        else        else
2863  #endif  #endif
2864        /* Not UTF-8 mode */        /* Not UTF mode */
2865          {          {
2866          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2867            {            {
# Line 2771  for (;;) Line 2871  for (;;)
2871              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2872              }              }
2873            c = *eptr++;            c = *eptr++;
2874            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2875              if (c > 255)
2876                {
2877                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2878                }
2879              else
2880    #endif
2881                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2882            }            }
2883          }          }
2884    
# Line 2785  for (;;) Line 2892  for (;;)
2892    
2893        if (minimize)        if (minimize)
2894          {          {
2895  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2896          /* UTF-8 mode */          if (utf)
         if (utf8)  
2897            {            {
2898            for (fi = min;; fi++)            for (fi = min;; fi++)
2899              {              {
# Line 2805  for (;;) Line 2911  for (;;)
2911                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2912                }                }
2913              else              else
2914                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  
               }  
2915              }              }
2916            }            }
2917          else          else
2918  #endif  #endif
2919          /* Not UTF-8 mode */          /* Not UTF mode */
2920            {            {
2921            for (fi = min;; fi++)            for (fi = min;; fi++)
2922              {              {
# Line 2825  for (;;) Line 2929  for (;;)
2929                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2930                }                }
2931              c = *eptr++;              c = *eptr++;
2932              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2933                if (c > 255)
2934                  {
2935                  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2936                  }
2937                else
2938    #endif
2939                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2940              }              }
2941            }            }
2942          /* Control never gets here */          /* Control never gets here */
# Line 2837  for (;;) Line 2948  for (;;)
2948          {          {
2949          pp = eptr;          pp = eptr;
2950    
2951  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2952          /* UTF-8 mode */          if (utf)
         if (utf8)  
2953            {            {
2954            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2955              {              {
# Line 2855  for (;;) Line 2965  for (;;)
2965                if (op == OP_CLASS) break;                if (op == OP_CLASS) break;
2966                }                }
2967              else              else
2968                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
               if ((data[c/8] & (1 << (c&7))) == 0) break;  
               }  
2969              eptr += len;              eptr += len;
2970              }              }
2971            for (;;)            for (;;)
# Line 2870  for (;;) Line 2978  for (;;)
2978            }            }
2979          else          else
2980  #endif  #endif
2981            /* Not UTF-8 mode */            /* Not UTF mode */
2982            {            {
2983            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2984              {              {
# Line 2880  for (;;) Line 2988  for (;;)
2988                break;                break;
2989                }                }
2990              c = *eptr;              c = *eptr;
2991              if ((data[c/8] & (1 << (c&7))) == 0) break;  #ifndef COMPILE_PCRE8
2992                if (c > 255)
2993                  {
2994                  if (op == OP_CLASS) break;
2995                  }
2996                else
2997    #endif
2998                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2999              eptr++;              eptr++;
3000              }              }
3001            while (eptr >= pp)            while (eptr >= pp)
# Line 2893  for (;;) Line 3008  for (;;)
3008    
3009          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3010          }          }
3011    #undef BYTE_MAP
3012        }        }
3013      /* Control never gets here */      /* Control never gets here */
3014    
# Line 2901  for (;;) Line 3017  for (;;)
3017      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
3018      mode, because Unicode properties are supported in non-UTF-8 mode. */      mode, because Unicode properties are supported in non-UTF-8 mode. */
3019    
3020  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3021      case OP_XCLASS:      case OP_XCLASS:
3022        {        {
3023        data = ecode + 1 + LINK_SIZE;                /* Save for matching */        data = ecode + 1 + LINK_SIZE;                /* Save for matching */
# Line 2926  for (;;) Line 3042  for (;;)
3042          case OP_CRMINRANGE:          case OP_CRMINRANGE:
3043          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
3044          min = GET2(ecode, 1);          min = GET2(ecode, 1);
3045          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
3046          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
3047          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
3048          break;          break;
3049    
3050          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2946  for (;;) Line 3062  for (;;)
3062            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3063            }            }
3064          GETCHARINCTEST(c, eptr);          GETCHARINCTEST(c, eptr);
3065          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3066          }          }
3067    
3068        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 2970  for (;;) Line 3086  for (;;)
3086              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3087              }              }
3088            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3089            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3090            }            }
3091          /* Control never gets here */          /* Control never gets here */
3092          }          }
# Line 2988  for (;;) Line 3104  for (;;)
3104              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3105              break;              break;
3106              }              }
3107    #ifdef SUPPORT_UTF
3108            GETCHARLENTEST(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
3109            if (!_pcre_xclass(c, data)) break;  #else
3110              c = *eptr;
3111    #endif
3112              if (!PRIV(xclass)(c, data, utf)) break;
3113            eptr += len;            eptr += len;
3114            }            }
3115          for(;;)          for(;;)
# Line 2997  for (;;) Line 3117  for (;;)
3117            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3118            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3119            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3120            if (utf8) BACKCHAR(eptr);  #ifdef SUPPORT_UTF
3121              if (utf) BACKCHAR(eptr);
3122    #endif
3123            }            }
3124          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3125          }          }
# Line 3009  for (;;) Line 3131  for (;;)
3131      /* Match a single character, casefully */      /* Match a single character, casefully */
3132    
3133      case OP_CHAR:      case OP_CHAR:
3134  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3135      if (utf8)      if (utf)
3136        {        {
3137        length = 1;        length = 1;
3138        ecode++;        ecode++;
# Line 3024  for (;;) Line 3146  for (;;)
3146        }        }
3147      else      else
3148  #endif  #endif
3149        /* Not UTF mode */
     /* Non-UTF-8 mode */  
3150        {        {
3151        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
3152          {          {
# Line 3047  for (;;) Line 3168  for (;;)
3168        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
3169        }        }
3170    
3171  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3172      if (utf8)      if (utf)
3173        {        {
3174        length = 1;        length = 1;
3175        ecode++;        ecode++;
# Line 3061  for (;;) Line 3182  for (;;)
3182    
3183        if (fc < 128)        if (fc < 128)
3184          {          {
3185          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[fc]
3186                != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3187            ecode++;
3188            eptr++;
3189          }          }
3190    
3191        /* Otherwise we must pick up the subject character. Note that we cannot        /* Otherwise we must pick up the subject character. Note that we cannot
# Line 3087  for (;;) Line 3211  for (;;)
3211          }          }
3212        }        }
3213      else      else
3214  #endif   /* SUPPORT_UTF8 */  #endif   /* SUPPORT_UTF */
3215    
3216      /* Non-UTF-8 mode */      /* Not UTF mode */
3217        {        {
3218        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);        if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3219              != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3220          eptr++;
3221        ecode += 2;        ecode += 2;
3222        }        }
3223      break;      break;
# Line 3101  for (;;) Line 3227  for (;;)
3227      case OP_EXACT:      case OP_EXACT:
3228      case OP_EXACTI:      case OP_EXACTI:
3229      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3230      ecode += 3;      ecode += 1 + IMM2_SIZE;
3231      goto REPEATCHAR;      goto REPEATCHAR;
3232    
3233      case OP_POSUPTO:      case OP_POSUPTO:
# Line 3116  for (;;) Line 3242  for (;;)
3242      min = 0;      min = 0;
3243      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3244      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3245      ecode += 3;      ecode += 1 + IMM2_SIZE;
3246      goto REPEATCHAR;      goto REPEATCHAR;
3247    
3248      case OP_POSSTAR:      case OP_POSSTAR:
# Line 3164  for (;;) Line 3290  for (;;)
3290      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. */
3291    
3292      REPEATCHAR:      REPEATCHAR:
3293  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3294      if (utf8)      if (utf)
3295        {        {
3296        length = 1;        length = 1;
3297        charptr = ecode;        charptr = ecode;
# Line 3181  for (;;) Line 3307  for (;;)
3307          unsigned int othercase;          unsigned int othercase;
3308          if (op >= OP_STARI &&     /* Caseless */          if (op >= OP_STARI &&     /* Caseless */
3309              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
3310            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = PRIV(ord2utf)(othercase, occhars);
3311          else oclength = 0;          else oclength = 0;
3312  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3313    
3314          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3315            {            {
3316            if (eptr <= md->end_subject - length &&            if (eptr <= md->end_subject - length &&
3317              memcmp(eptr, charptr, length) == 0) eptr += length;              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3318  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3319            else if (oclength > 0 &&            else if (oclength > 0 &&
3320                     eptr <= md->end_subject - oclength &&                     eptr <= md->end_subject - oclength &&
3321                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3322  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3323            else            else
3324              {              {
# Line 3211  for (;;) Line 3337  for (;;)
3337              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3338              if (fi >= max) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3339              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3340                memcmp(eptr, charptr, length) == 0) eptr += length;                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3341  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3342              else if (oclength > 0 &&              else if (oclength > 0 &&
3343                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3344                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3345  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3346              else              else
3347                {                {
# Line 3232  for (;;) Line 3358  for (;;)
3358            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3359              {              {
3360              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3361                  memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3362  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3363              else if (oclength > 0 &&              else if (oclength > 0 &&
3364                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3365                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3366  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3367              else              else
3368                {                {
# Line 3268  for (;;) Line 3394  for (;;)
3394        value of fc will always be < 128. */        value of fc will always be < 128. */
3395        }        }
3396      else      else
3397  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3398          /* When not in UTF-8 mode, load a single-byte character. */
3399          fc = *ecode++;
3400    
3401      /* When not in UTF-8 mode, load a single-byte character. */      /* The value of fc at this point is always one character, though we may
3402        or may not be in UTF mode. The code is duplicated for the caseless and
     fc = *ecode++;  
   
     /* The value of fc at this point is always less than 256, though we may or  
     may not be in UTF-8 mode. The code is duplicated for the caseless and  
3403      caseful cases, for speed, since matching characters is likely to be quite      caseful cases, for speed, since matching characters is likely to be quite
3404      common. First, ensure the minimum number of matches are present. If min =      common. First, ensure the minimum number of matches are present. If min =
3405      max, continue at the same level without recursing. Otherwise, if      max, continue at the same level without recursing. Otherwise, if
# Line 3288  for (;;) Line 3412  for (;;)
3412    
3413      if (op >= OP_STARI)  /* Caseless */      if (op >= OP_STARI)  /* Caseless */
3414        {        {
3415        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3416          /* fc must be < 128 if UTF is enabled. */
3417          foc = md->fcc[fc];
3418    #else
3419    #ifdef SUPPORT_UTF
3420    #ifdef SUPPORT_UCP
3421          if (utf && fc > 127)
3422            foc = UCD_OTHERCASE(fc);
3423    #else
3424          if (utf && fc > 127)
3425            foc = fc;
3426    #endif /* SUPPORT_UCP */
3427          else
3428    #endif /* SUPPORT_UTF */
3429            foc = TABLE_GET(fc, md->fcc, fc);
3430    #endif /* COMPILE_PCRE8 */
3431    
3432        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3433          {          {
3434          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
# Line 3296  for (;;) Line 3436  for (;;)
3436            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3437            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3438            }            }
3439          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3440            eptr++;
3441          }          }
3442        if (min == max) continue;        if (min == max) continue;
3443        if (minimize)        if (minimize)
# Line 3311  for (;;) Line 3452  for (;;)
3452              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3453              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3454              }              }
3455            if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3456              eptr++;
3457            }            }
3458          /* Control never gets here */          /* Control never gets here */
3459          }          }
# Line 3325  for (;;) Line 3467  for (;;)
3467              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3468              break;              break;
3469              }              }
3470            if (fc != md->lcc[*eptr]) break;            if (fc != *eptr && foc != *eptr) break;
3471            eptr++;            eptr++;
3472            }            }
3473    
# Line 3414  for (;;) Line 3556  for (;;)
3556      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3557      if (op == OP_NOTI)         /* The caseless case */      if (op == OP_NOTI)         /* The caseless case */
3558        {        {
3559  #ifdef SUPPORT_UTF8        register unsigned int ch, och;
3560        if (c < 256)        ch = *ecode++;
3561  #endif  #ifdef COMPILE_PCRE8
3562        c = md->lcc[c];        /* ch must be < 128 if UTF is enabled. */
3563        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        och = md->fcc[ch];
3564    #else
3565    #ifdef SUPPORT_UTF
3566    #ifdef SUPPORT_UCP
3567          if (utf && ch > 127)
3568            och = UCD_OTHERCASE(ch);
3569    #else
3570          if (utf && ch > 127)
3571            och = ch;
3572    #endif /* SUPPORT_UCP */
3573          else
3574    #endif /* SUPPORT_UTF */
3575            och = TABLE_GET(ch, md->fcc, ch);
3576    #endif /* COMPILE_PCRE8 */
3577          if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3578        }        }
3579      else    /* Caseful */      else    /* Caseful */
3580        {        {
# Line 3436  for (;;) Line 3592  for (;;)
3592      case OP_NOTEXACT:      case OP_NOTEXACT:
3593      case OP_NOTEXACTI:      case OP_NOTEXACTI:
3594      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3595      ecode += 3;      ecode += 1 + IMM2_SIZE;
3596      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3597    
3598      case OP_NOTUPTO:      case OP_NOTUPTO:
# Line 3446  for (;;) Line 3602  for (;;)
3602      min = 0;      min = 0;
3603      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3604      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3605      ecode += 3;      ecode += 1 + IMM2_SIZE;
3606      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3607    
3608      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
# Line 3478  for (;;) Line 3634  for (;;)
3634      possessive = TRUE;      possessive = TRUE;
3635      min = 0;      min = 0;
3636      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3637      ecode += 3;      ecode += 1 + IMM2_SIZE;
3638      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3639    
3640      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 3517  for (;;) Line 3673  for (;;)
3673    
3674      if (op >= OP_NOTSTARI)     /* Caseless */      if (op >= OP_NOTSTARI)     /* Caseless */
3675        {        {
3676        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3677          /* fc must be < 128 if UTF is enabled. */
3678          foc = md->fcc[fc];
3679    #else
3680    #ifdef SUPPORT_UTF
3681    #ifdef SUPPORT_UCP
3682          if (utf && fc > 127)
3683            foc = UCD_OTHERCASE(fc);
3684    #else
3685          if (utf && fc > 127)
3686            foc = fc;
3687    #endif /* SUPPORT_UCP */
3688          else
3689    #endif /* SUPPORT_UTF */
3690            foc = TABLE_GET(fc, md->fcc, fc);
3691    #endif /* COMPILE_PCRE8 */
3692    
3693  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3694        /* UTF-8 mode */        if (utf)
       if (utf8)  
3695          {          {
3696          register unsigned int d;          register unsigned int d;
3697          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3532  for (;;) Line 3702  for (;;)
3702              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3703              }              }
3704            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3705            if (d < 256) d = md->lcc[d];            if (fc == d || (unsigned int) foc == d) RRETURN(MATCH_NOMATCH);
           if (fc == d) RRETURN(MATCH_NOMATCH);  
3706            }            }
3707          }          }
3708        else        else
3709  #endif  #endif
3710          /* Not UTF mode */
       /* Not UTF-8 mode */  
3711          {          {
3712          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3713            {            {
# Line 3548  for (;;) Line 3716  for (;;)
3716              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3717              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3718              }              }
3719            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3720              eptr++;
3721            }            }
3722          }          }
3723    
# Line 3556  for (;;) Line 3725  for (;;)
3725    
3726        if (minimize)        if (minimize)
3727          {          {
3728  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3729          /* UTF-8 mode */          if (utf)
         if (utf8)  
3730            {            {
3731            register unsigned int d;            register unsigned int d;
3732            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3572  for (;;) Line 3740  for (;;)
3740                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3741                }                }
3742              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3743              if (d < 256) d = md->lcc[d];              if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
             if (fc == d) RRETURN(MATCH_NOMATCH);  
3744              }              }
3745            }            }
3746          else          else
3747  #endif  #endif
3748          /* Not UTF-8 mode */          /* Not UTF mode */
3749            {            {
3750            for (fi = min;; fi++)            for (fi = min;; fi++)
3751              {              {
# Line 3590  for (;;) Line 3757  for (;;)
3757                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3758                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3759                }                }
3760              if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);              if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3761                eptr++;
3762              }              }
3763            }            }
3764          /* Control never gets here */          /* Control never gets here */
# Line 3602  for (;;) Line 3770  for (;;)
3770          {          {
3771          pp = eptr;          pp = eptr;
3772    
3773  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3774          /* UTF-8 mode */          if (utf)
         if (utf8)  
3775            {            {
3776            register unsigned int d;            register unsigned int d;
3777            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3616  for (;;) Line 3783  for (;;)
3783                break;                break;
3784                }                }
3785              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3786              if (d < 256) d = md->lcc[d];              if (fc == d || (unsigned int)foc == d) break;
             if (fc == d) break;  
3787              eptr += len;              eptr += len;
3788              }              }
3789          if (possessive) continue;            if (possessive) continue;
3790          for(;;)            for(;;)
3791              {              {
3792              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3793              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 3631  for (;;) Line 3797  for (;;)
3797            }            }
3798          else          else
3799  #endif  #endif
3800          /* Not UTF-8 mode */          /* Not UTF mode */
3801            {            {
3802            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3803              {              {
# Line 3640  for (;;) Line 3806  for (;;)
3806                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3807                break;                break;
3808                }                }
3809              if (fc == md->lcc[*eptr]) break;              if (fc == *eptr || foc == *eptr) break;
3810              eptr++;              eptr++;
3811              }              }
3812            if (possessive) continue;            if (possessive) continue;
# Line 3661  for (;;) Line 3827  for (;;)
3827    
3828      else      else
3829        {        {
3830  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3831        /* UTF-8 mode */        if (utf)
       if (utf8)  
3832          {          {
3833          register unsigned int d;          register unsigned int d;
3834          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3679  for (;;) Line 3844  for (;;)
3844          }          }
3845        else        else
3846  #endif  #endif
3847        /* Not UTF-8 mode */        /* Not UTF mode */
3848          {          {
3849          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3850            {            {
# Line 3696  for (;;) Line 3861  for (;;)
3861    
3862        if (minimize)        if (minimize)
3863          {          {
3864  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3865          /* UTF-8 mode */          if (utf)
         if (utf8)  
3866            {            {
3867            register unsigned int d;            register unsigned int d;
3868            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3717  for (;;) Line 3881  for (;;)
3881            }            }
3882          else          else
3883  #endif  #endif
3884          /* Not UTF-8 mode */          /* Not UTF mode */
3885            {            {
3886            for (fi = min;; fi++)            for (fi = min;; fi++)
3887              {              {
# Line 3741  for (;;) Line 3905  for (;;)
3905          {          {
3906          pp = eptr;          pp = eptr;
3907    
3908  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3909          /* UTF-8 mode */          if (utf)
         if (utf8)  
3910            {            {
3911            register unsigned int d;            register unsigned int d;
3912            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3769  for (;;) Line 3932  for (;;)
3932            }            }
3933          else          else
3934  #endif  #endif
3935          /* Not UTF-8 mode */          /* Not UTF mode */
3936            {            {
3937            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3938              {              {
# Line 3802  for (;;) Line 3965  for (;;)
3965      case OP_TYPEEXACT:      case OP_TYPEEXACT:
3966      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3967      minimize = TRUE;      minimize = TRUE;
3968      ecode += 3;      ecode += 1 + IMM2_SIZE;
3969      goto REPEATTYPE;      goto REPEATTYPE;
3970    
3971      case OP_TYPEUPTO:      case OP_TYPEUPTO:
# Line 3810  for (;;) Line 3973  for (;;)
3973      min = 0;      min = 0;
3974      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3975      minimize = *ecode == OP_TYPEMINUPTO;      minimize = *ecode == OP_TYPEMINUPTO;
3976      ecode += 3;      ecode += 1 + IMM2_SIZE;
3977      goto REPEATTYPE;      goto REPEATTYPE;
3978    
3979      case OP_TYPEPOSSTAR:      case OP_TYPEPOSSTAR:
# Line 3838  for (;;) Line 4001  for (;;)
4001      possessive = TRUE;      possessive = TRUE;
4002      min = 0;      min = 0;
4003      max = GET2(ecode, 1);      max = GET2(ecode, 1);
4004      ecode += 3;      ecode += 1 + IMM2_SIZE;
4005      goto REPEATTYPE;      goto REPEATTYPE;
4006    
4007      case OP_TYPESTAR:      case OP_TYPESTAR:
# Line 4045  for (;;) Line 4208  for (;;)
4208            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4209              {              {
4210              int len = 1;              int len = 1;
4211              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4212              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
4213              eptr += len;              eptr += len;
4214              }              }
4215              CHECK_PARTIAL();
4216            }            }
4217          }          }
4218    
# Line 4057  for (;;) Line 4221  for (;;)
4221    
4222  /* Handle all other cases when the coding is UTF-8 */  /* Handle all other cases when the coding is UTF-8 */
4223    
4224  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4225        if (utf8) switch(ctype)        if (utf) switch(ctype)
4226          {          {
4227          case OP_ANY:          case OP_ANY:
4228          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 4070  for (;;) Line 4234  for (;;)
4234              }              }
4235            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4236            eptr++;            eptr++;
4237            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4238            }            }
4239          break;          break;
4240    
# Line 4083  for (;;) Line 4247  for (;;)
4247              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4248              }              }
4249            eptr++;            eptr++;
4250            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4251            }            }
4252          break;          break;
4253    
# Line 4265  for (;;) Line 4429  for (;;)
4429              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4430              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4431              }              }
4432            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
4433              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4434              eptr++;
4435            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4436            }            }
4437          break;          break;
# Line 4281  for (;;) Line 4446  for (;;)
4446              }              }
4447            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
4448              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4449            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            eptr++;
4450              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4451            }            }
4452          break;          break;
4453    
# Line 4293  for (;;) Line 4459  for (;;)
4459              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4460              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4461              }              }
4462            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
4463              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4464              eptr++;
4465            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4466            }            }
4467          break;          break;
# Line 4309  for (;;) Line 4476  for (;;)
4476              }              }
4477            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
4478              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4479            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            eptr++;
4480              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4481            }            }
4482          break;          break;
4483    
# Line 4321  for (;;) Line 4489  for (;;)
4489              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4490              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4491              }              }
4492            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
4493              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4494              eptr++;
4495            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4496            }            }
4497          break;          break;
# Line 4332  for (;;) Line 4501  for (;;)
4501          }  /* End switch(ctype) */          }  /* End switch(ctype) */
4502    
4503        else        else
4504  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF */
4505    
4506        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
4507        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. */
# Line 4392  for (;;) Line 4561  for (;;)
4561              case 0x000b:              case 0x000b:
4562              case 0x000c:              case 0x000c:
4563              case 0x0085:              case 0x0085:
4564    #ifdef COMPILE_PCRE16
4565                case 0x2028:
4566                case 0x2029:
4567    #endif
4568              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4569              break;              break;
4570              }              }
# Line 4412  for (;;) Line 4585  for (;;)
4585              case 0x09:      /* HT */              case 0x09:      /* HT */
4586              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4587              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
4588    #ifdef COMPILE_PCRE16
4589                case 0x1680:    /* OGHAM SPACE MARK */
4590                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4591                case 0x2000:    /* EN QUAD */
4592                case 0x2001:    /* EM QUAD */
4593                case 0x2002:    /* EN SPACE */
4594                case 0x2003:    /* EM SPACE */
4595                case 0x2004:    /* THREE-PER-EM SPACE */
4596                case 0x2005:    /* FOUR-PER-EM SPACE */
4597                case 0x2006:    /* SIX-PER-EM SPACE */
4598                case 0x2007:    /* FIGURE SPACE */
4599                case 0x2008:    /* PUNCTUATION SPACE */
4600                case 0x2009:    /* THIN SPACE */
4601                case 0x200A:    /* HAIR SPACE */
4602                case 0x202f:    /* NARROW NO-BREAK SPACE */
4603                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4604                case 0x3000:    /* IDEOGRAPHIC SPACE */
4605    #endif
4606              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4607              }              }
4608            }            }
# Line 4431  for (;;) Line 4622  for (;;)
4622              case 0x09:      /* HT */              case 0x09:      /* HT */
4623              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4624              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
4625    #ifdef COMPILE_PCRE16
4626                case 0x1680:    /* OGHAM SPACE MARK */
4627                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4628                case 0x2000:    /* EN QUAD */
4629                case 0x2001:    /* EM QUAD */
4630                case 0x2002:    /* EN SPACE */
4631                case 0x2003:    /* EM SPACE */
4632                case 0x2004:    /* THREE-PER-EM SPACE */
4633                case 0x2005:    /* FOUR-PER-EM SPACE */
4634                case 0x2006:    /* SIX-PER-EM SPACE */
4635                case 0x2007:    /* FIGURE SPACE */
4636                case 0x2008:    /* PUNCTUATION SPACE */
4637                case 0x2009:    /* THIN SPACE */
4638                case 0x200A:    /* HAIR SPACE */
4639                case 0x202f:    /* NARROW NO-BREAK SPACE */
4640                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4641                case 0x3000:    /* IDEOGRAPHIC SPACE */
4642    #endif
4643              break;              break;
4644              }              }
4645            }            }
# Line 4452  for (;;) Line 4661  for (;;)
4661              case 0x0c:      /* FF */              case 0x0c:      /* FF */
4662              case 0x0d:      /* CR */              case 0x0d:      /* CR */
4663              case 0x85:      /* NEL */              case 0x85:      /* NEL */
4664    #ifdef COMPILE_PCRE16
4665                case 0x2028:    /* LINE SEPARATOR */
4666                case 0x2029:    /* PARAGRAPH SEPARATOR */
4667    #endif
4668              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4669              }              }
4670            }            }
# Line 4473  for (;;) Line 4686  for (;;)
4686              case 0x0c:      /* FF */              case 0x0c:      /* FF */
4687              case 0x0d:      /* CR */              case 0x0d:      /* CR */
4688              case 0x85:      /* NEL */              case 0x85:      /* NEL */
4689    #ifdef COMPILE_PCRE16
4690                case 0x2028:    /* LINE SEPARATOR */
4691                case 0x2029:    /* PARAGRAPH SEPARATOR */
4692    #endif
4693              break;              break;
4694              }              }
4695            }            }
# Line 4486  for (;;) Line 4703  for (;;)
4703              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4704              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4705              }              }
4706            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4707                RRETURN(MATCH_NOMATCH);
4708              eptr++;
4709            }            }
4710          break;          break;
4711    
# Line 4498  for (;;) Line 4717  for (;;)
4717              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4718              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4719              }              }
4720            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4721                RRETURN(MATCH_NOMATCH);
4722              eptr++;
4723            }            }
4724          break;          break;
4725    
# Line 4510  for (;;) Line 4731  for (;;)
4731              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4732              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4733              }              }
4734            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4735                RRETURN(MATCH_NOMATCH);
4736              eptr++;
4737            }            }
4738          break;          break;
4739    
# Line 4522  for (;;) Line 4745  for (;;)
4745              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4746              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4747              }              }
4748            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4749                RRETURN(MATCH_NOMATCH);
4750              eptr++;
4751            }            }
4752          break;          break;
4753    
# Line 4534  for (;;) Line 4759  for (;;)
4759              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4760              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4761              }              }
4762            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4763              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4764              eptr++;
4765            }            }
4766          break;          break;
4767    
# Line 4547  for (;;) Line 4773  for (;;)
4773              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4774              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4775              }              }
4776            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4777              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4778              eptr++;
4779            }            }
4780          break;          break;
4781    
# Line 4766  for (;;) Line 4993  for (;;)
4993            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4994              {              {
4995              int len = 1;              int len = 1;
4996              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4997              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
4998              eptr += len;              eptr += len;
4999              }              }
5000              CHECK_PARTIAL();
5001            }            }
5002          }          }
5003        else        else
5004  #endif     /* SUPPORT_UCP */  #endif     /* SUPPORT_UCP */
5005    
5006  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
5007        /* UTF-8 mode */        if (utf)
       if (utf8)  
5008          {          {
5009          for (fi = min;; fi++)          for (fi = min;; fi++)
5010            {            {
# Line 4919  for (;;) Line 5146  for (;;)
5146              break;              break;
5147    
5148              case OP_WHITESPACE:              case OP_WHITESPACE:
5149              if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)              if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5150                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5151              break;              break;
5152    
# Line 4940  for (;;) Line 5167  for (;;)
5167          }          }
5168        else        else
5169  #endif  #endif
5170        /* Not UTF-8 mode */        /* Not UTF mode */
5171          {          {
5172          for (fi = min;; fi++)          for (fi = min;; fi++)
5173            {            {
# Line 4976  for (;;) Line 5203  for (;;)
5203                case 0x000b:                case 0x000b:
5204                case 0x000c:                case 0x000c:
5205                case 0x0085:                case 0x0085:
5206    #ifdef COMPILE_PCRE16
5207                  case 0x2028:
5208                  case 0x2029:
5209    #endif
5210                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5211                break;                break;
5212                }                }
# Line 4988  for (;;) Line 5219  for (;;)
5219                case 0x09:      /* HT */                case 0x09:      /* HT */
5220                case 0x20:      /* SPACE */                case 0x20:      /* SPACE */
5221                case 0xa0:      /* NBSP */                case 0xa0:      /* NBSP */
5222    #ifdef COMPILE_PCRE16
5223                  case 0x1680:    /* OGHAM SPACE MARK */
5224                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5225                  case 0x2000:    /* EN QUAD */
5226                  case 0x2001:    /* EM QUAD */
5227                  case 0x2002:    /* EN SPACE */
5228                  case 0x2003:    /* EM SPACE */
5229                  case 0x2004:    /* THREE-PER-EM SPACE */
5230                  case 0x2005:    /* FOUR-PER-EM SPACE */
5231                  case 0x2006:    /* SIX-PER-EM SPACE */
5232                  case 0x2007:    /* FIGURE SPACE */
5233                  case 0x2008:    /* PUNCTUATION SPACE */
5234                  case 0x2009:    /* THIN SPACE */
5235                  case 0x200A:    /* HAIR SPACE */
5236                  case 0x202f:    /* NARROW NO-BREAK SPACE */
5237                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5238                  case 0x3000:    /* IDEOGRAPHIC SPACE */
5239    #endif
5240                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5241                }                }
5242              break;              break;
# Line 4999  for (;;) Line 5248  for (;;)
5248                case 0x09:      /* HT */                case 0x09:      /* HT */
5249                case 0x20:      /* SPACE */                case 0x20:      /* SPACE */
5250                case 0xa0:      /* NBSP */                case 0xa0:      /* NBSP */
5251    #ifdef COMPILE_PCRE16
5252                  case 0x1680:    /* OGHAM SPACE MARK */
5253                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5254                  case 0x2000:    /* EN QUAD */
5255                  case 0x2001:    /* EM QUAD */
5256                  case 0x2002:    /* EN SPACE */
5257                  case 0x2003:    /* EM SPACE */
5258                  case 0x2004:    /* THREE-PER-EM SPACE */
5259                  case 0x2005:    /* FOUR-PER-EM SPACE */
5260                  case 0x2006:    /* SIX-PER-EM SPACE */
5261                  case 0x2007:    /* FIGURE SPACE */
5262                  case 0x2008:    /* PUNCTUATION SPACE */
5263                  case 0x2009:    /* THIN SPACE */
5264                  case 0x200A:    /* HAIR SPACE */
5265                  case 0x202f:    /* NARROW NO-BREAK SPACE */
5266                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5267                  case 0x3000:    /* IDEOGRAPHIC SPACE */
5268    #endif
5269                break;                break;
5270                }                }
5271              break;              break;
# Line 5012  for (;;) Line 5279  for (;;)
5279                case 0x0c:      /* FF */                case 0x0c:      /* FF */
5280                case 0x0d:      /* CR */                case 0x0d:      /* CR */
5281                case 0x85:      /* NEL */                case 0x85:      /* NEL */
5282    #ifdef COMPILE_PCRE16
5283                  case 0x2028:    /* LINE SEPARATOR */
5284                  case 0x2029:    /* PARAGRAPH SEPARATOR */
5285    #endif
5286                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5287                }                }
5288              break;              break;
# Line 5025  for (;;) Line 5296  for (;;)
5296                case 0x0c:      /* FF */                case 0x0c:      /* FF */
5297                case 0x0d:      /* CR */                case 0x0d:      /* CR */
5298                case 0x85:      /* NEL */                case 0x85:      /* NEL */
5299    #ifdef COMPILE_PCRE16
5300                  case 0x2028:    /* LINE SEPARATOR */
5301                  case 0x2029:    /* PARAGRAPH SEPARATOR */
5302    #endif
5303                break;                break;
5304                }                }
5305              break;              break;
5306    
5307              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
5308              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5309              break;              break;
5310    
5311              case OP_DIGIT:              case OP_DIGIT:
5312              if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);              if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5313              break;              break;
5314    
5315              case OP_NOT_WHITESPACE:              case OP_NOT_WHITESPACE:
5316              if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);              if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5317              break;              break;
5318    
5319              case OP_WHITESPACE:              case OP_WHITESPACE:
5320              if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);              if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5321              break;              break;
5322    
5323              case OP_NOT_WORDCHAR:              case OP_NOT_WORDCHAR:
5324              if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);              if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5325              break;              break;
5326    
5327              case OP_WORDCHAR:              case OP_WORDCHAR:
5328              if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);              if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5329              break;              break;
5330    
5331              default:              default:
# Line 5239  for (;;) Line 5514  for (;;)
5514            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5515            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5516            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5517            if (utf8) BACKCHAR(eptr);            if (utf) BACKCHAR(eptr);
5518            }            }
5519          }          }
5520    
# Line 5256  for (;;) Line 5531  for (;;)
5531              SCHECK_PARTIAL();              SCHECK_PARTIAL();
5532              break;              break;
5533              }              }
5534            if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5535            if (UCD_CATEGORY(c) == ucp_M) break;            if (UCD_CATEGORY(c) == ucp_M) break;
5536            eptr += len;            eptr += len;
5537            while (eptr < md->end_subject)            while (eptr < md->end_subject)
5538              {              {
5539              len = 1;              len = 1;
5540              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5541              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
5542              eptr += len;              eptr += len;
5543              }              }
5544              CHECK_PARTIAL();
5545            }            }
5546    
5547          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
# Line 5279  for (;;) Line 5555  for (;;)
5555            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5556            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
5557              {              {
5558              if (!utf8) c = *eptr; else              if (!utf) c = *eptr; else
5559                {                {
5560                BACKCHAR(eptr);                BACKCHAR(eptr);
5561                GETCHAR(c, eptr);                GETCHAR(c, eptr);
# Line 5293  for (;;) Line 5569  for (;;)
5569        else        else
5570  #endif   /* SUPPORT_UCP */  #endif   /* SUPPORT_UCP */
5571    
5572  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
5573        /* UTF-8 mode */        if (utf)
   
       if (utf8)  
5574          {          {
5575          switch(ctype)          switch(ctype)
5576            {            {
# Line 5312  for (;;) Line 5586  for (;;)
5586                  }                  }
5587                if (IS_NEWLINE(eptr)) break;                if (IS_NEWLINE(eptr)) break;
5588                eptr++;                eptr++;
5589                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5590                }                }
5591              }              }
5592    
# Line 5329  for (;;) Line 5603  for (;;)
5603                  }                  }
5604                if (IS_NEWLINE(eptr)) break;                if (IS_NEWLINE(eptr)) break;
5605                eptr++;                eptr++;
5606                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5607                }                }
5608              }              }
5609            break;            break;
# Line 5345  for (;;) Line 5619  for (;;)
5619                  break;                  break;
5620                  }                  }
5621                eptr++;                eptr++;
5622                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5623                }                }
5624              }              }
5625            else            else
# Line 5578  for (;;) Line 5852  for (;;)
5852            }            }
5853          }          }
5854        else        else
5855  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
5856          /* Not UTF mode */
       /* Not UTF-8 mode */  
5857          {          {
5858          switch(ctype)          switch(ctype)
5859            {            {
# Line 5624  for (;;) Line 5897  for (;;)
5897                }                }
5898              else              else
5899                {                {
5900                if (c != 0x000a &&                if (c != 0x000a && (md->bsr_anycrlf ||
5901                    (md->bsr_anycrlf ||                  (c != 0x000b && c != 0x000c && c != 0x0085
5902                      (c != 0x000b && c != 0x000c && c != 0x0085)))  #ifdef COMPILE_PCRE16
5903                  break;                  && c != 0x2028 && c != 0x2029
5904    #endif
5905                    ))) break;
5906                eptr++;                eptr++;
5907                }                }
5908              }              }
# Line 5642  for (;;) Line 5917  for (;;)
5917                break;                break;
5918                }                }
5919              c = *eptr;              c = *eptr;
5920              if (c == 0x09 || c == 0x20 || c == 0xa0) break;              if (c == 0x09 || c == 0x20 || c == 0xa0
5921    #ifdef COMPILE_PCRE16
5922                  || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)
5923                  || c == 0x202f || c == 0x205f || c == 0x3000
5924    #endif
5925                  ) break;
5926              eptr++;              eptr++;
5927              }              }
5928            break;            break;
# Line 5656  for (;;) Line 5936  for (;;)
5936                break;                break;
5937                }                }
5938              c = *eptr;              c = *eptr;
5939              if (c != 0x09 && c != 0x20 && c != 0xa0) break;              if (c != 0x09 && c != 0x20 && c != 0xa0
5940    #ifdef COMPILE_PCRE16
5941                  && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)
5942                  && c != 0x202f && c != 0x205f && c != 0x3000
5943    #endif
5944                  ) break;
5945              eptr++;              eptr++;
5946              }              }
5947            break;            break;
# Line 5670  for (;;) Line 5955  for (;;)
5955                break;                break;
5956                }                }
5957              c = *eptr;              c = *eptr;
5958              if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)              if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85
5959                break;  #ifdef COMPILE_PCRE16
5960                  || c == 0x2028 || c == 0x2029
5961    #endif
5962                  ) break;
5963              eptr++;              eptr++;
5964              }              }
5965            break;            break;
# Line 5685  for (;;) Line 5973  for (;;)
5973                break;                break;
5974                }                }
5975              c = *eptr;              c = *eptr;
5976              if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)              if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85
5977                break;  #ifdef COMPILE_PCRE16
5978                  && c != 0x2028 && c != 0x2029
5979    #endif
5980                  ) break;
5981              eptr++;              eptr++;
5982              }              }
5983            break;            break;
# Line 5699  for (;;) Line 5990  for (;;)
5990                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5991                break;                break;
5992                }                }
5993              if ((md->ctypes[*eptr] & ctype_digit) != 0) break;              if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
5994              eptr++;              eptr++;
5995              }              }
5996            break;            break;
# Line 5712  for (;;) Line 6003  for (;;)
6003                SCHECK_PARTIAL();                SCHECK_PARTIAL();
6004                break;                break;
6005                }                }
6006              if ((md->ctypes[*eptr] & ctype_digit) == 0) break;              if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
6007              eptr++;              eptr++;
6008              }              }
6009            break;            break;
# Line 5725  for (;;) Line 6016  for (;;)
6016                SCHECK_PARTIAL();                SCHECK_PARTIAL();
6017                break;                break;
6018                }                }
6019              if ((md->ctypes[*eptr] & ctype_space) != 0) break;              if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6020              eptr++;              eptr++;
6021              }              }
6022            break;            break;
# Line 5738  for (;;) Line 6029  for (;;)
6029                SCHECK_PARTIAL();                SCHECK_PARTIAL();
6030                break;                break;
6031                }                }
6032              if ((md->ctypes[*eptr] & ctype_space) == 0) break;              if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6033              eptr++;              eptr++;
6034              }              }
6035            break;            break;
# Line 5751  for (;;) Line 6042  for (;;)
6042                SCHECK_PARTIAL();                SCHECK_PARTIAL();
6043                break;                break;
6044                }                }
6045              if ((md->ctypes[*eptr] & ctype_word) != 0) break;              if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6046              eptr++;              eptr++;
6047              }              }
6048            break;            break;
# Line 5764  for (;;) Line 6055  for (;;)
6055                SCHECK_PARTIAL();                SCHECK_PARTIAL();
6056                break;                break;
6057                }                }
6058              if ((md->ctypes[*eptr] & ctype_word) == 0) break;              if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6059              eptr++;              eptr++;
6060              }              }
6061            break;            break;
# Line 5827  switch (frame->Xwhere) Line 6118  switch (frame->Xwhere)
6118    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6119    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6120    LBL(65) LBL(66)    LBL(65) LBL(66)
6121  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6122    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(21)
6123    #endif
6124    #ifdef SUPPORT_UTF
6125      LBL(16) LBL(18) LBL(20)
6126      LBL(22) LBL(23) LBL(28) LBL(30)
6127    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
6128  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6129    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6130    LBL(59) LBL(60) LBL(61) LBL(62)    LBL(59) LBL(60) LBL(61) LBL(62)
6131  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
6132  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
6133    default:    default:
6134    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6135    
6136    printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
6137    
6138    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
6139    }    }
6140  #undef LBL  #undef LBL
# Line 5923  Returns:          > 0 => success; value Line 6221  Returns:          > 0 => success; value
6221                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
6222  */  */
6223    
6224    #ifdef COMPILE_PCRE8
6225  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6226  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6227    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6228    int offsetcount)    int offsetcount)
6229    #else
6230    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6231    pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6232      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6233      int offsetcount)
6234    #endif
6235  {  {
6236  int rc, ocount, arg_offset_max;  int rc, ocount, arg_offset_max;
 int first_byte = -1;  
 int req_byte = -1;  
 int req_byte2 = -1;  
6237  int newline;  int newline;
6238  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
6239  BOOL anchored;  BOOL anchored;
6240  BOOL startline;  BOOL startline;
6241  BOOL firstline;  BOOL firstline;
6242  BOOL first_byte_caseless = FALSE;  BOOL utf;
6243  BOOL req_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
6244  BOOL utf8;  BOOL has_req_char = FALSE;
6245    pcre_uchar first_char = 0;
6246    pcre_uchar first_char2 = 0;
6247    pcre_uchar req_char = 0;
6248    pcre_uchar req_char2 = 0;
6249  match_data match_block;  match_data match_block;
6250  match_data *md = &match_block;  match_data *md = &match_block;
6251  const uschar *tables;  const pcre_uint8 *tables;
6252  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
6253  USPTR start_match = (USPTR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6254  USPTR end_subject;  PCRE_PUCHAR end_subject;
6255  USPTR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
6256  USPTR req_byte_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6257    
 pcre_study_data internal_study;  
6258  const pcre_study_data *study;  const pcre_study_data *study;
6259    const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6260    
6261  real_pcre internal_re;  /* Check for the special magic call that measures the size of the stack used
6262  const real_pcre *external_re = (const real_pcre *)argument_re;  per recursive call of match(). Without the funny casting for sizeof, a Windows
6263  const real_pcre *re = external_re;  compiler gave this error: "unary minus operator applied to unsigned type,
6264    result still unsigned". Hopefully the cast fixes that. */
6265    
6266    if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6267        start_offset == -999)
6268    #ifdef NO_RECURSE
6269      return -((int)sizeof(heapframe));
6270    #else
6271      return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6272    #endif
6273    
6274  /* Plausibility checks */  /* Plausibility checks */
6275    
6276  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6277  if (re == NULL || subject == NULL ||  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6278     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;    return PCRE_ERROR_NULL;
6279  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6280  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6281    
6282    /* Check that the first field in the block is the magic number. If it is not,
6283    return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6284    REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6285    means that the pattern is likely compiled with different endianness. */
6286    
6287    if (re->magic_number != MAGIC_NUMBER)
6288      return re->magic_number == REVERSED_MAGIC_NUMBER?
6289        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6290    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6291    
6292  /* These two settings are used in the code for checking a UTF-8 string that  /* These two settings are used in the code for checking a UTF-8 string that
6293  follows immediately afterwards. Other values in the md block are used only  follows immediately afterwards. Other values in the md block are used only
6294  during "normal" pcre_exec() processing, not when the JIT support is in use,  during "normal" pcre_exec() processing, not when the JIT support is in use,
6295  so they are set up later. */  so they are set up later. */
6296    
6297  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6298    utf = md->utf = (re->options & PCRE_UTF8) != 0;
6299  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6300                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6301    
6302  /* Check a UTF-8 string if required. Pass back the character offset and error  /* Check a UTF-8 string if required. Pass back the character offset and error
6303  code for an invalid string if a results vector is available. */  code for an invalid string if a results vector is available. */
6304    
6305  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6306  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6307    {    {
6308    int erroroffset;    int erroroffset;
6309    int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);    int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6310    if (errorcode != 0)    if (errorcode != 0)
6311      {      {
6312      if (offsetcount >= 2)      if (offsetcount >= 2)
# Line 5988  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 6314  if (utf8 && (options & PCRE_NO_UTF8_CHEC
6314        offsets[0] = erroroffset;        offsets[0] = erroroffset;
6315        offsets[1] = errorcode;        offsets[1] = errorcode;
6316        }        }
6317    #ifdef COMPILE_PCRE16
6318        return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6319          PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6320    #else
6321      return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?      return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6322        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6323    #endif
6324      }      }
6325    
6326    /* Check that a start_offset points to the start of a UTF-8 character. */    /* Check that a start_offset points to the start of a UTF character. */
6327    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
6328        (((USPTR)subject)[start_offset] & 0xc0) == 0x80)        NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6329      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
6330    }    }
6331  #endif  #endif
# Line 6002  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 6333  if (utf8 && (options & PCRE_NO_UTF8_CHEC
6333  /* If the pattern was successfully studied with JIT support, run the JIT  /* If the pattern was successfully studied with JIT support, run the JIT
6334  executable instead of the rest of this function. Most options must be set at  executable instead of the rest of this function. Most options must be set at
6335  compile time for the JIT code to be usable. Fallback to the normal code path if  compile time for the JIT code to be usable. Fallback to the normal code path if
6336  an unsupported flag is set. In particular, JIT does not support partial  an unsupported flag is set. */
 matching. */  
6337    
6338  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
6339  if (extra_data != NULL  if (extra_data != NULL
6340      && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0      && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6341                                 PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6342      && extra_data->executable_jit != NULL      && extra_data->executable_jit != NULL
     && (extra_data->flags & PCRE_EXTRA_TABLES) == 0  
6343      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6344                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
6345    return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,                      PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
6346      start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)    {
6347      rc = PRIV(jit_exec)(re, extra_data->executable_jit,
6348        (const pcre_uchar *)subject, length, start_offset, options,
6349        ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
6350      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
6351    
6352      /* PCRE_ERROR_NULL means that the selected normal or partial matching
6353      mode is not compiled. In this case we simply fall back to the interpreter. */
6354    
6355      if (rc != PCRE_ERROR_NULL) return rc;
6356      }
6357  #endif  #endif
6358    
6359  /* Carry on with non-JIT matching. This information is for finding all the  /* Carry on with non-JIT matching. This information is for finding all the
6360  numbers associated with a given name, for condition testing. */  numbers associated with a given name, for condition testing. */
6361    
6362  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (pcre_uchar *)re + re->name_table_offset;
6363  md->name_count = re->name_count;  md->name_count = re->name_count;
6364  md->name_entry_size = re->name_entry_size;  md->name_entry_size = re->name_entry_size;
6365    
# Line 6034  md->callout_data = NULL; Line 6373  md->callout_data = NULL;
6373    
6374  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
6375    
6376  tables = external_re->tables;  tables = re->tables;
6377    
6378  if (extra_data != NULL)  if (extra_data != NULL)
6379    {    {
# Line 6054  if (extra_data != NULL) Line 6393  if (extra_data != NULL)
6393  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
6394  in other programs later. */  in other programs later. */
6395    
6396  if (tables == NULL) tables = _pcre_default_tables;  if (tables == NULL) tables = PRIV(default_tables);
   
 /* Check that the first field in the block is the magic number. If it is not,  
 test for a regex that was compiled on a host of opposite endianness. If this is  
 the case, flipped values are put in internal_re and internal_study if there was  
 study data too. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   {  
   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);  
   if (re == NULL) return PCRE_ERROR_BADMAGIC;  
   if (study != NULL) study = &internal_study;  
   }  
6397    
6398  /* Set up other data */  /* Set up other data */
6399    
# Line 6076  firstline = (re->options & PCRE_FIRSTLIN Line 6403  firstline = (re->options & PCRE_FIRSTLIN
6403    
6404  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
6405    
6406  md->start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6407    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
6408    
6409  md->start_subject = (USPTR)subject;  md->start_subject = (PCRE_PUCHAR)subject;
6410  md->start_offset = start_offset;  md->start_offset = start_offset;
6411  md->end_subject = md->start_subject + length;  md->end_subject = md->start_subject + length;
6412  end_subject = md->end_subject;  end_subject = md->end_subject;
# Line 6104  md->recursive = NULL; Line 6431  md->recursive = NULL;
6431  md->hasthen = (re->flags & PCRE_HASTHEN) != 0;  md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6432    
6433  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
6434    md->fcc = tables + fcc_offset;
6435  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
6436    
6437  /* Handle different \R options. */  /* Handle different \R options. */
# Line 6190  arg_offset_max = (2*ocount)/3; Line 6518  arg_offset_max = (2*ocount)/3;
6518  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
6519    {    {
6520    ocount = re->top_backref * 3 + 3;    ocount = re->top_backref * 3 + 3;
6521    md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6522    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6523    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
6524    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
# Line 6217  if (md->offset_vector != NULL) Line 6545  if (md->offset_vector != NULL)
6545    md->offset_vector[0] = md->offset_vector[1] = -1;    md->offset_vector[0] = md->offset_vector[1] = -1;
6546    }    }
6547    
6548  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_char value is
6549  never set for an anchored regular expression, but the anchoring may be forced  never set for an anchored regular expression, but the anchoring may be forced
6550  at run time, so we have to test for anchoring. The first char may be unset for  at run time, so we have to test for anchoring. The first char may be unset for
6551  an unanchored pattern, of course. If there's no first char and the pattern was  an unanchored pattern, of course. If there's no first char and the pattern was
# Line 6227  if (!anchored) Line 6555  if (!anchored)
6555    {    {
6556    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
6557      {      {
6558      first_byte = re->first_byte & 255;      has_first_char = TRUE;
6559      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = (pcre_uchar)(re->first_char);
6560        first_byte = md->lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
6561          {
6562          first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6563    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6564          if (utf && first_char > 127)
6565            first_char2 = UCD_OTHERCASE(first_char);
6566    #endif
6567          }
6568      }      }
6569    else    else
6570      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 6242  character" set. */ Line 6577  character" set. */
6577    
6578  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
6579    {    {
6580    req_byte = re->req_byte & 255;    has_req_char = TRUE;
6581    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = (pcre_uchar)(re->req_char);
6582    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
6583        {
6584        req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6585    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6586        if (utf && req_char > 127)
6587          req_char2 = UCD_OTHERCASE(req_char);
6588    #endif
6589        }
6590    }    }
6591    
6592    
   
   
6593  /* ==========================================================================*/  /* ==========================================================================*/
6594    
6595  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
# Line 6257  the loop runs just once. */ Line 6597  the loop runs just once. */
6597    
6598  for(;;)  for(;;)
6599    {    {
6600    USPTR save_end_subject = end_subject;    PCRE_PUCHAR save_end_subject = end_subject;
6601    USPTR new_start_match;    PCRE_PUCHAR new_start_match;
6602    
6603    /* If firstline is TRUE, the start of the match is constrained to the first    /* If firstline is TRUE, the start of the match is constrained to the first
6604    line of a multiline string. That is, the match must be before or at the first    line of a multiline string. That is, the match must be before or at the first
# Line 6268  for(;;) Line 6608  for(;;)
6608    
6609    if (firstline)    if (firstline)
6610      {      {
6611      USPTR t = start_match;      PCRE_PUCHAR t = start_match;
6612  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6613      if (utf8)      if (utf)
6614        {        {
6615        while (t < md->end_subject && !IS_NEWLINE(t))        while (t < md->end_subject && !IS_NEWLINE(t))
6616          {          {
6617          t++;          t++;
6618          while (t < end_subject && (*t & 0xc0) == 0x80) t++;          ACROSSCHAR(t < end_subject, *t, t++);
6619          }          }
6620        }        }
6621      else      else
# Line 6292  for(;;) Line 6632  for(;;)
6632    
6633    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6634      {      {
6635      /* Advance to a unique first byte if there is one. */      /* Advance to a unique first char if there is one. */
6636    
6637      if (first_byte >= 0)      if (has_first_char)
6638        {        {
6639        if (first_byte_caseless)        if (first_char != first_char2)
6640          while (start_match < end_subject && md->lcc[*start_match] != first_byte)          while (start_match < end_subject &&
6641                *start_match != first_char && *start_match != first_char2)
6642            start_match++;            start_match++;
6643        else        else
6644          while (start_match < end_subject && *start_match != first_byte)          while (start_match < end_subject && *start_match != first_char)
6645            start_match++;            start_match++;
6646        }        }
6647    
# Line 6310  for(;;) Line 6651  for(;;)
6651        {        {
6652        if (start_match > md->start_subject + start_offset)        if (start_match > md->start_subject + start_offset)
6653          {          {
6654  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6655          if (utf8)          if (utf)
6656            {            {
6657            while (start_match < end_subject && !WAS_NEWLINE(start_match))            while (start_match < end_subject && !WAS_NEWLINE(start_match))
6658              {              {
6659              start_match++;              start_match++;
6660              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)              ACROSSCHAR(start_match < end_subject, *start_match,
6661                start_match++;                start_match++);
6662              }              }
6663            }            }
6664          else          else
# Line 6344  for(;;) Line 6685  for(;;)
6685        while (start_match < end_subject)        while (start_match < end_subject)
6686          {          {
6687          register unsigned int c = *start_match;          register unsigned int c = *start_match;
6688    #ifndef COMPILE_PCRE8
6689            if (c > 255) c = 255;
6690    #endif
6691          if ((start_bits[c/8] & (1 << (c&7))) == 0)          if ((start_bits[c/8] & (1 << (c&7))) == 0)
6692            {            {
6693            start_match++;            start_match++;
6694  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6695            if (utf8)            /* In non 8-bit mode, the iteration will stop for
6696              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)            characters > 255 at the beginning or not stop at all. */
6697                start_match++;            if (utf)
6698                ACROSSCHAR(start_match < end_subject, *start_match,
6699                  start_match++);
6700  #endif  #endif
6701            }            }
6702          else break;          else break;
# Line 6379  for(;;) Line 6725  for(;;)
6725        break;        break;
6726        }        }
6727    
6728      /* If req_byte is set, we know that that character must appear in the      /* If req_char is set, we know that that character must appear in the
6729      subject for the match to succeed. If the first character is set, req_byte      subject for the match to succeed. If the first character is set, req_char
6730      must be later in the subject; otherwise the test starts at the match point.      must be later in the subject; otherwise the test starts at the match point.
6731      This optimization can save a huge amount of backtracking in patterns with      This optimization can save a huge amount of backtracking in patterns with
6732      nested unlimited repeats that aren't going to match. Writing separate code      nested unlimited repeats that aren't going to match. Writing separate code
# Line 6393  for(;;) Line 6739  for(;;)
6739      32-megabyte string... so we don't do this when the string is sufficiently      32-megabyte string... so we don't do this when the string is sufficiently
6740      long. */      long. */
6741    
6742      if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)      if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6743        {        {
6744        register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);        register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6745    
6746        /* We don't need to repeat the search if we haven't yet reached the        /* We don't need to repeat the search if we haven't yet reached the
6747        place we found it at last time. */        place we found it at last time. */
6748    
6749        if (p > req_byte_ptr)        if (p > req_char_ptr)
6750          {          {
6751          if (req_byte_caseless)          if (req_char != req_char2)
6752            {            {
6753            while (p < end_subject)            while (p < end_subject)
6754              {              {
6755              register int pp = *p++;              register int pp = *p++;
6756              if (pp == req_byte || pp == req_byte2) { p--; break; }              if (pp == req_char || pp == req_char2) { p--; break; }
6757              }              }
6758            }            }
6759          else          else
6760            {            {
6761            while (p < end_subject)            while (p < end_subject)
6762              {              {
6763              if (*p++ == req_byte) { p--; break; }              if (*p++ == req_char) { p--; break; }
6764              }              }
6765            }            }
6766    
# Line 6431  for(;;) Line 6777  for(;;)
6777          found it, so that we don't search again next time round the loop if          found it, so that we don't search again next time round the loop if
6778          the start hasn't passed this character yet. */          the start hasn't passed this character yet. */
6779    
6780          req_byte_ptr = p;          req_char_ptr = p;
6781          }          }
6782        }        }
6783      }      }
# Line 6486  for(;;) Line 6832  for(;;)
6832      case MATCH_THEN:      case MATCH_THEN:
6833      md->ignore_skip_arg = FALSE;      md->ignore_skip_arg = FALSE;
6834      new_start_match = start_match + 1;      new_start_match = start_match + 1;
6835  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6836      if (utf8)      if (utf)
6837        while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)        ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6838          new_start_match++;          new_start_match++);
6839  #endif  #endif
6840      break;      break;
6841    
# Line 6527  for(;;) Line 6873  for(;;)
6873    
6874    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does
6875    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
6876    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. In
6877      normal matching start_match will always be greater than the first position at
6878      this stage, but a failed *SKIP can cause a return at the same point, which is
6879      why the first test exists. */
6880    
6881    if (start_match[-1] == CHAR_CR &&    if (start_match > (PCRE_PUCHAR)subject + start_offset &&
6882          start_match[-1] == CHAR_CR &&
6883        start_match < end_subject &&        start_match < end_subject &&
6884        *start_match == CHAR_NL &&        *start_match == CHAR_NL &&
6885        (re->flags & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
# Line 6575  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6925  if (rc == MATCH_MATCH || rc == MATCH_ACC
6925        }        }
6926      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
6927      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
6928      (pcre_free)(md->offset_vector);      (PUBL(free))(md->offset_vector);
6929      }      }
6930    
6931    /* Set the return code to the number of captured strings, or 0 if there were    /* Set the return code to the number of captured strings, or 0 if there were
# Line 6616  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6966  if (rc == MATCH_MATCH || rc == MATCH_ACC
6966    /* Return MARK data if requested */    /* Return MARK data if requested */
6967    
6968    if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)    if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6969      *(extra_data->mark) = (unsigned char *)(md->mark);      *(extra_data->mark) = (pcre_uchar *)md->mark;
6970    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
6971    return rc;    return rc;
6972    }    }
# Line 6627  attempt has failed at all permitted star Line 6977  attempt has failed at all permitted star
6977  if (using_temporary_offsets)  if (using_temporary_offsets)
6978    {    {
6979    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
6980    (pcre_free)(md->offset_vector);    (PUBL(free))(md->offset_vector);
6981    }    }
6982    
6983  /* For anything other than nomatch or partial match, just return the code. */  /* For anything other than nomatch or partial match, just return the code. */
# Line 6646  if (start_partial != NULL) Line 6996  if (start_partial != NULL)
6996    md->mark = NULL;    md->mark = NULL;
6997    if (offsetcount > 1)    if (offsetcount > 1)
6998      {      {
6999      offsets[0] = (int)(start_partial - (USPTR)subject);      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7000      offsets[1] = (int)(end_subject - (USPTR)subject);      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7001      }      }
7002    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
7003    }    }
# Line 6663  else Line 7013  else
7013  /* Return the MARK data if it has been requested. */  /* Return the MARK data if it has been requested. */
7014    
7015  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7016    *(extra_data->mark) = (unsigned char *)(md->nomatch_mark);    *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
7017  return rc;  return rc;
7018  }  }
7019    

Legend:
Removed from v.788  
changed lines
  Added in v.916

  ViewVC Help
Powered by ViewVC 1.1.5