/[pcre]/code/branches/pcre16/pcre_exec.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_exec.c

Parent Directory | Revision Log | View Patch

code/trunk/pcre_exec.c revision 723 by ph10, Sat Oct 8 15:55:23 2011 UTC code/branches/pcre16/pcre_exec.c revision 795 by zherczeg, Sat Dec 10 02:20:06 2011 UTC
# Line 121  Returns:     nothing Line 121  Returns:     nothing
121  */  */
122    
123  static void  static void
124  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
125  {  {
126  unsigned int c;  unsigned int c;
127  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 152  Returns:      < 0 if not matched, otherw Line 152  Returns:      < 0 if not matched, otherw
152  */  */
153    
154  static int  static int
155  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
156    BOOL caseless)    BOOL caseless)
157  {  {
158  USPTR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
159  register USPTR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
160    
161  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
162  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 181  ASCII characters. */ Line 181  ASCII characters. */
181    
182  if (caseless)  if (caseless)
183    {    {
184  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
185  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
186    if (md->utf8)    if (md->utf)
187      {      {
188      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
189      bytes matched may differ, because there are some characters whose upper and      bytes matched may differ, because there are some characters whose upper and
# Line 193  if (caseless) Line 193  if (caseless)
193      the latter. It is important, therefore, to check the length along the      the latter. It is important, therefore, to check the length along the
194      reference, not along the subject (earlier code did this wrong). */      reference, not along the subject (earlier code did this wrong). */
195    
196      USPTR endptr = p + length;      PCRE_PUCHAR endptr = p + length;
197      while (p < endptr)      while (p < endptr)
198        {        {
199        int c, d;        int c, d;
# Line 354  typedef struct heapframe { Line 354  typedef struct heapframe {
354    
355    /* Function arguments that may change */    /* Function arguments that may change */
356    
357    USPTR Xeptr;    PCRE_PUCHAR Xeptr;
358    const uschar *Xecode;    const pcre_uchar *Xecode;
359    USPTR Xmstart;    PCRE_PUCHAR Xmstart;
360    USPTR Xmarkptr;    PCRE_PUCHAR Xmarkptr;
361    int Xoffset_top;    int Xoffset_top;
362    eptrblock *Xeptrb;    eptrblock *Xeptrb;
363    unsigned int Xrdepth;    unsigned int Xrdepth;
364    
365    /* Function local variables */    /* Function local variables */
366    
367    USPTR Xcallpat;    PCRE_PUCHAR Xcallpat;
368  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
369    USPTR Xcharptr;    PCRE_PUCHAR Xcharptr;
370  #endif  #endif
371    USPTR Xdata;    PCRE_PUCHAR Xdata;
372    USPTR Xnext;    PCRE_PUCHAR Xnext;
373    USPTR Xpp;    PCRE_PUCHAR Xpp;
374    USPTR Xprev;    PCRE_PUCHAR Xprev;
375    USPTR Xsaved_eptr;    PCRE_PUCHAR Xsaved_eptr;
376    
377    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
378    
# Line 385  typedef struct heapframe { Line 385  typedef struct heapframe {
385    int Xprop_value;    int Xprop_value;
386    int Xprop_fail_result;    int Xprop_fail_result;
387    int Xoclength;    int Xoclength;
388    uschar Xocchars[8];    pcre_uchar Xocchars[6];
389  #endif  #endif
390    
391    int Xcodelink;    int Xcodelink;
# Line 450  the subject. */ Line 450  the subject. */
450    
451    
452  /* Performance note: It might be tempting to extract commonly used fields from  /* Performance note: It might be tempting to extract commonly used fields from
453  the md structure (e.g. utf8, end_subject) into individual variables to improve  the md structure (e.g. utf, end_subject) into individual variables to improve
454  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
455  made performance worse.  made performance worse.
456    
# Line 474  Returns:       MATCH_MATCH if matched Line 474  Returns:       MATCH_MATCH if matched
474  */  */
475    
476  static int  static int
477  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
478    const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb,    PCRE_PUCHAR mstart, const pcre_uchar *markptr, int offset_top,
479    unsigned int rdepth)    match_data *md, eptrblock *eptrb, unsigned int rdepth)
480  {  {
481  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
482  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 485  so they can be ordinary variables in all Line 485  so they can be ordinary variables in all
485  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
486  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
487  register unsigned int c;   /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
488  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf;         /* Local copy of UTF flag for speed */
489    
490  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
491  BOOL caseless;  BOOL caseless;
# Line 527  HEAP_RECURSE: Line 527  HEAP_RECURSE:
527    
528  /* Ditto for the local variables */  /* Ditto for the local variables */
529    
530  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
531  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
532  #endif  #endif
533  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
# Line 585  declarations can be cut out in a block. Line 585  declarations can be cut out in a block.
585  below are for variables that do not have to be preserved over a recursive call  below are for variables that do not have to be preserved over a recursive call
586  to RMATCH(). */  to RMATCH(). */
587    
588  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
589  const uschar *charptr;  const pcre_uchar *charptr;
590  #endif  #endif
591  const uschar *callpat;  const pcre_uchar *callpat;
592  const uschar *data;  const pcre_uchar *data;
593  const uschar *next;  const pcre_uchar *next;
594  USPTR         pp;  PCRE_PUCHAR       pp;
595  const uschar *prev;  const pcre_uchar *prev;
596  USPTR         saved_eptr;  PCRE_PUCHAR       saved_eptr;
597    
598  recursion_info new_recursive;  recursion_info new_recursive;
599    
# Line 606  int prop_type; Line 606  int prop_type;
606  int prop_value;  int prop_value;
607  int prop_fail_result;  int prop_fail_result;
608  int oclength;  int oclength;
609  uschar occhars[8];  pcre_uchar occhars[6];
610  #endif  #endif
611    
612  int codelink;  int codelink;
# Line 634  the alternative names that are used. */ Line 634  the alternative names that are used. */
634  #define code_offset   codelink  #define code_offset   codelink
635  #define condassert    condition  #define condassert    condition
636  #define matched_once  prev_is_word  #define matched_once  prev_is_word
637    #define foc           number
638    
639  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
640  variables. */  variables. */
# Line 659  defined). However, RMATCH isn't like a f Line 660  defined). However, RMATCH isn't like a f
660  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
661  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
662    
663  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
664  utf8 = md->utf8;       /* Local copy of the flag */  utf = md->utf;       /* Local copy of the flag */
665  #else  #else
666  utf8 = FALSE;  utf = FALSE;
667  #endif  #endif
668    
669  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
# Line 702  for (;;) Line 703  for (;;)
703      {      {
704      case OP_MARK:      case OP_MARK:
705      markptr = ecode + 2;      markptr = ecode + 2;
706      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
707        eptrb, RM55);        eptrb, RM55);
708    
709      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
# Line 713  for (;;) Line 714  for (;;)
714      unaltered. */      unaltered. */
715    
716      if (rrc == MATCH_SKIP_ARG &&      if (rrc == MATCH_SKIP_ARG &&
717          strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)          STRCMP_UC_UC(markptr, md->start_match_ptr) == 0)
718        {        {
719        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
720        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
# Line 728  for (;;) Line 729  for (;;)
729      /* COMMIT overrides PRUNE, SKIP, and THEN */      /* COMMIT overrides PRUNE, SKIP, and THEN */
730    
731      case OP_COMMIT:      case OP_COMMIT:
732      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
733        eptrb, RM52);        eptrb, RM52);
734      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
735          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
# Line 739  for (;;) Line 740  for (;;)
740      /* PRUNE overrides THEN */      /* PRUNE overrides THEN */
741    
742      case OP_PRUNE:      case OP_PRUNE:
743      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
744        eptrb, RM51);        eptrb, RM51);
745      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
746      MRRETURN(MATCH_PRUNE);      MRRETURN(MATCH_PRUNE);
747    
748      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
749      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
750        eptrb, RM56);        eptrb, RM56);
751      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
752      md->mark = ecode + 2;      md->mark = ecode + 2;
# Line 754  for (;;) Line 755  for (;;)
755      /* SKIP overrides PRUNE and THEN */      /* SKIP overrides PRUNE and THEN */
756    
757      case OP_SKIP:      case OP_SKIP:
758      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
759        eptrb, RM53);        eptrb, RM53);
760      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
761        RRETURN(rrc);        RRETURN(rrc);
# Line 762  for (;;) Line 763  for (;;)
763      MRRETURN(MATCH_SKIP);      MRRETURN(MATCH_SKIP);
764    
765      case OP_SKIP_ARG:      case OP_SKIP_ARG:
766      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
767        eptrb, RM57);        eptrb, RM57);
768      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
769        RRETURN(rrc);        RRETURN(rrc);
# Line 780  for (;;) Line 781  for (;;)
781      match pointer to do this. */      match pointer to do this. */
782    
783      case OP_THEN:      case OP_THEN:
784      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
785        eptrb, RM54);        eptrb, RM54);
786      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
787      md->start_match_ptr = ecode;      md->start_match_ptr = ecode;
788      MRRETURN(MATCH_THEN);      MRRETURN(MATCH_THEN);
789    
790      case OP_THEN_ARG:      case OP_THEN_ARG:
791      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
792        md, eptrb, RM58);        md, eptrb, RM58);
793      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
794      md->start_match_ptr = ecode;      md->start_match_ptr = ecode;
795      md->mark = ecode + 2;      md->mark = ecode + 2;
796      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
797    
798      /* Handle an atomic group that does not contain any capturing parentheses.      /* Handle an atomic group that does not contain any capturing parentheses.
799      This can be handled like an assertion. Prior to 8.13, all atomic groups      This can be handled like an assertion. Prior to 8.13, all atomic groups
800      were handled this way. In 8.13, the code was changed as below for ONCE, so      were handled this way. In 8.13, the code was changed as below for ONCE, so
801      that backups pass through the group and thereby reset captured values.      that backups pass through the group and thereby reset captured values.
802      However, this uses a lot more stack, so in 8.20, atomic groups that do not      However, this uses a lot more stack, so in 8.20, atomic groups that do not
803      contain any captures generate OP_ONCE_NC, which can be handled in the old,      contain any captures generate OP_ONCE_NC, which can be handled in the old,
804      less stack intensive way.      less stack intensive way.
805    
806      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
# Line 816  for (;;) Line 817  for (;;)
817        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
818          {          {
819          mstart = md->start_match_ptr;          mstart = md->start_match_ptr;
820            markptr = md->mark;
821          break;          break;
822          }          }
823        if (rrc == MATCH_THEN)        if (rrc == MATCH_THEN)
824          {          {
825          next = ecode + GET(ecode,1);          next = ecode + GET(ecode,1);
826          if (md->start_match_ptr < next &&          if (md->start_match_ptr < next &&
827              (*ecode == OP_ALT || *next == OP_ALT))              (*ecode == OP_ALT || *next == OP_ALT))
828            rrc = MATCH_NOMATCH;            rrc = MATCH_NOMATCH;
829          }          }
830    
831        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
832        ecode += GET(ecode,1);        ecode += GET(ecode,1);
833        }        }
# Line 867  for (;;) Line 869  for (;;)
869        }        }
870      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
871        {        {
872        md->match_function_type = MATCH_CBEGROUP;        md->match_function_type = MATCH_CBEGROUP;
873        RMATCH(eptr, prev, offset_top, md, eptrb, RM66);        RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
874        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
875        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
# Line 915  for (;;) Line 917  for (;;)
917        for (;;)        for (;;)
918          {          {
919          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
920          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
921            eptrb, RM1);            eptrb, RM1);
922          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
923    
924          /* If we backed up to a THEN, check whether it is within the current          /* If we backed up to a THEN, check whether it is within the current
925          branch by comparing the address of the THEN that is passed back with          branch by comparing the address of the THEN that is passed back with
926          the end of the branch. If it is within the current branch, and the          the end of the branch. If it is within the current branch, and the
927          branch is one of two or more alternatives (it either starts or ends          branch is one of two or more alternatives (it either starts or ends
928          with OP_ALT), we have reached the limit of THEN's action, so convert          with OP_ALT), we have reached the limit of THEN's action, so convert
929          the return code to NOMATCH, which will cause normal backtracking to          the return code to NOMATCH, which will cause normal backtracking to
930          happen from now on. Otherwise, THEN is passed back to an outer          happen from now on. Otherwise, THEN is passed back to an outer
931          alternative. This implements Perl's treatment of parenthesized groups,          alternative. This implements Perl's treatment of parenthesized groups,
932          where a group not containing | does not affect the current alternative,          where a group not containing | does not affect the current alternative,
933          that is, (X) is NOT the same as (X|(*F)). */          that is, (X) is NOT the same as (X|(*F)). */
934    
935          if (rrc == MATCH_THEN)          if (rrc == MATCH_THEN)
936            {            {
937            next = ecode + GET(ecode,1);            next = ecode + GET(ecode,1);
938            if (md->start_match_ptr < next &&            if (md->start_match_ptr < next &&
939                (*ecode == OP_ALT || *next == OP_ALT))                (*ecode == OP_ALT || *next == OP_ALT))
940              rrc = MATCH_NOMATCH;              rrc = MATCH_NOMATCH;
941            }            }
942    
943          /* Anything other than NOMATCH is passed back. */          /* Anything other than NOMATCH is passed back. */
944    
945          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 1003  for (;;) Line 1005  for (;;)
1005    
1006        else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)        else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1007          {          {
1008          ecode += _pcre_OP_lengths[*ecode];          ecode += PRIV(OP_lengths)[*ecode];
1009          goto TAIL_RECURSE;          goto TAIL_RECURSE;
1010          }          }
1011    
1012        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1013    
1014        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1015          RM2);          RM2);
1016    
1017        /* See comment in the code for capturing groups above about handling        /* See comment in the code for capturing groups above about handling
1018        THEN. */        THEN. */
1019    
1020        if (rrc == MATCH_THEN)        if (rrc == MATCH_THEN)
1021          {          {
1022          next = ecode + GET(ecode,1);          next = ecode + GET(ecode,1);
1023          if (md->start_match_ptr < next &&          if (md->start_match_ptr < next &&
1024              (*ecode == OP_ALT || *next == OP_ALT))              (*ecode == OP_ALT || *next == OP_ALT))
1025            rrc = MATCH_NOMATCH;            rrc = MATCH_NOMATCH;
1026          }          }
1027    
1028        if (rrc != MATCH_NOMATCH)        if (rrc != MATCH_NOMATCH)
1029          {          {
1030          if (rrc == MATCH_ONCE)          if (rrc == MATCH_ONCE)
1031            {            {
1032            const uschar *scode = ecode;            const pcre_uchar *scode = ecode;
1033            if (*scode != OP_ONCE)           /* If not at start, find it */            if (*scode != OP_ONCE)           /* If not at start, find it */
1034              {              {
1035              while (*scode == OP_ALT) scode += GET(scode, 1);              while (*scode == OP_ALT) scode += GET(scode, 1);
# Line 1040  for (;;) Line 1042  for (;;)
1042        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1043        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1044        }        }
1045    
1046      if (md->mark == NULL) md->mark = markptr;      if (md->mark == NULL) md->mark = markptr;
1047      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
1048    
# Line 1093  for (;;) Line 1095  for (;;)
1095          md->offset_vector[md->offset_end - number] =          md->offset_vector[md->offset_end - number] =
1096            (int)(eptr - md->start_subject);            (int)(eptr - md->start_subject);
1097          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1098          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1099            eptrb, RM63);            eptrb, RM63);
1100          if (rrc == MATCH_KETRPOS)          if (rrc == MATCH_KETRPOS)
1101            {            {
# Line 1104  for (;;) Line 1106  for (;;)
1106            matched_once = TRUE;            matched_once = TRUE;
1107            continue;            continue;
1108            }            }
1109    
1110          /* See comment in the code for capturing groups above about handling          /* See comment in the code for capturing groups above about handling
1111          THEN. */          THEN. */
1112    
1113          if (rrc == MATCH_THEN)          if (rrc == MATCH_THEN)
1114            {            {
1115            next = ecode + GET(ecode,1);            next = ecode + GET(ecode,1);
1116            if (md->start_match_ptr < next &&            if (md->start_match_ptr < next &&
1117                (*ecode == OP_ALT || *next == OP_ALT))                (*ecode == OP_ALT || *next == OP_ALT))
1118              rrc = MATCH_NOMATCH;              rrc = MATCH_NOMATCH;
1119            }            }
1120    
1121          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1122          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
# Line 1166  for (;;) Line 1168  for (;;)
1168      for (;;)      for (;;)
1169        {        {
1170        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1171        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1172          eptrb, RM48);          eptrb, RM48);
1173        if (rrc == MATCH_KETRPOS)        if (rrc == MATCH_KETRPOS)
1174          {          {
# Line 1176  for (;;) Line 1178  for (;;)
1178          matched_once = TRUE;          matched_once = TRUE;
1179          continue;          continue;
1180          }          }
1181    
1182        /* See comment in the code for capturing groups above about handling        /* See comment in the code for capturing groups above about handling
1183        THEN. */        THEN. */
1184    
1185        if (rrc == MATCH_THEN)        if (rrc == MATCH_THEN)
1186          {          {
1187          next = ecode + GET(ecode,1);          next = ecode + GET(ecode,1);
1188          if (md->start_match_ptr < next &&          if (md->start_match_ptr < next &&
1189              (*ecode == OP_ALT || *next == OP_ALT))              (*ecode == OP_ALT || *next == OP_ALT))
1190            rrc = MATCH_NOMATCH;            rrc = MATCH_NOMATCH;
1191          }          }
1192    
1193        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1194        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 1231  for (;;) Line 1233  for (;;)
1233          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1234          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1235          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1236          cb.mark             = markptr;          cb.mark             = (unsigned char *)markptr;
1237          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1238          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1239          }          }
1240        ecode += _pcre_OP_lengths[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1241        }        }
1242    
1243      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1252  for (;;) Line 1254  for (;;)
1254        else        else
1255          {          {
1256          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1257          condition =  (recno == RREF_ANY || recno == md->recursive->group_num);          condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1258    
1259          /* If the test is for recursion into a specific subpattern, and it is          /* If the test is for recursion into a specific subpattern, and it is
1260          false, but the test was set up by name, scan the table to see if the          false, but the test was set up by name, scan the table to see if the
1261          name refers to any other numbers, and test them. The condition is true          name refers to any other numbers, and test them. The condition is true
1262          if any one is set. */          if any one is set. */
1263    
1264          if (!condition && condcode == OP_NRREF && recno != RREF_ANY)          if (!condition && condcode == OP_NRREF)
1265            {            {
1266            uschar *slotA = md->name_table;            pcre_uchar *slotA = md->name_table;
1267            for (i = 0; i < md->name_count; i++)            for (i = 0; i < md->name_count; i++)
1268              {              {
1269              if (GET2(slotA, 0) == recno) break;              if (GET2(slotA, 0) == recno) break;
# Line 1274  for (;;) Line 1276  for (;;)
1276    
1277            if (i < md->name_count)            if (i < md->name_count)
1278              {              {
1279              uschar *slotB = slotA;              pcre_uchar *slotB = slotA;
1280              while (slotB > md->name_table)              while (slotB > md->name_table)
1281                {                {
1282                slotB -= md->name_entry_size;                slotB -= md->name_entry_size;
1283                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1284                  {                  {
1285                  condition = GET2(slotB, 0) == md->recursive->group_num;                  condition = GET2(slotB, 0) == md->recursive->group_num;
1286                  if (condition) break;                  if (condition) break;
# Line 1294  for (;;) Line 1296  for (;;)
1296                for (i++; i < md->name_count; i++)                for (i++; i < md->name_count; i++)
1297                  {                  {
1298                  slotB += md->name_entry_size;                  slotB += md->name_entry_size;
1299                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                  if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1300                    {                    {
1301                    condition = GET2(slotB, 0) == md->recursive->group_num;                    condition = GET2(slotB, 0) == md->recursive->group_num;
1302                    if (condition) break;                    if (condition) break;
# Line 1307  for (;;) Line 1309  for (;;)
1309    
1310          /* Choose branch according to the condition */          /* Choose branch according to the condition */
1311    
1312          ecode += condition? 3 : GET(ecode, 1);          ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1313          }          }
1314        }        }
1315    
# Line 1324  for (;;) Line 1326  for (;;)
1326        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
1327          {          {
1328          int refno = offset >> 1;          int refno = offset >> 1;
1329          uschar *slotA = md->name_table;          pcre_uchar *slotA = md->name_table;
1330    
1331          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
1332            {            {
# Line 1338  for (;;) Line 1340  for (;;)
1340    
1341          if (i < md->name_count)          if (i < md->name_count)
1342            {            {
1343            uschar *slotB = slotA;            pcre_uchar *slotB = slotA;
1344            while (slotB > md->name_table)            while (slotB > md->name_table)
1345              {              {
1346              slotB -= md->name_entry_size;              slotB -= md->name_entry_size;
1347              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1348                {                {
1349                offset = GET2(slotB, 0) << 1;                offset = GET2(slotB, 0) << 1;
1350                condition = offset < offset_top &&                condition = offset < offset_top &&
# Line 1360  for (;;) Line 1362  for (;;)
1362              for (i++; i < md->name_count; i++)              for (i++; i < md->name_count; i++)
1363                {                {
1364                slotB += md->name_entry_size;                slotB += md->name_entry_size;
1365                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1366                  {                  {
1367                  offset = GET2(slotB, 0) << 1;                  offset = GET2(slotB, 0) << 1;
1368                  condition = offset < offset_top &&                  condition = offset < offset_top &&
# Line 1375  for (;;) Line 1377  for (;;)
1377    
1378        /* Chose branch according to the condition */        /* Chose branch according to the condition */
1379    
1380        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1381        }        }
1382    
1383      else if (condcode == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
# Line 1400  for (;;) Line 1402  for (;;)
1402          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1403          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1404          }          }
1405    
1406        /* PCRE doesn't allow the effect of (*THEN) to escape beyond an        /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1407        assertion; it is therefore treated as NOMATCH. */        assertion; it is therefore treated as NOMATCH. */
1408    
1409        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1410          {          {
1411          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1412          }          }
# Line 1432  for (;;) Line 1434  for (;;)
1434          ecode += 1 + LINK_SIZE;          ecode += 1 + LINK_SIZE;
1435          goto TAIL_RECURSE;          goto TAIL_RECURSE;
1436          }          }
1437    
1438        md->match_function_type = MATCH_CBEGROUP;        md->match_function_type = MATCH_CBEGROUP;
1439        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1440        RRETURN(rrc);        RRETURN(rrc);
# Line 1467  for (;;) Line 1469  for (;;)
1469        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1470        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1471        }        }
1472      ecode += 3;      ecode += 1 + IMM2_SIZE;
1473      break;      break;
1474    
1475    
# Line 1530  for (;;) Line 1532  for (;;)
1532          markptr = md->mark;          markptr = md->mark;
1533          break;          break;
1534          }          }
1535    
1536        /* PCRE does not allow THEN to escape beyond an assertion; it is treated        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1537        as NOMATCH. */        as NOMATCH. */
1538    
1539        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1540        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1541        }        }
# Line 1576  for (;;) Line 1578  for (;;)
1578          break;          break;
1579          }          }
1580    
1581        /* PCRE does not allow THEN to escape beyond an assertion; it is treated        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1582        as NOMATCH. */        as NOMATCH. */
1583    
1584        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
# Line 1595  for (;;) Line 1597  for (;;)
1597      back a number of characters, not bytes. */      back a number of characters, not bytes. */
1598    
1599      case OP_REVERSE:      case OP_REVERSE:
1600  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1601      if (utf8)      if (utf)
1602        {        {
1603        i = GET(ecode, 1);        i = GET(ecode, 1);
1604        while (i-- > 0)        while (i-- > 0)
# Line 1642  for (;;) Line 1644  for (;;)
1644        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1645        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1646        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1647        cb.mark             = markptr;        cb.mark             = (unsigned char *)markptr;
1648        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1649        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1650        }        }
# Line 1717  for (;;) Line 1719  for (;;)
1719        do        do
1720          {          {
1721          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1722          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1723            md, eptrb, RM6);            md, eptrb, RM6);
1724          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1725              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
# Line 1740  for (;;) Line 1742  for (;;)
1742          /* PCRE does not allow THEN to escape beyond a recursion; it is treated          /* PCRE does not allow THEN to escape beyond a recursion; it is treated
1743          as NOMATCH. */          as NOMATCH. */
1744    
1745          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1746            {            {
1747            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1748            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1826  for (;;) Line 1828  for (;;)
1828        }        }
1829      else saved_eptr = NULL;      else saved_eptr = NULL;
1830    
1831      /* If we are at the end of an assertion group or a non-capturing atomic      /* If we are at the end of an assertion group or a non-capturing atomic
1832      group, stop matching and return MATCH_MATCH, but record the current high      group, stop matching and return MATCH_MATCH, but record the current high
1833      water mark for use by positive assertions. We also need to record the match      water mark for use by positive assertions. We also need to record the match
1834      start in case it was changed by \K. */      start in case it was changed by \K. */
1835    
1836      if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||      if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1837           *prev == OP_ONCE_NC)           *prev == OP_ONCE_NC)
1838        {        {
1839        md->end_match_ptr = eptr;      /* For ONCE_NC */        md->end_match_ptr = eptr;      /* For ONCE_NC */
1840        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
# Line 2068  for (;;) Line 2070  for (;;)
2070        be "non-word" characters. Remember the earliest consulted character for        be "non-word" characters. Remember the earliest consulted character for
2071        partial matching. */        partial matching. */
2072    
2073  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2074        if (utf8)        if (utf)
2075          {          {
2076          /* Get status of previous character */          /* Get status of previous character */
2077    
2078          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
2079            {            {
2080            USPTR lastptr = eptr - 1;            PCRE_PUCHAR lastptr = eptr - 1;
2081            while((*lastptr & 0xc0) == 0x80) lastptr--;            BACKCHAR(lastptr);
2082            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2083            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
2084  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2188  for (;;) Line 2190  for (;;)
2190        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2191        }        }
2192      eptr++;      eptr++;
2193      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  #ifdef SUPPORT_UTF
2194        if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2195    #endif
2196      ecode++;      ecode++;
2197      break;      break;
2198    
# Line 2213  for (;;) Line 2217  for (;;)
2217        }        }
2218      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2219      if (      if (
2220  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2221         c < 256 &&         c < 256 &&
2222  #endif  #endif
2223         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
# Line 2230  for (;;) Line 2234  for (;;)
2234        }        }
2235      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2236      if (      if (
2237  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2238         c >= 256 ||         c > 255 ||
2239  #endif  #endif
2240         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
2241         )         )
# Line 2247  for (;;) Line 2251  for (;;)
2251        }        }
2252      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2253      if (      if (
2254  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2255         c < 256 &&         c < 256 &&
2256  #endif  #endif
2257         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
# Line 2264  for (;;) Line 2268  for (;;)
2268        }        }
2269      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2270      if (      if (
2271  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2272         c >= 256 ||         c > 255 ||
2273  #endif  #endif
2274         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
2275         )         )
# Line 2281  for (;;) Line 2285  for (;;)
2285        }        }
2286      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2287      if (      if (
2288  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2289         c < 256 &&         c < 256 &&
2290  #endif  #endif
2291         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
# Line 2298  for (;;) Line 2302  for (;;)
2302        }        }
2303      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2304      if (      if (
2305  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2306         c >= 256 ||         c > 255 ||
2307  #endif  #endif
2308         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
2309         )         )
# Line 2477  for (;;) Line 2481  for (;;)
2481          break;          break;
2482    
2483          case PT_GC:          case PT_GC:
2484          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2485            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2486          break;          break;
2487    
# Line 2494  for (;;) Line 2498  for (;;)
2498          /* These are specials */          /* These are specials */
2499    
2500          case PT_ALNUM:          case PT_ALNUM:
2501          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2502               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2503            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2504          break;          break;
2505    
2506          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2507          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2508               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2509                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2510            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2511          break;          break;
2512    
2513          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2514          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2515               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2516               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2517                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
# Line 2515  for (;;) Line 2519  for (;;)
2519          break;          break;
2520    
2521          case PT_WORD:          case PT_WORD:
2522          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2523               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2524               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2525            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2526          break;          break;
# Line 2545  for (;;) Line 2549  for (;;)
2549      while (eptr < md->end_subject)      while (eptr < md->end_subject)
2550        {        {
2551        int len = 1;        int len = 1;
2552        if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }        if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2553        if (UCD_CATEGORY(c) != ucp_M) break;        if (UCD_CATEGORY(c) != ucp_M) break;
2554        eptr += len;        eptr += len;
2555        }        }
# Line 2566  for (;;) Line 2570  for (;;)
2570      case OP_REFI:      case OP_REFI:
2571      caseless = op == OP_REFI;      caseless = op == OP_REFI;
2572      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2573      ecode += 3;      ecode += 1 + IMM2_SIZE;
2574    
2575      /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2576    
# Line 2606  for (;;) Line 2610  for (;;)
2610        case OP_CRMINRANGE:        case OP_CRMINRANGE:
2611        minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2612        min = GET2(ecode, 1);        min = GET2(ecode, 1);
2613        max = GET2(ecode, 3);        max = GET2(ecode, 1 + IMM2_SIZE);
2614        if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2615        ecode += 5;        ecode += 1 + 2 * IMM2_SIZE;
2616        break;        break;
2617    
2618        default:               /* No repeat follows */        default:               /* No repeat follows */
# Line 2705  for (;;) Line 2709  for (;;)
2709      case OP_NCLASS:      case OP_NCLASS:
2710      case OP_CLASS:      case OP_CLASS:
2711        {        {
2712          /* The data variable is saved across frames, so the byte map needs to
2713          be stored there. */
2714    #define BYTE_MAP ((pcre_uint8 *)data)
2715        data = ecode + 1;                /* Save for matching */        data = ecode + 1;                /* Save for matching */
2716        ecode += 33;                     /* Advance past the item */        ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2717    
2718        switch (*ecode)        switch (*ecode)
2719          {          {
# Line 2727  for (;;) Line 2734  for (;;)
2734          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2735          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2736          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2737          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2738          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2739          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2740          break;          break;
2741    
2742          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2739  for (;;) Line 2746  for (;;)
2746    
2747        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
2748    
2749  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2750        /* UTF-8 mode */        if (utf)
       if (utf8)  
2751          {          {
2752          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2753            {            {
# Line 2756  for (;;) Line 2762  for (;;)
2762              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2763              }              }
2764            else            else
2765              {              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  
             }  
2766            }            }
2767          }          }
2768        else        else
2769  #endif  #endif
2770        /* Not UTF-8 mode */        /* Not UTF mode */
2771          {          {
2772          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2773            {            {
# Line 2773  for (;;) Line 2777  for (;;)
2777              MRRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2778              }              }
2779            c = *eptr++;            c = *eptr++;
2780            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2781              if (c > 255)
2782                {
2783                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2784                }
2785              else
2786    #endif
2787                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2788            }            }
2789          }          }
2790    
# Line 2787  for (;;) Line 2798  for (;;)
2798    
2799        if (minimize)        if (minimize)
2800          {          {
2801  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2802          /* UTF-8 mode */          if (utf)
         if (utf8)  
2803            {            {
2804            for (fi = min;; fi++)            for (fi = min;; fi++)
2805              {              {
# Line 2807  for (;;) Line 2817  for (;;)
2817                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2818                }                }
2819              else              else
2820                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
               if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  
               }  
2821              }              }
2822            }            }
2823          else          else
2824  #endif  #endif
2825          /* Not UTF-8 mode */          /* Not UTF mode */
2826            {            {
2827            for (fi = min;; fi++)            for (fi = min;; fi++)
2828              {              {
# Line 2827  for (;;) Line 2835  for (;;)
2835                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
2836                }                }
2837              c = *eptr++;              c = *eptr++;
2838              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2839                if (c > 255)
2840                  {
2841                  if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2842                  }
2843                else
2844    #endif
2845                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2846              }              }
2847            }            }
2848          /* Control never gets here */          /* Control never gets here */
# Line 2839  for (;;) Line 2854  for (;;)
2854          {          {
2855          pp = eptr;          pp = eptr;
2856    
2857  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2858          /* UTF-8 mode */          if (utf)
         if (utf8)  
2859            {            {
2860            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2861              {              {
# Line 2857  for (;;) Line 2871  for (;;)
2871                if (op == OP_CLASS) break;                if (op == OP_CLASS) break;
2872                }                }
2873              else              else
2874                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
               if ((data[c/8] & (1 << (c&7))) == 0) break;  
               }  
2875              eptr += len;              eptr += len;
2876              }              }
2877            for (;;)            for (;;)
# Line 2872  for (;;) Line 2884  for (;;)
2884            }            }
2885          else          else
2886  #endif  #endif
2887            /* Not UTF-8 mode */            /* Not UTF mode */
2888            {            {
2889            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2890              {              {
# Line 2882  for (;;) Line 2894  for (;;)
2894                break;                break;
2895                }                }
2896              c = *eptr;              c = *eptr;
2897              if ((data[c/8] & (1 << (c&7))) == 0) break;  #ifndef COMPILE_PCRE8
2898                if (c > 255)
2899                  {
2900                  if (op == OP_CLASS) break;
2901                  }
2902                else
2903    #endif
2904                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2905              eptr++;              eptr++;
2906              }              }
2907            while (eptr >= pp)            while (eptr >= pp)
# Line 2895  for (;;) Line 2914  for (;;)
2914    
2915          MRRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2916          }          }
2917    #undef BYTE_MAP
2918        }        }
2919      /* Control never gets here */      /* Control never gets here */
2920    
# Line 2903  for (;;) Line 2923  for (;;)
2923      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2924      mode, because Unicode properties are supported in non-UTF-8 mode. */      mode, because Unicode properties are supported in non-UTF-8 mode. */
2925    
2926  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2927      case OP_XCLASS:      case OP_XCLASS:
2928        {        {
2929        data = ecode + 1 + LINK_SIZE;                /* Save for matching */        data = ecode + 1 + LINK_SIZE;                /* Save for matching */
# Line 2928  for (;;) Line 2948  for (;;)
2948          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2949          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2950          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2951          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2952          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2953          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2954          break;          break;
2955    
2956          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2948  for (;;) Line 2968  for (;;)
2968            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2969            }            }
2970          GETCHARINCTEST(c, eptr);          GETCHARINCTEST(c, eptr);
2971          if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);          if (!PRIV(xclass)(c, data, utf)) MRRETURN(MATCH_NOMATCH);
2972          }          }
2973    
2974        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 2972  for (;;) Line 2992  for (;;)
2992              MRRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2993              }              }
2994            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2995            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);            if (!PRIV(xclass)(c, data, utf)) MRRETURN(MATCH_NOMATCH);
2996            }            }
2997          /* Control never gets here */          /* Control never gets here */
2998          }          }
# Line 2990  for (;;) Line 3010  for (;;)
3010              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3011              break;              break;
3012              }              }
3013    #ifdef SUPPORT_UTF
3014            GETCHARLENTEST(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
3015            if (!_pcre_xclass(c, data)) break;  #else
3016              c = *eptr;
3017    #endif
3018              if (!PRIV(xclass)(c, data, utf)) break;
3019            eptr += len;            eptr += len;
3020            }            }
3021          for(;;)          for(;;)
# Line 2999  for (;;) Line 3023  for (;;)
3023            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3024            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3025            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3026            if (utf8) BACKCHAR(eptr);  #ifdef SUPPORT_UTF
3027              if (utf) BACKCHAR(eptr);
3028    #endif
3029            }            }
3030          MRRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3031          }          }
# Line 3011  for (;;) Line 3037  for (;;)
3037      /* Match a single character, casefully */      /* Match a single character, casefully */
3038    
3039      case OP_CHAR:      case OP_CHAR:
3040  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3041      if (utf8)      if (utf)
3042        {        {
3043        length = 1;        length = 1;
3044        ecode++;        ecode++;
# Line 3026  for (;;) Line 3052  for (;;)
3052        }        }
3053      else      else
3054  #endif  #endif
3055        /* Not UTF mode */
     /* Non-UTF-8 mode */  
3056        {        {
3057        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
3058          {          {
# Line 3042  for (;;) Line 3067  for (;;)
3067      /* Match a single character, caselessly */      /* Match a single character, caselessly */
3068    
3069      case OP_CHARI:      case OP_CHARI:
3070  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3071      if (utf8)      if (utf)
3072        {        {
3073        length = 1;        length = 1;
3074        ecode++;        ecode++;
# Line 3084  for (;;) Line 3109  for (;;)
3109          }          }
3110        }        }
3111      else      else
3112  #endif   /* SUPPORT_UTF8 */  #endif   /* SUPPORT_UTF */
3113    
3114      /* Non-UTF-8 mode */      /* Not UTF mode */
3115        {        {
3116        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
3117          {          {
3118          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3119          MRRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3120          }          }
3121        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);        if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3122              != TABLE_GET(*eptr, md->lcc, *eptr)) MRRETURN(MATCH_NOMATCH);
3123          eptr++;
3124        ecode += 2;        ecode += 2;
3125        }        }
3126      break;      break;
# Line 3103  for (;;) Line 3130  for (;;)
3130      case OP_EXACT:      case OP_EXACT:
3131      case OP_EXACTI:      case OP_EXACTI:
3132      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3133      ecode += 3;      ecode += 1 + IMM2_SIZE;
3134      goto REPEATCHAR;      goto REPEATCHAR;
3135    
3136      case OP_POSUPTO:      case OP_POSUPTO:
# Line 3118  for (;;) Line 3145  for (;;)
3145      min = 0;      min = 0;
3146      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3147      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3148      ecode += 3;      ecode += 1 + IMM2_SIZE;
3149      goto REPEATCHAR;      goto REPEATCHAR;
3150    
3151      case OP_POSSTAR:      case OP_POSSTAR:
# Line 3166  for (;;) Line 3193  for (;;)
3193      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. */
3194    
3195      REPEATCHAR:      REPEATCHAR:
3196  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3197      if (utf8)      if (utf)
3198        {        {
3199        length = 1;        length = 1;
3200        charptr = ecode;        charptr = ecode;
# Line 3183  for (;;) Line 3210  for (;;)
3210          unsigned int othercase;          unsigned int othercase;
3211          if (op >= OP_STARI &&     /* Caseless */          if (op >= OP_STARI &&     /* Caseless */
3212              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
3213            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = PRIV(ord2utf)(othercase, occhars);
3214          else oclength = 0;          else oclength = 0;
3215  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3216    
3217          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3218            {            {
3219            if (eptr <= md->end_subject - length &&            if (eptr <= md->end_subject - length &&
3220              memcmp(eptr, charptr, length) == 0) eptr += length;              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3221  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3222            else if (oclength > 0 &&            else if (oclength > 0 &&
3223                     eptr <= md->end_subject - oclength &&                     eptr <= md->end_subject - oclength &&
3224                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3225  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3226            else            else
3227              {              {
# Line 3213  for (;;) Line 3240  for (;;)
3240              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3241              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3242              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3243                memcmp(eptr, charptr, length) == 0) eptr += length;                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3244  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3245              else if (oclength > 0 &&              else if (oclength > 0 &&
3246                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3247                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3248  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3249              else              else
3250                {                {
# Line 3234  for (;;) Line 3261  for (;;)
3261            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3262              {              {
3263              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3264                  memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3265  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3266              else if (oclength > 0 &&              else if (oclength > 0 &&
3267                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3268                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3269  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3270              else              else
3271                {                {
# Line 3270  for (;;) Line 3297  for (;;)
3297        value of fc will always be < 128. */        value of fc will always be < 128. */
3298        }        }
3299      else      else
3300  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3301          /* When not in UTF-8 mode, load a single-byte character. */
3302      /* When not in UTF-8 mode, load a single-byte character. */        fc = *ecode++;
   
     fc = *ecode++;  
3303    
3304      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always one character, though we may
3305      may not be in UTF-8 mode. The code is duplicated for the caseless and      or may not be in UTF mode. The code is duplicated for the caseless and
3306      caseful cases, for speed, since matching characters is likely to be quite      caseful cases, for speed, since matching characters is likely to be quite
3307      common. First, ensure the minimum number of matches are present. If min =      common. First, ensure the minimum number of matches are present. If min =
3308      max, continue at the same level without recursing. Otherwise, if      max, continue at the same level without recursing. Otherwise, if
# Line 3290  for (;;) Line 3315  for (;;)
3315    
3316      if (op >= OP_STARI)  /* Caseless */      if (op >= OP_STARI)  /* Caseless */
3317        {        {
3318        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3319          /* fc must be < 128 if UTF is enabled. */
3320          foc = md->fcc[fc];
3321    #else
3322    #ifdef SUPPORT_UTF
3323    #ifdef SUPPORT_UCP
3324          if (utf && fc > 127)
3325            foc = UCD_OTHERCASE(fc);
3326    #else
3327          if (utf && fc > 127)
3328            foc = fc;
3329    #endif /* SUPPORT_UCP */
3330          else
3331    #endif /* SUPPORT_UTF */
3332            foc = TABLE_GET(fc, md->fcc, fc);
3333    #endif /* COMPILE_PCRE8 */
3334    
3335        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3336          {          {
3337          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
# Line 3298  for (;;) Line 3339  for (;;)
3339            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3340            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
3341            }            }
3342          if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);          if (fc != *eptr && foc != *eptr) MRRETURN(MATCH_NOMATCH);
3343            eptr++;
3344          }          }
3345        if (min == max) continue;        if (min == max) continue;
3346        if (minimize)        if (minimize)
# Line 3313  for (;;) Line 3355  for (;;)
3355              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3356              MRRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
3357              }              }
3358            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);            if (fc != *eptr && foc != *eptr) MRRETURN(MATCH_NOMATCH);
3359              eptr++;
3360            }            }
3361          /* Control never gets here */          /* Control never gets here */
3362          }          }
# Line 3327  for (;;) Line 3370  for (;;)
3370              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3371              break;              break;
3372              }              }
3373            if (fc != md->lcc[*eptr]) break;            if (fc != *eptr && foc != *eptr) break;
3374            eptr++;            eptr++;
3375            }            }
3376    
# Line 3416  for (;;) Line 3459  for (;;)
3459      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3460      if (op == OP_NOTI)         /* The caseless case */      if (op == OP_NOTI)         /* The caseless case */
3461        {        {
3462  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
3463        if (c < 256)        if (c < 256)
3464  #endif  #endif
3465        c = md->lcc[c];          c = md->lcc[c];
3466        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3467        }        }
3468      else    /* Caseful */      else    /* Caseful */
# Line 3438  for (;;) Line 3481  for (;;)
3481      case OP_NOTEXACT:      case OP_NOTEXACT:
3482      case OP_NOTEXACTI:      case OP_NOTEXACTI:
3483      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3484      ecode += 3;      ecode += 1 + IMM2_SIZE;
3485      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3486    
3487      case OP_NOTUPTO:      case OP_NOTUPTO:
# Line 3448  for (;;) Line 3491  for (;;)
3491      min = 0;      min = 0;
3492      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3493      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3494      ecode += 3;      ecode += 1 + IMM2_SIZE;
3495      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3496    
3497      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
# Line 3480  for (;;) Line 3523  for (;;)
3523      possessive = TRUE;      possessive = TRUE;
3524      min = 0;      min = 0;
3525      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3526      ecode += 3;      ecode += 1 + IMM2_SIZE;
3527      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3528    
3529      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 3519  for (;;) Line 3562  for (;;)
3562    
3563      if (op >= OP_NOTSTARI)     /* Caseless */      if (op >= OP_NOTSTARI)     /* Caseless */
3564        {        {
3565        fc = md->lcc[fc];        fc = TABLE_GET(fc, md->lcc, fc);
3566    
3567  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3568        /* UTF-8 mode */        if (utf)
       if (utf8)  
3569          {          {
3570          register unsigned int d;          register unsigned int d;
3571          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3540  for (;;) Line 3582  for (;;)
3582          }          }
3583        else        else
3584  #endif  #endif
3585          /* Not UTF mode */
       /* Not UTF-8 mode */  
3586          {          {
3587          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3588            {            {
# Line 3558  for (;;) Line 3599  for (;;)
3599    
3600        if (minimize)        if (minimize)
3601          {          {
3602  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3603          /* UTF-8 mode */          if (utf)
         if (utf8)  
3604            {            {
3605            register unsigned int d;            register unsigned int d;
3606            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3580  for (;;) Line 3620  for (;;)
3620            }            }
3621          else          else
3622  #endif  #endif
3623          /* Not UTF-8 mode */          /* Not UTF mode */
3624            {            {
3625            for (fi = min;; fi++)            for (fi = min;; fi++)
3626              {              {
# Line 3604  for (;;) Line 3644  for (;;)
3644          {          {
3645          pp = eptr;          pp = eptr;
3646    
3647  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3648          /* UTF-8 mode */          if (utf)
         if (utf8)  
3649            {            {
3650            register unsigned int d;            register unsigned int d;
3651            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3633  for (;;) Line 3672  for (;;)
3672            }            }
3673          else          else
3674  #endif  #endif
3675          /* Not UTF-8 mode */          /* Not UTF mode */
3676            {            {
3677            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3678              {              {
# Line 3663  for (;;) Line 3702  for (;;)
3702    
3703      else      else
3704        {        {
3705  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3706        /* UTF-8 mode */        if (utf)
       if (utf8)  
3707          {          {
3708          register unsigned int d;          register unsigned int d;
3709          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3681  for (;;) Line 3719  for (;;)
3719          }          }
3720        else        else
3721  #endif  #endif
3722        /* Not UTF-8 mode */        /* Not UTF mode */
3723          {          {
3724          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3725            {            {
# Line 3698  for (;;) Line 3736  for (;;)
3736    
3737        if (minimize)        if (minimize)
3738          {          {
3739  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3740          /* UTF-8 mode */          if (utf)
         if (utf8)  
3741            {            {
3742            register unsigned int d;            register unsigned int d;
3743            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3719  for (;;) Line 3756  for (;;)
3756            }            }
3757          else          else
3758  #endif  #endif
3759          /* Not UTF-8 mode */          /* Not UTF mode */
3760            {            {
3761            for (fi = min;; fi++)            for (fi = min;; fi++)
3762              {              {
# Line 3743  for (;;) Line 3780  for (;;)
3780          {          {
3781          pp = eptr;          pp = eptr;
3782    
3783  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3784          /* UTF-8 mode */          if (utf)
         if (utf8)  
3785            {            {
3786            register unsigned int d;            register unsigned int d;
3787            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3771  for (;;) Line 3807  for (;;)
3807            }            }
3808          else          else
3809  #endif  #endif
3810          /* Not UTF-8 mode */          /* Not UTF mode */
3811            {            {
3812            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3813              {              {
# Line 3804  for (;;) Line 3840  for (;;)
3840      case OP_TYPEEXACT:      case OP_TYPEEXACT:
3841      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3842      minimize = TRUE;      minimize = TRUE;
3843      ecode += 3;      ecode += 1 + IMM2_SIZE;
3844      goto REPEATTYPE;      goto REPEATTYPE;
3845    
3846      case OP_TYPEUPTO:      case OP_TYPEUPTO:
# Line 3812  for (;;) Line 3848  for (;;)
3848      min = 0;      min = 0;
3849      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3850      minimize = *ecode == OP_TYPEMINUPTO;      minimize = *ecode == OP_TYPEMINUPTO;
3851      ecode += 3;      ecode += 1 + IMM2_SIZE;
3852      goto REPEATTYPE;      goto REPEATTYPE;
3853    
3854      case OP_TYPEPOSSTAR:      case OP_TYPEPOSSTAR:
# Line 3840  for (;;) Line 3876  for (;;)
3876      possessive = TRUE;      possessive = TRUE;
3877      min = 0;      min = 0;
3878      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3879      ecode += 3;      ecode += 1 + IMM2_SIZE;
3880      goto REPEATTYPE;      goto REPEATTYPE;
3881    
3882      case OP_TYPESTAR:      case OP_TYPESTAR:
# Line 4047  for (;;) Line 4083  for (;;)
4083            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4084              {              {
4085              int len = 1;              int len = 1;
4086              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4087              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
4088              eptr += len;              eptr += len;
4089              }              }
# Line 4059  for (;;) Line 4095  for (;;)
4095    
4096  /* Handle all other cases when the coding is UTF-8 */  /* Handle all other cases when the coding is UTF-8 */
4097    
4098  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4099        if (utf8) switch(ctype)        if (utf) switch(ctype)
4100          {          {
4101          case OP_ANY:          case OP_ANY:
4102          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 4072  for (;;) Line 4108  for (;;)
4108              }              }
4109            if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
4110            eptr++;            eptr++;
4111            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4112            }            }
4113          break;          break;
4114    
# Line 4085  for (;;) Line 4121  for (;;)
4121              MRRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4122              }              }
4123            eptr++;            eptr++;
4124            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4125            }            }
4126          break;          break;
4127    
# Line 4283  for (;;) Line 4319  for (;;)
4319              }              }
4320            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
4321              MRRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4322            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            eptr++;
4323              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4324            }            }
4325          break;          break;
4326    
# Line 4311  for (;;) Line 4348  for (;;)
4348              }              }
4349            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
4350              MRRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4351            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            eptr++;
4352              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4353            }            }
4354          break;          break;
4355    
# Line 4334  for (;;) Line 4372  for (;;)
4372          }  /* End switch(ctype) */          }  /* End switch(ctype) */
4373    
4374        else        else
4375  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF */
4376    
4377        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
4378        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. */
# Line 4768  for (;;) Line 4806  for (;;)
4806            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4807              {              {
4808              int len = 1;              int len = 1;
4809              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4810              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
4811              eptr += len;              eptr += len;
4812              }              }
# Line 4777  for (;;) Line 4815  for (;;)
4815        else        else
4816  #endif     /* SUPPORT_UCP */  #endif     /* SUPPORT_UCP */
4817    
4818  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4819        /* UTF-8 mode */        if (utf)
       if (utf8)  
4820          {          {
4821          for (fi = min;; fi++)          for (fi = min;; fi++)
4822            {            {
# Line 4942  for (;;) Line 4979  for (;;)
4979          }          }
4980        else        else
4981  #endif  #endif
4982        /* Not UTF-8 mode */        /* Not UTF mode */
4983          {          {
4984          for (fi = min;; fi++)          for (fi = min;; fi++)
4985            {            {
# Line 5241  for (;;) Line 5278  for (;;)
5278            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5279            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5280            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5281            if (utf8) BACKCHAR(eptr);            if (utf) BACKCHAR(eptr);
5282            }            }
5283          }          }
5284    
# Line 5258  for (;;) Line 5295  for (;;)
5295              SCHECK_PARTIAL();              SCHECK_PARTIAL();
5296              break;              break;
5297              }              }
5298            if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5299            if (UCD_CATEGORY(c) == ucp_M) break;            if (UCD_CATEGORY(c) == ucp_M) break;
5300            eptr += len;            eptr += len;
5301            while (eptr < md->end_subject)            while (eptr < md->end_subject)
5302              {              {
5303              len = 1;              len = 1;
5304              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5305              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
5306              eptr += len;              eptr += len;
5307              }              }
# Line 5281  for (;;) Line 5318  for (;;)
5318            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5319            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
5320              {              {
5321              if (!utf8) c = *eptr; else              if (!utf) c = *eptr; else
5322                {                {
5323                BACKCHAR(eptr);                BACKCHAR(eptr);
5324                GETCHAR(c, eptr);                GETCHAR(c, eptr);
# Line 5295  for (;;) Line 5332  for (;;)
5332        else        else
5333  #endif   /* SUPPORT_UCP */  #endif   /* SUPPORT_UCP */
5334    
5335  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
5336        /* UTF-8 mode */        if (utf)
   
       if (utf8)  
5337          {          {
5338          switch(ctype)          switch(ctype)
5339            {            {
# Line 5314  for (;;) Line 5349  for (;;)
5349                  }                  }
5350                if (IS_NEWLINE(eptr)) break;                if (IS_NEWLINE(eptr)) break;
5351                eptr++;                eptr++;
5352                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5353                }                }
5354              }              }
5355    
# Line 5331  for (;;) Line 5366  for (;;)
5366                  }                  }
5367                if (IS_NEWLINE(eptr)) break;                if (IS_NEWLINE(eptr)) break;
5368                eptr++;                eptr++;
5369                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5370                }                }
5371              }              }
5372            break;            break;
# Line 5347  for (;;) Line 5382  for (;;)
5382                  break;                  break;
5383                  }                  }
5384                eptr++;                eptr++;
5385                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5386                }                }
5387              }              }
5388            else            else
# Line 5580  for (;;) Line 5615  for (;;)
5615            }            }
5616          }          }
5617        else        else
5618  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
5619          /* Not UTF mode */
       /* Not UTF-8 mode */  
5620          {          {
5621          switch(ctype)          switch(ctype)
5622            {            {
# Line 5828  switch (frame->Xwhere) Line 5862  switch (frame->Xwhere)
5862    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5863    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5864    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
5865    LBL(65) LBL(66)    LBL(65) LBL(66)
5866  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
5867    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5868    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
5869  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5870    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5871    LBL(59) LBL(60) LBL(61) LBL(62)    LBL(59) LBL(60) LBL(61) LBL(62)
5872  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
5873  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
5874    default:    default:
5875    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
5876    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
# Line 5925  Returns:          > 0 => success; value Line 5959  Returns:          > 0 => success; value
5959                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
5960  */  */
5961    
5962    #ifdef COMPILE_PCRE8
5963  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
5964  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
5965    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5966    int offsetcount)    int offsetcount)
5967    #else
5968    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
5969    pcre16_exec(const pcre *argument_re, const pcre_extra *extra_data,
5970      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
5971      int offsetcount)
5972    #endif
5973  {  {
5974  int rc, ocount, arg_offset_max;  int rc, ocount, arg_offset_max;
 int first_byte = -1;  
 int req_byte = -1;  
 int req_byte2 = -1;  
5975  int newline;  int newline;
5976  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
5977  BOOL anchored;  BOOL anchored;
5978  BOOL startline;  BOOL startline;
5979  BOOL firstline;  BOOL firstline;
5980  BOOL first_byte_caseless = FALSE;  BOOL utf;
5981  BOOL req_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
5982  BOOL utf8;  BOOL has_req_char = FALSE;
5983    pcre_uchar first_char = 0;
5984    pcre_uchar first_char2 = 0;
5985    pcre_uchar req_char = 0;
5986    pcre_uchar req_char2 = 0;
5987  match_data match_block;  match_data match_block;
5988  match_data *md = &match_block;  match_data *md = &match_block;
5989  const uschar *tables;  const pcre_uint8 *tables;
5990  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
5991  USPTR start_match = (USPTR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
5992  USPTR end_subject;  PCRE_PUCHAR end_subject;
5993  USPTR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
5994  USPTR req_byte_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
5995    
5996  pcre_study_data internal_study;  pcre_study_data internal_study;
5997  const pcre_study_data *study;  const pcre_study_data *study;
# Line 5971  follows immediately afterwards. Other va Line 6013  follows immediately afterwards. Other va
6013  during "normal" pcre_exec() processing, not when the JIT support is in use,  during "normal" pcre_exec() processing, not when the JIT support is in use,
6014  so they are set up later. */  so they are set up later. */
6015    
6016  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6017    utf = md->utf = (re->options & PCRE_UTF8) != 0;
6018  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6019                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6020    
6021  /* Check a UTF-8 string if required. Pass back the character offset and error  /* Check a UTF-8 string if required. Pass back the character offset and error
6022  code for an invalid string if a results vector is available. */  code for an invalid string if a results vector is available. */
6023    
6024  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6025  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6026    {    {
6027    int erroroffset;    int erroroffset;
6028    int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);    int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6029    if (errorcode != 0)    if (errorcode != 0)
6030      {      {
6031      if (offsetcount >= 2)      if (offsetcount >= 2)
# Line 5994  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 6037  if (utf8 && (options & PCRE_NO_UTF8_CHEC
6037        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6038      }      }
6039    
6040    /* Check that a start_offset points to the start of a UTF-8 character. */    /* Check that a start_offset points to the start of a UTF character. */
6041    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
6042        (((USPTR)subject)[start_offset] & 0xc0) == 0x80)        NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6043      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
6044    }    }
6045  #endif  #endif
# Line 6011  matching. */ Line 6054  matching. */
6054  if (extra_data != NULL  if (extra_data != NULL
6055      && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0      && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
6056      && extra_data->executable_jit != NULL      && extra_data->executable_jit != NULL
6057        && (extra_data->flags & PCRE_EXTRA_TABLES) == 0
6058      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6059                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
6060    return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,    return PRIV(jit_exec)(re, extra_data->executable_jit,
6061      start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)      (const pcre_uchar *)subject, length, start_offset, options,
6062        ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
6063      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
6064  #endif  #endif
6065    
6066  /* Carry on with non-JIT matching. This information is for finding all the  /* Carry on with non-JIT matching. This information is for finding all the
6067  numbers associated with a given name, for condition testing. */  numbers associated with a given name, for condition testing. */
6068    
6069  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (pcre_uchar *)re + re->name_table_offset;
6070  md->name_count = re->name_count;  md->name_count = re->name_count;
6071  md->name_entry_size = re->name_entry_size;  md->name_entry_size = re->name_entry_size;
6072    
# Line 6055  if (extra_data != NULL) Line 6100  if (extra_data != NULL)
6100  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
6101  in other programs later. */  in other programs later. */
6102    
6103  if (tables == NULL) tables = _pcre_default_tables;  if (tables == NULL) tables = PRIV(default_tables);
6104    
6105  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
6106  test for a regex that was compiled on a host of opposite endianness. If this is  test for a regex that was compiled on a host of opposite endianness. If this is
# Line 6064  study data too. */ Line 6109  study data too. */
6109    
6110  if (re->magic_number != MAGIC_NUMBER)  if (re->magic_number != MAGIC_NUMBER)
6111    {    {
6112    re = _pcre_try_flipped(re, &internal_re, study, &internal_study);    re = PRIV(try_flipped)(re, &internal_re, study, &internal_study);
6113    if (re == NULL) return PCRE_ERROR_BADMAGIC;    if (re == NULL) return PCRE_ERROR_BADMAGIC;
6114    if (study != NULL) study = &internal_study;    if (study != NULL) study = &internal_study;
6115    }    }
6116    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6117    
6118  /* Set up other data */  /* Set up other data */
6119    
# Line 6077  firstline = (re->options & PCRE_FIRSTLIN Line 6123  firstline = (re->options & PCRE_FIRSTLIN
6123    
6124  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
6125    
6126  md->start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const pcre_uchar *)external_re + re->name_table_offset +
6127    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
6128    
6129  md->start_subject = (USPTR)subject;  md->start_subject = (PCRE_PUCHAR)subject;
6130  md->start_offset = start_offset;  md->start_offset = start_offset;
6131  md->end_subject = md->start_subject + length;  md->end_subject = md->start_subject + length;
6132  end_subject = md->end_subject;  end_subject = md->end_subject;
# Line 6104  md->recursive = NULL; Line 6150  md->recursive = NULL;
6150  md->hasthen = (re->flags & PCRE_HASTHEN) != 0;  md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6151    
6152  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
6153    md->fcc = tables + fcc_offset;
6154  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
6155    
6156  /* Handle different \R options. */  /* Handle different \R options. */
# Line 6217  if (md->offset_vector != NULL) Line 6264  if (md->offset_vector != NULL)
6264    md->offset_vector[0] = md->offset_vector[1] = -1;    md->offset_vector[0] = md->offset_vector[1] = -1;
6265    }    }
6266    
6267  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_char value is
6268  never set for an anchored regular expression, but the anchoring may be forced  never set for an anchored regular expression, but the anchoring may be forced
6269  at run time, so we have to test for anchoring. The first char may be unset for  at run time, so we have to test for anchoring. The first char may be unset for
6270  an unanchored pattern, of course. If there's no first char and the pattern was  an unanchored pattern, of course. If there's no first char and the pattern was
# Line 6227  if (!anchored) Line 6274  if (!anchored)
6274    {    {
6275    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
6276      {      {
6277      first_byte = re->first_byte & 255;      has_first_char = TRUE;
6278      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = re->first_char;
6279        first_byte = md->lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
6280          {
6281          first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6282    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6283          if (utf && first_char > 127)
6284            first_char2 = UCD_OTHERCASE(first_char);
6285    #endif
6286          }
6287      }      }
6288    else    else
6289      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 6242  character" set. */ Line 6296  character" set. */
6296    
6297  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
6298    {    {
6299    req_byte = re->req_byte & 255;    has_req_char = TRUE;
6300    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = re->req_char;
6301    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
6302        {
6303        req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6304    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6305        if (utf && req_char > 127)
6306          req_char2 = UCD_OTHERCASE(req_char);
6307    #endif
6308        }
6309    }    }
6310    
6311    
   
   
6312  /* ==========================================================================*/  /* ==========================================================================*/
6313    
6314  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
# Line 6257  the loop runs just once. */ Line 6316  the loop runs just once. */
6316    
6317  for(;;)  for(;;)
6318    {    {
6319    USPTR save_end_subject = end_subject;    PCRE_PUCHAR save_end_subject = end_subject;
6320    USPTR new_start_match;    PCRE_PUCHAR new_start_match;
6321    
6322    /* If firstline is TRUE, the start of the match is constrained to the first    /* If firstline is TRUE, the start of the match is constrained to the first
6323    line of a multiline string. That is, the match must be before or at the first    line of a multiline string. That is, the match must be before or at the first
# Line 6268  for(;;) Line 6327  for(;;)
6327    
6328    if (firstline)    if (firstline)
6329      {      {
6330      USPTR t = start_match;      PCRE_PUCHAR t = start_match;
6331  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6332      if (utf8)      if (utf)
6333        {        {
6334        while (t < md->end_subject && !IS_NEWLINE(t))        while (t < md->end_subject && !IS_NEWLINE(t))
6335          {          {
6336          t++;          t++;
6337          while (t < end_subject && (*t & 0xc0) == 0x80) t++;          ACROSSCHAR(t < end_subject, *t, t++);
6338          }          }
6339        }        }
6340      else      else
# Line 6292  for(;;) Line 6351  for(;;)
6351    
6352    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6353      {      {
6354      /* Advance to a unique first byte if there is one. */      /* Advance to a unique first char if there is one. */
6355    
6356      if (first_byte >= 0)      if (has_first_char)
6357        {        {
6358        if (first_byte_caseless)        if (first_char != first_char2)
6359          while (start_match < end_subject && md->lcc[*start_match] != first_byte)          while (start_match < end_subject &&
6360                *start_match != first_char && *start_match != first_char2)
6361            start_match++;            start_match++;
6362        else        else
6363          while (start_match < end_subject && *start_match != first_byte)          while (start_match < end_subject && *start_match != first_char)
6364            start_match++;            start_match++;
6365        }        }
6366    
# Line 6310  for(;;) Line 6370  for(;;)
6370        {        {
6371        if (start_match > md->start_subject + start_offset)        if (start_match > md->start_subject + start_offset)
6372          {          {
6373  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6374          if (utf8)          if (utf)
6375            {            {
6376            while (start_match < end_subject && !WAS_NEWLINE(start_match))            while (start_match < end_subject && !WAS_NEWLINE(start_match))
6377              {              {
6378              start_match++;              start_match++;
6379              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)              ACROSSCHAR(start_match < end_subject, *start_match,
6380                start_match++;                start_match++);
6381              }              }
6382            }            }
6383          else          else
# Line 6344  for(;;) Line 6404  for(;;)
6404        while (start_match < end_subject)        while (start_match < end_subject)
6405          {          {
6406          register unsigned int c = *start_match;          register unsigned int c = *start_match;
6407    #ifndef COMPILE_PCRE8
6408            if (c > 255) c = 255;
6409    #endif
6410          if ((start_bits[c/8] & (1 << (c&7))) == 0)          if ((start_bits[c/8] & (1 << (c&7))) == 0)
6411            {            {
6412            start_match++;            start_match++;
6413  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6414            if (utf8)            /* In non 8-bit mode, the iteration will stop for
6415              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)            characters > 255 at the beginning or not stop at all. */
6416                start_match++;            if (utf)
6417                ACROSSCHAR(start_match < end_subject, *start_match,
6418                  start_match++);
6419  #endif  #endif
6420            }            }
6421          else break;          else break;
# Line 6365  for(;;) Line 6430  for(;;)
6430    /* The following two optimizations are disabled for partial matching or if    /* The following two optimizations are disabled for partial matching or if
6431    disabling is explicitly requested. */    disabling is explicitly requested. */
6432    
6433    if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6434      {      {
6435      /* If the pattern was studied, a minimum subject length may be set. This is      /* If the pattern was studied, a minimum subject length may be set. This is
6436      a lower bound; no actual string of that length may actually match the      a lower bound; no actual string of that length may actually match the
# Line 6379  for(;;) Line 6444  for(;;)
6444        break;        break;
6445        }        }
6446    
6447      /* If req_byte is set, we know that that character must appear in the      /* If req_char is set, we know that that character must appear in the
6448      subject for the match to succeed. If the first character is set, req_byte      subject for the match to succeed. If the first character is set, req_char
6449      must be later in the subject; otherwise the test starts at the match point.      must be later in the subject; otherwise the test starts at the match point.
6450      This optimization can save a huge amount of backtracking in patterns with      This optimization can save a huge amount of backtracking in patterns with
6451      nested unlimited repeats that aren't going to match. Writing separate code      nested unlimited repeats that aren't going to match. Writing separate code
# Line 6393  for(;;) Line 6458  for(;;)
6458      32-megabyte string... so we don't do this when the string is sufficiently      32-megabyte string... so we don't do this when the string is sufficiently
6459      long. */      long. */
6460    
6461      if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)      if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6462        {        {
6463        register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);        register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6464    
6465        /* We don't need to repeat the search if we haven't yet reached the        /* We don't need to repeat the search if we haven't yet reached the
6466        place we found it at last time. */        place we found it at last time. */
6467    
6468        if (p > req_byte_ptr)        if (p > req_char_ptr)
6469          {          {
6470          if (req_byte_caseless)          if (req_char != req_char2)
6471            {            {
6472            while (p < end_subject)            while (p < end_subject)
6473              {              {
6474              register int pp = *p++;              register int pp = *p++;
6475              if (pp == req_byte || pp == req_byte2) { p--; break; }              if (pp == req_char || pp == req_char2) { p--; break; }
6476              }              }
6477            }            }
6478          else          else
6479            {            {
6480            while (p < end_subject)            while (p < end_subject)
6481              {              {
6482              if (*p++ == req_byte) { p--; break; }              if (*p++ == req_char) { p--; break; }
6483              }              }
6484            }            }
6485    
# Line 6431  for(;;) Line 6496  for(;;)
6496          found it, so that we don't search again next time round the loop if          found it, so that we don't search again next time round the loop if
6497          the start hasn't passed this character yet. */          the start hasn't passed this character yet. */
6498    
6499          req_byte_ptr = p;          req_char_ptr = p;
6500          }          }
6501        }        }
6502      }      }
# Line 6479  for(;;) Line 6544  for(;;)
6544      case MATCH_PRUNE:      case MATCH_PRUNE:
6545      case MATCH_THEN:      case MATCH_THEN:
6546      new_start_match = start_match + 1;      new_start_match = start_match + 1;
6547  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6548      if (utf8)      if (utf)
6549        while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)        ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6550          new_start_match++;          new_start_match++);
6551  #endif  #endif
6552      break;      break;
6553    
# Line 6635  if (start_partial != NULL) Line 6700  if (start_partial != NULL)
6700    md->mark = NULL;    md->mark = NULL;
6701    if (offsetcount > 1)    if (offsetcount > 1)
6702      {      {
6703      offsets[0] = (int)(start_partial - (USPTR)subject);      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
6704      offsets[1] = (int)(end_subject - (USPTR)subject);      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
6705      }      }
6706    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
6707    }    }

Legend:
Removed from v.723  
changed lines
  Added in v.795

  ViewVC Help
Powered by ViewVC 1.1.5