/[pcre]/code/branches/pcre16/pcre_exec.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_exec.c

Parent Directory | Revision Log | View Patch

code/trunk/pcre_exec.c revision 723 by ph10, Sat Oct 8 15:55:23 2011 UTC | code/branches/pcre16/pcre_exec.c revision 781 by zherczeg, Sat Dec 3 07:58:30 2011 UTC
# Line 121  Returns:     nothing Line 121  Returns:     nothing
121  */  */
122    
123  static void  static void
124  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
125  {  {
126  unsigned int c;  unsigned int c;
127  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 152  Returns:      < 0 if not matched, otherw Line 152  Returns:      < 0 if not matched, otherw
152  */  */
153    
154  static int  static int
155  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
156    BOOL caseless)    BOOL caseless)
157  {  {
158  USPTR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
159  register USPTR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
160    
161  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
162  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 183  if (caseless) Line 183  if (caseless)
183    {    {
184  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
185  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
186    if (md->utf8)    if (md->utf)
187      {      {
188      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
189      bytes matched may differ, because there are some characters whose upper and      bytes matched may differ, because there are some characters whose upper and
# Line 193  if (caseless) Line 193  if (caseless)
193      the latter. It is important, therefore, to check the length along the      the latter. It is important, therefore, to check the length along the
194      reference, not along the subject (earlier code did this wrong). */      reference, not along the subject (earlier code did this wrong). */
195    
196      USPTR endptr = p + length;      PCRE_PUCHAR endptr = p + length;
197      while (p < endptr)      while (p < endptr)
198        {        {
199        int c, d;        int c, d;
# Line 354  typedef struct heapframe { Line 354  typedef struct heapframe {
354    
355    /* Function arguments that may change */    /* Function arguments that may change */
356    
357    USPTR Xeptr;    PCRE_PUCHAR Xeptr;
358    const uschar *Xecode;    const pcre_uchar *Xecode;
359    USPTR Xmstart;    PCRE_PUCHAR Xmstart;
360    USPTR Xmarkptr;    PCRE_PUCHAR Xmarkptr;
361    int Xoffset_top;    int Xoffset_top;
362    eptrblock *Xeptrb;    eptrblock *Xeptrb;
363    unsigned int Xrdepth;    unsigned int Xrdepth;
364    
365    /* Function local variables */    /* Function local variables */
366    
367    USPTR Xcallpat;    PCRE_PUCHAR Xcallpat;
368  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
369    USPTR Xcharptr;    PCRE_PUCHAR Xcharptr;
370  #endif  #endif
371    USPTR Xdata;    PCRE_PUCHAR Xdata;
372    USPTR Xnext;    PCRE_PUCHAR Xnext;
373    USPTR Xpp;    PCRE_PUCHAR Xpp;
374    USPTR Xprev;    PCRE_PUCHAR Xprev;
375    USPTR Xsaved_eptr;    PCRE_PUCHAR Xsaved_eptr;
376    
377    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
378    
# Line 385  typedef struct heapframe { Line 385  typedef struct heapframe {
385    int Xprop_value;    int Xprop_value;
386    int Xprop_fail_result;    int Xprop_fail_result;
387    int Xoclength;    int Xoclength;
388    uschar Xocchars[8];    pcre_uchar Xocchars[6];
389  #endif  #endif
390    
391    int Xcodelink;    int Xcodelink;
# Line 450  the subject. */ Line 450  the subject. */
450    
451    
452  /* Performance note: It might be tempting to extract commonly used fields from  /* Performance note: It might be tempting to extract commonly used fields from
453  the md structure (e.g. utf8, end_subject) into individual variables to improve  the md structure (e.g. utf, end_subject) into individual variables to improve
454  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
455  made performance worse.  made performance worse.
456    
# Line 474  Returns:       MATCH_MATCH if matched Line 474  Returns:       MATCH_MATCH if matched
474  */  */
475    
476  static int  static int
477  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
478    const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb,    PCRE_PUCHAR mstart, const pcre_uchar *markptr, int offset_top,
479    unsigned int rdepth)    match_data *md, eptrblock *eptrb, unsigned int rdepth)
480  {  {
481  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
482  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 485  so they can be ordinary variables in all Line 485  so they can be ordinary variables in all
485  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
486  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
487  register unsigned int c;   /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
488  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf;         /* Local copy of UTF flag for speed */
489    
490  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
491  BOOL caseless;  BOOL caseless;
# Line 586  below are for variables that do not have Line 586  below are for variables that do not have
586  to RMATCH(). */  to RMATCH(). */
587    
588  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
589  const uschar *charptr;  const pcre_uchar *charptr;
590  #endif  #endif
591  const uschar *callpat;  const pcre_uchar *callpat;
592  const uschar *data;  const pcre_uchar *data;
593  const uschar *next;  const pcre_uchar *next;
594  USPTR         pp;  PCRE_PUCHAR       pp;
595  const uschar *prev;  const pcre_uchar *prev;
596  USPTR         saved_eptr;  PCRE_PUCHAR       saved_eptr;
597    
598  recursion_info new_recursive;  recursion_info new_recursive;
599    
# Line 606  int prop_type; Line 606  int prop_type;
606  int prop_value;  int prop_value;
607  int prop_fail_result;  int prop_fail_result;
608  int oclength;  int oclength;
609  uschar occhars[8];  pcre_uchar occhars[6];
610  #endif  #endif
611    
612  int codelink;  int codelink;
# Line 660  complicated macro. It has to be used in Line 660  complicated macro. It has to be used in
660  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
661    
662  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
663  utf8 = md->utf8;       /* Local copy of the flag */  utf = md->utf;       /* Local copy of the flag */
664  #else  #else
665  utf8 = FALSE;  utf = FALSE;
666  #endif  #endif
667    
668  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
# Line 702  for (;;) Line 702  for (;;)
702      {      {
703      case OP_MARK:      case OP_MARK:
704      markptr = ecode + 2;      markptr = ecode + 2;
705      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
706        eptrb, RM55);        eptrb, RM55);
707    
708      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
# Line 713  for (;;) Line 713  for (;;)
713      unaltered. */      unaltered. */
714    
715      if (rrc == MATCH_SKIP_ARG &&      if (rrc == MATCH_SKIP_ARG &&
716          strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)          STRCMP_UC_UC(markptr, md->start_match_ptr) == 0)
717        {        {
718        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
719        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
# Line 728  for (;;) Line 728  for (;;)
728      /* COMMIT overrides PRUNE, SKIP, and THEN */      /* COMMIT overrides PRUNE, SKIP, and THEN */
729    
730      case OP_COMMIT:      case OP_COMMIT:
731      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
732        eptrb, RM52);        eptrb, RM52);
733      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
734          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
# Line 739  for (;;) Line 739  for (;;)
739      /* PRUNE overrides THEN */      /* PRUNE overrides THEN */
740    
741      case OP_PRUNE:      case OP_PRUNE:
742      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
743        eptrb, RM51);        eptrb, RM51);
744      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
745      MRRETURN(MATCH_PRUNE);      MRRETURN(MATCH_PRUNE);
746    
747      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
748      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
749        eptrb, RM56);        eptrb, RM56);
750      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
751      md->mark = ecode + 2;      md->mark = ecode + 2;
# Line 754  for (;;) Line 754  for (;;)
754      /* SKIP overrides PRUNE and THEN */      /* SKIP overrides PRUNE and THEN */
755    
756      case OP_SKIP:      case OP_SKIP:
757      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
758        eptrb, RM53);        eptrb, RM53);
759      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
760        RRETURN(rrc);        RRETURN(rrc);
# Line 762  for (;;) Line 762  for (;;)
762      MRRETURN(MATCH_SKIP);      MRRETURN(MATCH_SKIP);
763    
764      case OP_SKIP_ARG:      case OP_SKIP_ARG:
765      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
766        eptrb, RM57);        eptrb, RM57);
767      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
768        RRETURN(rrc);        RRETURN(rrc);
# Line 780  for (;;) Line 780  for (;;)
780      match pointer to do this. */      match pointer to do this. */
781    
782      case OP_THEN:      case OP_THEN:
783      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
784        eptrb, RM54);        eptrb, RM54);
785      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
786      md->start_match_ptr = ecode;      md->start_match_ptr = ecode;
787      MRRETURN(MATCH_THEN);      MRRETURN(MATCH_THEN);
788    
789      case OP_THEN_ARG:      case OP_THEN_ARG:
790      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
791        md, eptrb, RM58);        md, eptrb, RM58);
792      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
793      md->start_match_ptr = ecode;      md->start_match_ptr = ecode;
794      md->mark = ecode + 2;      md->mark = ecode + 2;
795      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
796    
797      /* Handle an atomic group that does not contain any capturing parentheses.      /* Handle an atomic group that does not contain any capturing parentheses.
798      This can be handled like an assertion. Prior to 8.13, all atomic groups      This can be handled like an assertion. Prior to 8.13, all atomic groups
799      were handled this way. In 8.13, the code was changed as below for ONCE, so      were handled this way. In 8.13, the code was changed as below for ONCE, so
800      that backups pass through the group and thereby reset captured values.      that backups pass through the group and thereby reset captured values.
801      However, this uses a lot more stack, so in 8.20, atomic groups that do not      However, this uses a lot more stack, so in 8.20, atomic groups that do not
802      contain any captures generate OP_ONCE_NC, which can be handled in the old,      contain any captures generate OP_ONCE_NC, which can be handled in the old,
803      less stack intensive way.      less stack intensive way.
804    
805      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
# Line 816  for (;;) Line 816  for (;;)
816        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
817          {          {
818          mstart = md->start_match_ptr;          mstart = md->start_match_ptr;
819            markptr = md->mark;
820          break;          break;
821          }          }
822        if (rrc == MATCH_THEN)        if (rrc == MATCH_THEN)
823          {          {
824          next = ecode + GET(ecode,1);          next = ecode + GET(ecode,1);
825          if (md->start_match_ptr < next &&          if (md->start_match_ptr < next &&
826              (*ecode == OP_ALT || *next == OP_ALT))              (*ecode == OP_ALT || *next == OP_ALT))
827            rrc = MATCH_NOMATCH;            rrc = MATCH_NOMATCH;
828          }          }
829    
830        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
831        ecode += GET(ecode,1);        ecode += GET(ecode,1);
832        }        }
# Line 867  for (;;) Line 868  for (;;)
868        }        }
869      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
870        {        {
871        md->match_function_type = MATCH_CBEGROUP;        md->match_function_type = MATCH_CBEGROUP;
872        RMATCH(eptr, prev, offset_top, md, eptrb, RM66);        RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
873        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
874        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
# Line 915  for (;;) Line 916  for (;;)
916        for (;;)        for (;;)
917          {          {
918          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
919          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
920            eptrb, RM1);            eptrb, RM1);
921          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
922    
923          /* If we backed up to a THEN, check whether it is within the current          /* If we backed up to a THEN, check whether it is within the current
924          branch by comparing the address of the THEN that is passed back with          branch by comparing the address of the THEN that is passed back with
925          the end of the branch. If it is within the current branch, and the          the end of the branch. If it is within the current branch, and the
926          branch is one of two or more alternatives (it either starts or ends          branch is one of two or more alternatives (it either starts or ends
927          with OP_ALT), we have reached the limit of THEN's action, so convert          with OP_ALT), we have reached the limit of THEN's action, so convert
928          the return code to NOMATCH, which will cause normal backtracking to          the return code to NOMATCH, which will cause normal backtracking to
929          happen from now on. Otherwise, THEN is passed back to an outer          happen from now on. Otherwise, THEN is passed back to an outer
930          alternative. This implements Perl's treatment of parenthesized groups,          alternative. This implements Perl's treatment of parenthesized groups,
931          where a group not containing | does not affect the current alternative,          where a group not containing | does not affect the current alternative,
932          that is, (X) is NOT the same as (X|(*F)). */          that is, (X) is NOT the same as (X|(*F)). */
933    
934          if (rrc == MATCH_THEN)          if (rrc == MATCH_THEN)
935            {            {
936            next = ecode + GET(ecode,1);            next = ecode + GET(ecode,1);
937            if (md->start_match_ptr < next &&            if (md->start_match_ptr < next &&
938                (*ecode == OP_ALT || *next == OP_ALT))                (*ecode == OP_ALT || *next == OP_ALT))
939              rrc = MATCH_NOMATCH;              rrc = MATCH_NOMATCH;
940            }            }
941    
942          /* Anything other than NOMATCH is passed back. */          /* Anything other than NOMATCH is passed back. */
943    
944          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 1003  for (;;) Line 1004  for (;;)
1004    
1005        else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)        else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1006          {          {
1007          ecode += _pcre_OP_lengths[*ecode];          ecode += PRIV(OP_lengths)[*ecode];
1008          goto TAIL_RECURSE;          goto TAIL_RECURSE;
1009          }          }
1010    
1011        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1012    
1013        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1014          RM2);          RM2);
1015    
1016        /* See comment in the code for capturing groups above about handling        /* See comment in the code for capturing groups above about handling
1017        THEN. */        THEN. */
1018    
1019        if (rrc == MATCH_THEN)        if (rrc == MATCH_THEN)
1020          {          {
1021          next = ecode + GET(ecode,1);          next = ecode + GET(ecode,1);
1022          if (md->start_match_ptr < next &&          if (md->start_match_ptr < next &&
1023              (*ecode == OP_ALT || *next == OP_ALT))              (*ecode == OP_ALT || *next == OP_ALT))
1024            rrc = MATCH_NOMATCH;            rrc = MATCH_NOMATCH;
1025          }          }
1026    
1027        if (rrc != MATCH_NOMATCH)        if (rrc != MATCH_NOMATCH)
1028          {          {
1029          if (rrc == MATCH_ONCE)          if (rrc == MATCH_ONCE)
1030            {            {
1031            const uschar *scode = ecode;            const pcre_uchar *scode = ecode;
1032            if (*scode != OP_ONCE)           /* If not at start, find it */            if (*scode != OP_ONCE)           /* If not at start, find it */
1033              {              {
1034              while (*scode == OP_ALT) scode += GET(scode, 1);              while (*scode == OP_ALT) scode += GET(scode, 1);
# Line 1040  for (;;) Line 1041  for (;;)
1041        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1042        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1043        }        }
1044    
1045      if (md->mark == NULL) md->mark = markptr;      if (md->mark == NULL) md->mark = markptr;
1046      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
1047    
# Line 1093  for (;;) Line 1094  for (;;)
1094          md->offset_vector[md->offset_end - number] =          md->offset_vector[md->offset_end - number] =
1095            (int)(eptr - md->start_subject);            (int)(eptr - md->start_subject);
1096          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1097          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1098            eptrb, RM63);            eptrb, RM63);
1099          if (rrc == MATCH_KETRPOS)          if (rrc == MATCH_KETRPOS)
1100            {            {
# Line 1104  for (;;) Line 1105  for (;;)
1105            matched_once = TRUE;            matched_once = TRUE;
1106            continue;            continue;
1107            }            }
1108    
1109          /* See comment in the code for capturing groups above about handling          /* See comment in the code for capturing groups above about handling
1110          THEN. */          THEN. */
1111    
1112          if (rrc == MATCH_THEN)          if (rrc == MATCH_THEN)
1113            {            {
1114            next = ecode + GET(ecode,1);            next = ecode + GET(ecode,1);
1115            if (md->start_match_ptr < next &&            if (md->start_match_ptr < next &&
1116                (*ecode == OP_ALT || *next == OP_ALT))                (*ecode == OP_ALT || *next == OP_ALT))
1117              rrc = MATCH_NOMATCH;              rrc = MATCH_NOMATCH;
1118            }            }
1119    
1120          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1121          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
# Line 1166  for (;;) Line 1167  for (;;)
1167      for (;;)      for (;;)
1168        {        {
1169        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1170        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1171          eptrb, RM48);          eptrb, RM48);
1172        if (rrc == MATCH_KETRPOS)        if (rrc == MATCH_KETRPOS)
1173          {          {
# Line 1176  for (;;) Line 1177  for (;;)
1177          matched_once = TRUE;          matched_once = TRUE;
1178          continue;          continue;
1179          }          }
1180    
1181        /* See comment in the code for capturing groups above about handling        /* See comment in the code for capturing groups above about handling
1182        THEN. */        THEN. */
1183    
1184        if (rrc == MATCH_THEN)        if (rrc == MATCH_THEN)
1185          {          {
1186          next = ecode + GET(ecode,1);          next = ecode + GET(ecode,1);
1187          if (md->start_match_ptr < next &&          if (md->start_match_ptr < next &&
1188              (*ecode == OP_ALT || *next == OP_ALT))              (*ecode == OP_ALT || *next == OP_ALT))
1189            rrc = MATCH_NOMATCH;            rrc = MATCH_NOMATCH;
1190          }          }
1191    
1192        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1193        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 1231  for (;;) Line 1232  for (;;)
1232          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1233          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1234          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1235          cb.mark             = markptr;          cb.mark             = (unsigned char *)markptr;
1236          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);          if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1237          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1238          }          }
1239        ecode += _pcre_OP_lengths[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1240        }        }
1241    
1242      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1252  for (;;) Line 1253  for (;;)
1253        else        else
1254          {          {
1255          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1256          condition =  (recno == RREF_ANY || recno == md->recursive->group_num);          condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1257    
1258          /* If the test is for recursion into a specific subpattern, and it is          /* If the test is for recursion into a specific subpattern, and it is
1259          false, but the test was set up by name, scan the table to see if the          false, but the test was set up by name, scan the table to see if the
1260          name refers to any other numbers, and test them. The condition is true          name refers to any other numbers, and test them. The condition is true
1261          if any one is set. */          if any one is set. */
1262    
1263          if (!condition && condcode == OP_NRREF && recno != RREF_ANY)          if (!condition && condcode == OP_NRREF)
1264            {            {
1265            uschar *slotA = md->name_table;            pcre_uchar *slotA = md->name_table;
1266            for (i = 0; i < md->name_count; i++)            for (i = 0; i < md->name_count; i++)
1267              {              {
1268              if (GET2(slotA, 0) == recno) break;              if (GET2(slotA, 0) == recno) break;
# Line 1274  for (;;) Line 1275  for (;;)
1275    
1276            if (i < md->name_count)            if (i < md->name_count)
1277              {              {
1278              uschar *slotB = slotA;              pcre_uchar *slotB = slotA;
1279              while (slotB > md->name_table)              while (slotB > md->name_table)
1280                {                {
1281                slotB -= md->name_entry_size;                slotB -= md->name_entry_size;
1282                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1283                  {                  {
1284                  condition = GET2(slotB, 0) == md->recursive->group_num;                  condition = GET2(slotB, 0) == md->recursive->group_num;
1285                  if (condition) break;                  if (condition) break;
# Line 1294  for (;;) Line 1295  for (;;)
1295                for (i++; i < md->name_count; i++)                for (i++; i < md->name_count; i++)
1296                  {                  {
1297                  slotB += md->name_entry_size;                  slotB += md->name_entry_size;
1298                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                  if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1299                    {                    {
1300                    condition = GET2(slotB, 0) == md->recursive->group_num;                    condition = GET2(slotB, 0) == md->recursive->group_num;
1301                    if (condition) break;                    if (condition) break;
# Line 1307  for (;;) Line 1308  for (;;)
1308    
1309          /* Chose branch according to the condition */          /* Chose branch according to the condition */
1310    
1311          ecode += condition? 3 : GET(ecode, 1);          ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1312          }          }
1313        }        }
1314    
# Line 1324  for (;;) Line 1325  for (;;)
1325        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
1326          {          {
1327          int refno = offset >> 1;          int refno = offset >> 1;
1328          uschar *slotA = md->name_table;          pcre_uchar *slotA = md->name_table;
1329    
1330          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
1331            {            {
# Line 1338  for (;;) Line 1339  for (;;)
1339    
1340          if (i < md->name_count)          if (i < md->name_count)
1341            {            {
1342            uschar *slotB = slotA;            pcre_uchar *slotB = slotA;
1343            while (slotB > md->name_table)            while (slotB > md->name_table)
1344              {              {
1345              slotB -= md->name_entry_size;              slotB -= md->name_entry_size;
1346              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1347                {                {
1348                offset = GET2(slotB, 0) << 1;                offset = GET2(slotB, 0) << 1;
1349                condition = offset < offset_top &&                condition = offset < offset_top &&
# Line 1360  for (;;) Line 1361  for (;;)
1361              for (i++; i < md->name_count; i++)              for (i++; i < md->name_count; i++)
1362                {                {
1363                slotB += md->name_entry_size;                slotB += md->name_entry_size;
1364                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1365                  {                  {
1366                  offset = GET2(slotB, 0) << 1;                  offset = GET2(slotB, 0) << 1;
1367                  condition = offset < offset_top &&                  condition = offset < offset_top &&
# Line 1375  for (;;) Line 1376  for (;;)
1376    
1377        /* Chose branch according to the condition */        /* Chose branch according to the condition */
1378    
1379        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1380        }        }
1381    
1382      else if (condcode == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
# Line 1400  for (;;) Line 1401  for (;;)
1401          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1402          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1403          }          }
1404    
1405        /* PCRE doesn't allow the effect of (*THEN) to escape beyond an        /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1406        assertion; it is therefore treated as NOMATCH. */        assertion; it is therefore treated as NOMATCH. */
1407    
1408        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1409          {          {
1410          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1411          }          }
# Line 1432  for (;;) Line 1433  for (;;)
1433          ecode += 1 + LINK_SIZE;          ecode += 1 + LINK_SIZE;
1434          goto TAIL_RECURSE;          goto TAIL_RECURSE;
1435          }          }
1436    
1437        md->match_function_type = MATCH_CBEGROUP;        md->match_function_type = MATCH_CBEGROUP;
1438        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1439        RRETURN(rrc);        RRETURN(rrc);
# Line 1467  for (;;) Line 1468  for (;;)
1468        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1469        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1470        }        }
1471      ecode += 3;      ecode += 1 + IMM2_SIZE;
1472      break;      break;
1473    
1474    
# Line 1530  for (;;) Line 1531  for (;;)
1531          markptr = md->mark;          markptr = md->mark;
1532          break;          break;
1533          }          }
1534    
1535        /* PCRE does not allow THEN to escape beyond an assertion; it is treated        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1536        as NOMATCH. */        as NOMATCH. */
1537    
1538        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1539        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1540        }        }
# Line 1576  for (;;) Line 1577  for (;;)
1577          break;          break;
1578          }          }
1579    
1580        /* PCRE does not allow THEN to escape beyond an assertion; it is treated        /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1581        as NOMATCH. */        as NOMATCH. */
1582    
1583        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
# Line 1596  for (;;) Line 1597  for (;;)
1597    
1598      case OP_REVERSE:      case OP_REVERSE:
1599  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1600      if (utf8)      if (utf)
1601        {        {
1602        i = GET(ecode, 1);        i = GET(ecode, 1);
1603        while (i-- > 0)        while (i-- > 0)
# Line 1642  for (;;) Line 1643  for (;;)
1643        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1644        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1645        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1646        cb.mark             = markptr;        cb.mark             = (unsigned char *)markptr;
1647        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1648        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1649        }        }
# Line 1717  for (;;) Line 1718  for (;;)
1718        do        do
1719          {          {
1720          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1721          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1722            md, eptrb, RM6);            md, eptrb, RM6);
1723          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1724              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
# Line 1740  for (;;) Line 1741  for (;;)
1741          /* PCRE does not allow THEN to escape beyond a recursion; it is treated          /* PCRE does not allow THEN to escape beyond a recursion; it is treated
1742          as NOMATCH. */          as NOMATCH. */
1743    
1744          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1745            {            {
1746            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1747            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1826  for (;;) Line 1827  for (;;)
1827        }        }
1828      else saved_eptr = NULL;      else saved_eptr = NULL;
1829    
1830      /* If we are at the end of an assertion group or a non-capturing atomic      /* If we are at the end of an assertion group or a non-capturing atomic
1831      group, stop matching and return MATCH_MATCH, but record the current high      group, stop matching and return MATCH_MATCH, but record the current high
1832      water mark for use by positive assertions. We also need to record the match      water mark for use by positive assertions. We also need to record the match
1833      start in case it was changed by \K. */      start in case it was changed by \K. */
1834    
1835      if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||      if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1836           *prev == OP_ONCE_NC)           *prev == OP_ONCE_NC)
1837        {        {
1838        md->end_match_ptr = eptr;      /* For ONCE_NC */        md->end_match_ptr = eptr;      /* For ONCE_NC */
1839        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
# Line 2069  for (;;) Line 2070  for (;;)
2070        partial matching. */        partial matching. */
2071    
2072  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2073        if (utf8)        if (utf)
2074          {          {
2075          /* Get status of previous character */          /* Get status of previous character */
2076    
2077          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
2078            {            {
2079            USPTR lastptr = eptr - 1;            PCRE_PUCHAR lastptr = eptr - 1;
2080            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
2081            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2082            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
# Line 2188  for (;;) Line 2189  for (;;)
2189        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2190        }        }
2191      eptr++;      eptr++;
2192      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;      if (utf) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2193      ecode++;      ecode++;
2194      break;      break;
2195    
# Line 2477  for (;;) Line 2478  for (;;)
2478          break;          break;
2479    
2480          case PT_GC:          case PT_GC:
2481          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2482            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2483          break;          break;
2484    
# Line 2494  for (;;) Line 2495  for (;;)
2495          /* These are specials */          /* These are specials */
2496    
2497          case PT_ALNUM:          case PT_ALNUM:
2498          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2499               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2500            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2501          break;          break;
2502    
2503          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2504          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2505               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2506                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2507            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2508          break;          break;
2509    
2510          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2511          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2512               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2513               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2514                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
# Line 2515  for (;;) Line 2516  for (;;)
2516          break;          break;
2517    
2518          case PT_WORD:          case PT_WORD:
2519          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2520               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2521               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2522            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2523          break;          break;
# Line 2545  for (;;) Line 2546  for (;;)
2546      while (eptr < md->end_subject)      while (eptr < md->end_subject)
2547        {        {
2548        int len = 1;        int len = 1;
2549        if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }        if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2550        if (UCD_CATEGORY(c) != ucp_M) break;        if (UCD_CATEGORY(c) != ucp_M) break;
2551        eptr += len;        eptr += len;
2552        }        }
# Line 2566  for (;;) Line 2567  for (;;)
2567      case OP_REFI:      case OP_REFI:
2568      caseless = op == OP_REFI;      caseless = op == OP_REFI;
2569      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2570      ecode += 3;      ecode += 1 + IMM2_SIZE;
2571    
2572      /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2573    
# Line 2606  for (;;) Line 2607  for (;;)
2607        case OP_CRMINRANGE:        case OP_CRMINRANGE:
2608        minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2609        min = GET2(ecode, 1);        min = GET2(ecode, 1);
2610        max = GET2(ecode, 3);        max = GET2(ecode, 1 + IMM2_SIZE);
2611        if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2612        ecode += 5;        ecode += 1 + 2 * IMM2_SIZE;
2613        break;        break;
2614    
2615        default:               /* No repeat follows */        default:               /* No repeat follows */
# Line 2705  for (;;) Line 2706  for (;;)
2706      case OP_NCLASS:      case OP_NCLASS:
2707      case OP_CLASS:      case OP_CLASS:
2708        {        {
2709          /* The data variable is saved across frames, so the byte map needs to
2710          be stored there. */
2711    #define BYTE_MAP ((pcre_uint8 *)data)
2712        data = ecode + 1;                /* Save for matching */        data = ecode + 1;                /* Save for matching */
2713        ecode += 33;                     /* Advance past the item */        ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2714    
2715        switch (*ecode)        switch (*ecode)
2716          {          {
# Line 2727  for (;;) Line 2731  for (;;)
2731          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2732          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2733          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2734          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2735          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2736          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2737          break;          break;
2738    
2739          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2739  for (;;) Line 2743  for (;;)
2743    
2744        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
2745    
2746  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2747        /* UTF-8 mode */        if (utf)
       if (utf8)  
2748          {          {
2749          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2750            {            {
# Line 2756  for (;;) Line 2759  for (;;)
2759              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2760              }              }
2761            else            else
2762              {              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  
             }  
2763            }            }
2764          }          }
2765        else        else
2766  #endif  #endif
2767        /* Not UTF-8 mode */        /* Not UTF mode */
2768          {          {
2769          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2770            {            {
# Line 2773  for (;;) Line 2774  for (;;)
2774              MRRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2775              }              }
2776            c = *eptr++;            c = *eptr++;
2777            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2778              if (c > 255)
2779                {
2780                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2781                }
2782              else
2783    #endif
2784                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2785            }            }
2786          }          }
2787    
# Line 2787  for (;;) Line 2795  for (;;)
2795    
2796        if (minimize)        if (minimize)
2797          {          {
2798  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2799          /* UTF-8 mode */          if (utf)
         if (utf8)  
2800            {            {
2801            for (fi = min;; fi++)            for (fi = min;; fi++)
2802              {              {
# Line 2807  for (;;) Line 2814  for (;;)
2814                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2815                }                }
2816              else              else
2817                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
               if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  
               }  
2818              }              }
2819            }            }
2820          else          else
2821  #endif  #endif
2822          /* Not UTF-8 mode */          /* Not UTF mode */
2823            {            {
2824            for (fi = min;; fi++)            for (fi = min;; fi++)
2825              {              {
# Line 2827  for (;;) Line 2832  for (;;)
2832                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
2833                }                }
2834              c = *eptr++;              c = *eptr++;
2835              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2836                if (c > 255)
2837                  {
2838                  if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2839                  }
2840                else
2841    #endif
2842                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2843              }              }
2844            }            }
2845          /* Control never gets here */          /* Control never gets here */
# Line 2839  for (;;) Line 2851  for (;;)
2851          {          {
2852          pp = eptr;          pp = eptr;
2853    
2854  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2855          /* UTF-8 mode */          if (utf)
         if (utf8)  
2856            {            {
2857            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2858              {              {
# Line 2857  for (;;) Line 2868  for (;;)
2868                if (op == OP_CLASS) break;                if (op == OP_CLASS) break;
2869                }                }
2870              else              else
2871                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
               if ((data[c/8] & (1 << (c&7))) == 0) break;  
               }  
2872              eptr += len;              eptr += len;
2873              }              }
2874            for (;;)            for (;;)
# Line 2872  for (;;) Line 2881  for (;;)
2881            }            }
2882          else          else
2883  #endif  #endif
2884            /* Not UTF-8 mode */            /* Not UTF mode */
2885            {            {
2886            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2887              {              {
# Line 2882  for (;;) Line 2891  for (;;)
2891                break;                break;
2892                }                }
2893              c = *eptr;              c = *eptr;
2894              if ((data[c/8] & (1 << (c&7))) == 0) break;  #ifndef COMPILE_PCRE8
2895                if (c > 255)
2896                  {
2897                  if (op == OP_CLASS) break;
2898                  }
2899                else
2900    #endif
2901                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2902              eptr++;              eptr++;
2903              }              }
2904            while (eptr >= pp)            while (eptr >= pp)
# Line 2895  for (;;) Line 2911  for (;;)
2911    
2912          MRRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2913          }          }
2914    #undef BYTE_MAP
2915        }        }
2916      /* Control never gets here */      /* Control never gets here */
2917    
# Line 2903  for (;;) Line 2920  for (;;)
2920      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2921      mode, because Unicode properties are supported in non-UTF-8 mode. */      mode, because Unicode properties are supported in non-UTF-8 mode. */
2922    
2923  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2924      case OP_XCLASS:      case OP_XCLASS:
2925        {        {
2926        data = ecode + 1 + LINK_SIZE;                /* Save for matching */        data = ecode + 1 + LINK_SIZE;                /* Save for matching */
# Line 2928  for (;;) Line 2945  for (;;)
2945          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2946          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2947          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2948          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2949          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2950          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2951          break;          break;
2952    
2953          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2948  for (;;) Line 2965  for (;;)
2965            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2966            }            }
2967          GETCHARINCTEST(c, eptr);          GETCHARINCTEST(c, eptr);
2968          if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);          if (!PRIV(xclass)(c, data)) MRRETURN(MATCH_NOMATCH);
2969          }          }
2970    
2971        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 2972  for (;;) Line 2989  for (;;)
2989              MRRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2990              }              }
2991            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2992            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);            if (!PRIV(xclass)(c, data)) MRRETURN(MATCH_NOMATCH);
2993            }            }
2994          /* Control never gets here */          /* Control never gets here */
2995          }          }
# Line 2990  for (;;) Line 3007  for (;;)
3007              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3008              break;              break;
3009              }              }
3010    #ifdef SUPPORT_UTF
3011            GETCHARLENTEST(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
3012            if (!_pcre_xclass(c, data)) break;  #else
3013              c = *eptr;
3014    #endif
3015              if (!PRIV(xclass)(c, data)) break;
3016            eptr += len;            eptr += len;
3017            }            }
3018          for(;;)          for(;;)
# Line 2999  for (;;) Line 3020  for (;;)
3020            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3021            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3022            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3023            if (utf8) BACKCHAR(eptr);  #ifdef SUPPORT_UTF
3024              if (utf) BACKCHAR(eptr);
3025    #endif
3026            }            }
3027          MRRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3028          }          }
# Line 3012  for (;;) Line 3035  for (;;)
3035    
3036      case OP_CHAR:      case OP_CHAR:
3037  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3038      if (utf8)      if (utf)
3039        {        {
3040        length = 1;        length = 1;
3041        ecode++;        ecode++;
# Line 3026  for (;;) Line 3049  for (;;)
3049        }        }
3050      else      else
3051  #endif  #endif
3052        /* Not UTF mode */
     /* Non-UTF-8 mode */  
3053        {        {
3054        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
3055          {          {
# Line 3043  for (;;) Line 3065  for (;;)
3065    
3066      case OP_CHARI:      case OP_CHARI:
3067  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3068      if (utf8)      if (utf)
3069        {        {
3070        length = 1;        length = 1;
3071        ecode++;        ecode++;
# Line 3086  for (;;) Line 3108  for (;;)
3108      else      else
3109  #endif   /* SUPPORT_UTF8 */  #endif   /* SUPPORT_UTF8 */
3110    
3111      /* Non-UTF-8 mode */      /* Not UTF mode */
3112        {        {
3113        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
3114          {          {
# Line 3103  for (;;) Line 3125  for (;;)
3125      case OP_EXACT:      case OP_EXACT:
3126      case OP_EXACTI:      case OP_EXACTI:
3127      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3128      ecode += 3;      ecode += 1 + IMM2_SIZE;
3129      goto REPEATCHAR;      goto REPEATCHAR;
3130    
3131      case OP_POSUPTO:      case OP_POSUPTO:
# Line 3118  for (;;) Line 3140  for (;;)
3140      min = 0;      min = 0;
3141      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3142      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3143      ecode += 3;      ecode += 1 + IMM2_SIZE;
3144      goto REPEATCHAR;      goto REPEATCHAR;
3145    
3146      case OP_POSSTAR:      case OP_POSSTAR:
# Line 3167  for (;;) Line 3189  for (;;)
3189    
3190      REPEATCHAR:      REPEATCHAR:
3191  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3192      if (utf8)      if (utf)
3193        {        {
3194        length = 1;        length = 1;
3195        charptr = ecode;        charptr = ecode;
# Line 3183  for (;;) Line 3205  for (;;)
3205          unsigned int othercase;          unsigned int othercase;
3206          if (op >= OP_STARI &&     /* Caseless */          if (op >= OP_STARI &&     /* Caseless */
3207              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
3208            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = PRIV(ord2utf)(othercase, occhars);
3209          else oclength = 0;          else oclength = 0;
3210  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3211    
# Line 3194  for (;;) Line 3216  for (;;)
3216  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3217            else if (oclength > 0 &&            else if (oclength > 0 &&
3218                     eptr <= md->end_subject - oclength &&                     eptr <= md->end_subject - oclength &&
3219                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3220  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3221            else            else
3222              {              {
# Line 3217  for (;;) Line 3239  for (;;)
3239  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3240              else if (oclength > 0 &&              else if (oclength > 0 &&
3241                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3242                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3243  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3244              else              else
3245                {                {
# Line 3238  for (;;) Line 3260  for (;;)
3260  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3261              else if (oclength > 0 &&              else if (oclength > 0 &&
3262                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3263                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3264  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3265              else              else
3266                {                {
# Line 3438  for (;;) Line 3460  for (;;)
3460      case OP_NOTEXACT:      case OP_NOTEXACT:
3461      case OP_NOTEXACTI:      case OP_NOTEXACTI:
3462      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3463      ecode += 3;      ecode += 1 + IMM2_SIZE;
3464      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3465    
3466      case OP_NOTUPTO:      case OP_NOTUPTO:
# Line 3448  for (;;) Line 3470  for (;;)
3470      min = 0;      min = 0;
3471      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3472      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3473      ecode += 3;      ecode += 1 + IMM2_SIZE;
3474      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3475    
3476      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
# Line 3480  for (;;) Line 3502  for (;;)
3502      possessive = TRUE;      possessive = TRUE;
3503      min = 0;      min = 0;
3504      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3505      ecode += 3;      ecode += 1 + IMM2_SIZE;
3506      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3507    
3508      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 3522  for (;;) Line 3544  for (;;)
3544        fc = md->lcc[fc];        fc = md->lcc[fc];
3545    
3546  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3547        /* UTF-8 mode */        if (utf)
       if (utf8)  
3548          {          {
3549          register unsigned int d;          register unsigned int d;
3550          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3540  for (;;) Line 3561  for (;;)
3561          }          }
3562        else        else
3563  #endif  #endif
3564          /* Not UTF mode */
       /* Not UTF-8 mode */  
3565          {          {
3566          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3567            {            {
# Line 3559  for (;;) Line 3579  for (;;)
3579        if (minimize)        if (minimize)
3580          {          {
3581  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3582          /* UTF-8 mode */          if (utf)
         if (utf8)  
3583            {            {
3584            register unsigned int d;            register unsigned int d;
3585            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3580  for (;;) Line 3599  for (;;)
3599            }            }
3600          else          else
3601  #endif  #endif
3602          /* Not UTF-8 mode */          /* Not UTF mode */
3603            {            {
3604            for (fi = min;; fi++)            for (fi = min;; fi++)
3605              {              {
# Line 3605  for (;;) Line 3624  for (;;)
3624          pp = eptr;          pp = eptr;
3625    
3626  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3627          /* UTF-8 mode */          if (utf)
         if (utf8)  
3628            {            {
3629            register unsigned int d;            register unsigned int d;
3630            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3633  for (;;) Line 3651  for (;;)
3651            }            }
3652          else          else
3653  #endif  #endif
3654          /* Not UTF-8 mode */          /* Not UTF mode */
3655            {            {
3656            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3657              {              {
# Line 3664  for (;;) Line 3682  for (;;)
3682      else      else
3683        {        {
3684  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3685        /* UTF-8 mode */        if (utf)
       if (utf8)  
3686          {          {
3687          register unsigned int d;          register unsigned int d;
3688          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3681  for (;;) Line 3698  for (;;)
3698          }          }
3699        else        else
3700  #endif  #endif
3701        /* Not UTF-8 mode */        /* Not UTF mode */
3702          {          {
3703          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3704            {            {
# Line 3699  for (;;) Line 3716  for (;;)
3716        if (minimize)        if (minimize)
3717          {          {
3718  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3719          /* UTF-8 mode */          if (utf)
         if (utf8)  
3720            {            {
3721            register unsigned int d;            register unsigned int d;
3722            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3719  for (;;) Line 3735  for (;;)
3735            }            }
3736          else          else
3737  #endif  #endif
3738          /* Not UTF-8 mode */          /* Not UTF mode */
3739            {            {
3740            for (fi = min;; fi++)            for (fi = min;; fi++)
3741              {              {
# Line 3744  for (;;) Line 3760  for (;;)
3760          pp = eptr;          pp = eptr;
3761    
3762  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3763          /* UTF-8 mode */          if (utf)
         if (utf8)  
3764            {            {
3765            register unsigned int d;            register unsigned int d;
3766            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3771  for (;;) Line 3786  for (;;)
3786            }            }
3787          else          else
3788  #endif  #endif
3789          /* Not UTF-8 mode */          /* Not UTF mode */
3790            {            {
3791            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3792              {              {
# Line 3804  for (;;) Line 3819  for (;;)
3819      case OP_TYPEEXACT:      case OP_TYPEEXACT:
3820      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3821      minimize = TRUE;      minimize = TRUE;
3822      ecode += 3;      ecode += 1 + IMM2_SIZE;
3823      goto REPEATTYPE;      goto REPEATTYPE;
3824    
3825      case OP_TYPEUPTO:      case OP_TYPEUPTO:
# Line 3812  for (;;) Line 3827  for (;;)
3827      min = 0;      min = 0;
3828      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3829      minimize = *ecode == OP_TYPEMINUPTO;      minimize = *ecode == OP_TYPEMINUPTO;
3830      ecode += 3;      ecode += 1 + IMM2_SIZE;
3831      goto REPEATTYPE;      goto REPEATTYPE;
3832    
3833      case OP_TYPEPOSSTAR:      case OP_TYPEPOSSTAR:
# Line 3840  for (;;) Line 3855  for (;;)
3855      possessive = TRUE;      possessive = TRUE;
3856      min = 0;      min = 0;
3857      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3858      ecode += 3;      ecode += 1 + IMM2_SIZE;
3859      goto REPEATTYPE;      goto REPEATTYPE;
3860    
3861      case OP_TYPESTAR:      case OP_TYPESTAR:
# Line 4047  for (;;) Line 4062  for (;;)
4062            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4063              {              {
4064              int len = 1;              int len = 1;
4065              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4066              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
4067              eptr += len;              eptr += len;
4068              }              }
# Line 4060  for (;;) Line 4075  for (;;)
4075  /* Handle all other cases when the coding is UTF-8 */  /* Handle all other cases when the coding is UTF-8 */
4076    
4077  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4078        if (utf8) switch(ctype)        if (utf) switch(ctype)
4079          {          {
4080          case OP_ANY:          case OP_ANY:
4081          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 4768  for (;;) Line 4783  for (;;)
4783            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4784              {              {
4785              int len = 1;              int len = 1;
4786              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4787              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
4788              eptr += len;              eptr += len;
4789              }              }
# Line 4778  for (;;) Line 4793  for (;;)
4793  #endif     /* SUPPORT_UCP */  #endif     /* SUPPORT_UCP */
4794    
4795  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4796        /* UTF-8 mode */        if (utf)
       if (utf8)  
4797          {          {
4798          for (fi = min;; fi++)          for (fi = min;; fi++)
4799            {            {
# Line 4942  for (;;) Line 4956  for (;;)
4956          }          }
4957        else        else
4958  #endif  #endif
4959        /* Not UTF-8 mode */        /* Not UTF mode */
4960          {          {
4961          for (fi = min;; fi++)          for (fi = min;; fi++)
4962            {            {
# Line 5241  for (;;) Line 5255  for (;;)
5255            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5256            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5257            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5258            if (utf8) BACKCHAR(eptr);            if (utf) BACKCHAR(eptr);
5259            }            }
5260          }          }
5261    
# Line 5258  for (;;) Line 5272  for (;;)
5272              SCHECK_PARTIAL();              SCHECK_PARTIAL();
5273              break;              break;
5274              }              }
5275            if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5276            if (UCD_CATEGORY(c) == ucp_M) break;            if (UCD_CATEGORY(c) == ucp_M) break;
5277            eptr += len;            eptr += len;
5278            while (eptr < md->end_subject)            while (eptr < md->end_subject)
5279              {              {
5280              len = 1;              len = 1;
5281              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5282              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
5283              eptr += len;              eptr += len;
5284              }              }
# Line 5281  for (;;) Line 5295  for (;;)
5295            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5296            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
5297              {              {
5298              if (!utf8) c = *eptr; else              if (!utf) c = *eptr; else
5299                {                {
5300                BACKCHAR(eptr);                BACKCHAR(eptr);
5301                GETCHAR(c, eptr);                GETCHAR(c, eptr);
# Line 5296  for (;;) Line 5310  for (;;)
5310  #endif   /* SUPPORT_UCP */  #endif   /* SUPPORT_UCP */
5311    
5312  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5313        /* UTF-8 mode */        if (utf)
   
       if (utf8)  
5314          {          {
5315          switch(ctype)          switch(ctype)
5316            {            {
# Line 5581  for (;;) Line 5593  for (;;)
5593          }          }
5594        else        else
5595  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
5596          /* Not UTF mode */
       /* Not UTF-8 mode */  
5597          {          {
5598          switch(ctype)          switch(ctype)
5599            {            {
# Line 5828  switch (frame->Xwhere) Line 5839  switch (frame->Xwhere)
5839    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5840    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5841    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
5842    LBL(65) LBL(66)    LBL(65) LBL(66)
5843  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5844    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5845    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
# Line 5925  Returns:          > 0 => success; value Line 5936  Returns:          > 0 => success; value
5936                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
5937  */  */
5938    
5939    #ifdef COMPILE_PCRE8
5940  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
5941  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
5942    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5943    int offsetcount)    int offsetcount)
5944    #else
5945    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
5946    pcre16_exec(const pcre *argument_re, const pcre_extra *extra_data,
5947      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
5948      int offsetcount)
5949    #endif
5950  {  {
5951  int rc, ocount, arg_offset_max;  int rc, ocount, arg_offset_max;
 int first_byte = -1;  
 int req_byte = -1;  
 int req_byte2 = -1;  
5952  int newline;  int newline;
5953  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
5954  BOOL anchored;  BOOL anchored;
5955  BOOL startline;  BOOL startline;
5956  BOOL firstline;  BOOL firstline;
5957  BOOL first_byte_caseless = FALSE;  BOOL utf;
5958  BOOL req_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
5959  BOOL utf8;  BOOL has_req_char = FALSE;
5960    pcre_uchar first_char = 0;
5961    pcre_uchar first_char2 = 0;
5962    pcre_uchar req_char = 0;
5963    pcre_uchar req_char2 = 0;
5964  match_data match_block;  match_data match_block;
5965  match_data *md = &match_block;  match_data *md = &match_block;
5966  const uschar *tables;  const pcre_uint8 *tables;
5967  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
5968  USPTR start_match = (USPTR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
5969  USPTR end_subject;  PCRE_PUCHAR end_subject;
5970  USPTR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
5971  USPTR req_byte_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
5972    
5973  pcre_study_data internal_study;  pcre_study_data internal_study;
5974  const pcre_study_data *study;  const pcre_study_data *study;
# Line 5971  follows immediately afterwards. Other va Line 5990  follows immediately afterwards. Other va
5990  during "normal" pcre_exec() processing, not when the JIT support is in use,  during "normal" pcre_exec() processing, not when the JIT support is in use,
5991  so they are set up later. */  so they are set up later. */
5992    
5993  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
5994    utf = md->utf = (re->options & PCRE_UTF8) != 0;
5995  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5996                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5997    
# Line 5979  md->partial = ((options & PCRE_PARTIAL_H Line 5999  md->partial = ((options & PCRE_PARTIAL_H
5999  code for an invalid string if a results vector is available. */  code for an invalid string if a results vector is available. */
6000    
6001  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
6002  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6003    {    {
6004    int erroroffset;    int erroroffset;
6005    int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);    int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6006    if (errorcode != 0)    if (errorcode != 0)
6007      {      {
6008      if (offsetcount >= 2)      if (offsetcount >= 2)
# Line 5996  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 6016  if (utf8 && (options & PCRE_NO_UTF8_CHEC
6016    
6017    /* Check that a start_offset points to the start of a UTF-8 character. */    /* Check that a start_offset points to the start of a UTF-8 character. */
6018    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
6019        (((USPTR)subject)[start_offset] & 0xc0) == 0x80)        (((PCRE_PUCHAR)subject)[start_offset] & 0xc0) == 0x80)
6020      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
6021    }    }
6022  #endif  #endif
# Line 6011  matching. */ Line 6031  matching. */
6031  if (extra_data != NULL  if (extra_data != NULL
6032      && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0      && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
6033      && extra_data->executable_jit != NULL      && extra_data->executable_jit != NULL
6034        && (extra_data->flags & PCRE_EXTRA_TABLES) == 0
6035      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6036                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
6037    return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,    return PRIV(jit_exec)(re, extra_data->executable_jit,
6038      start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)      (const pcre_uchar *)subject, length, start_offset, options,
6039        ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
6040      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
6041  #endif  #endif
6042    
6043  /* Carry on with non-JIT matching. This information is for finding all the  /* Carry on with non-JIT matching. This information is for finding all the
6044  numbers associated with a given name, for condition testing. */  numbers associated with a given name, for condition testing. */
6045    
6046  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (pcre_uchar *)re + re->name_table_offset;
6047  md->name_count = re->name_count;  md->name_count = re->name_count;
6048  md->name_entry_size = re->name_entry_size;  md->name_entry_size = re->name_entry_size;
6049    
# Line 6055  if (extra_data != NULL) Line 6077  if (extra_data != NULL)
6077  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
6078  in other programs later. */  in other programs later. */
6079    
6080  if (tables == NULL) tables = _pcre_default_tables;  if (tables == NULL) tables = PRIV(default_tables);
6081    
6082  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
6083  test for a regex that was compiled on a host of opposite endianness. If this is  test for a regex that was compiled on a host of opposite endianness. If this is
# Line 6064  study data too. */ Line 6086  study data too. */
6086    
6087  if (re->magic_number != MAGIC_NUMBER)  if (re->magic_number != MAGIC_NUMBER)
6088    {    {
6089    re = _pcre_try_flipped(re, &internal_re, study, &internal_study);    re = PRIV(try_flipped)(re, &internal_re, study, &internal_study);
6090    if (re == NULL) return PCRE_ERROR_BADMAGIC;    if (re == NULL) return PCRE_ERROR_BADMAGIC;
6091    if (study != NULL) study = &internal_study;    if (study != NULL) study = &internal_study;
6092    }    }
# Line 6077  firstline = (re->options & PCRE_FIRSTLIN Line 6099  firstline = (re->options & PCRE_FIRSTLIN
6099    
6100  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
6101    
6102  md->start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const pcre_uchar *)external_re + re->name_table_offset +
6103    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
6104    
6105  md->start_subject = (USPTR)subject;  md->start_subject = (PCRE_PUCHAR)subject;
6106  md->start_offset = start_offset;  md->start_offset = start_offset;
6107  md->end_subject = md->start_subject + length;  md->end_subject = md->start_subject + length;
6108  end_subject = md->end_subject;  end_subject = md->end_subject;
# Line 6217  if (md->offset_vector != NULL) Line 6239  if (md->offset_vector != NULL)
6239    md->offset_vector[0] = md->offset_vector[1] = -1;    md->offset_vector[0] = md->offset_vector[1] = -1;
6240    }    }
6241    
6242  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_char value is
6243  never set for an anchored regular expression, but the anchoring may be forced  never set for an anchored regular expression, but the anchoring may be forced
6244  at run time, so we have to test for anchoring. The first char may be unset for  at run time, so we have to test for anchoring. The first char may be unset for
6245  an unanchored pattern, of course. If there's no first char and the pattern was  an unanchored pattern, of course. If there's no first char and the pattern was
# Line 6227  if (!anchored) Line 6249  if (!anchored)
6249    {    {
6250    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
6251      {      {
6252      first_byte = re->first_byte & 255;      has_first_char = TRUE;
6253      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = re->first_char;
6254        first_byte = md->lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
6255          first_char2 = TABLE_GET(first_char, tables + fcc_offset, first_char);
6256      }      }
6257    else    else
6258      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 6242  character" set. */ Line 6265  character" set. */
6265    
6266  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
6267    {    {
6268    req_byte = re->req_byte & 255;    has_req_char = TRUE;
6269    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = re->req_char;
6270    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
6271        req_char2 = TABLE_GET(req_char, tables + fcc_offset, req_char);
6272    }    }
6273    
6274    
   
   
6275  /* ==========================================================================*/  /* ==========================================================================*/
6276    
6277  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
# Line 6257  the loop runs just once. */ Line 6279  the loop runs just once. */
6279    
6280  for(;;)  for(;;)
6281    {    {
6282    USPTR save_end_subject = end_subject;    PCRE_PUCHAR save_end_subject = end_subject;
6283    USPTR new_start_match;    PCRE_PUCHAR new_start_match;
6284    
6285    /* If firstline is TRUE, the start of the match is constrained to the first    /* If firstline is TRUE, the start of the match is constrained to the first
6286    line of a multiline string. That is, the match must be before or at the first    line of a multiline string. That is, the match must be before or at the first
# Line 6268  for(;;) Line 6290  for(;;)
6290    
6291    if (firstline)    if (firstline)
6292      {      {
6293      USPTR t = start_match;      PCRE_PUCHAR t = start_match;
6294  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
6295      if (utf8)      if (utf)
6296        {        {
6297        while (t < md->end_subject && !IS_NEWLINE(t))        while (t < md->end_subject && !IS_NEWLINE(t))
6298          {          {
# Line 6292  for(;;) Line 6314  for(;;)
6314    
6315    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6316      {      {
6317      /* Advance to a unique first byte if there is one. */      /* Advance to a unique first char if there is one. */
6318    
6319      if (first_byte >= 0)      if (has_first_char)
6320        {        {
6321        if (first_byte_caseless)        if (first_char != first_char2)
6322          while (start_match < end_subject && md->lcc[*start_match] != first_byte)          while (start_match < end_subject &&
6323                *start_match != first_char && *start_match != first_char2)
6324            start_match++;            start_match++;
6325        else        else
6326          while (start_match < end_subject && *start_match != first_byte)          while (start_match < end_subject && *start_match != first_char)
6327            start_match++;            start_match++;
6328        }        }
6329    
# Line 6311  for(;;) Line 6334  for(;;)
6334        if (start_match > md->start_subject + start_offset)        if (start_match > md->start_subject + start_offset)
6335          {          {
6336  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
6337          if (utf8)          if (utf)
6338            {            {
6339            while (start_match < end_subject && !WAS_NEWLINE(start_match))            while (start_match < end_subject && !WAS_NEWLINE(start_match))
6340              {              {
# Line 6343  for(;;) Line 6366  for(;;)
6366        {        {
6367        while (start_match < end_subject)        while (start_match < end_subject)
6368          {          {
6369    #ifdef COMPILE_PCRE8
6370          register unsigned int c = *start_match;          register unsigned int c = *start_match;
6371    #else
6372            register unsigned int c = *start_match & 0xff;
6373    #endif
6374          if ((start_bits[c/8] & (1 << (c&7))) == 0)          if ((start_bits[c/8] & (1 << (c&7))) == 0)
6375            {            {
6376            start_match++;            start_match++;
6377  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
6378            if (utf8)            if (utf)
6379              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
6380                start_match++;                start_match++;
6381  #endif  #endif
# Line 6365  for(;;) Line 6392  for(;;)
6392    /* The following two optimizations are disabled for partial matching or if    /* The following two optimizations are disabled for partial matching or if
6393    disabling is explicitly requested. */    disabling is explicitly requested. */
6394    
6395    if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6396      {      {
6397      /* If the pattern was studied, a minimum subject length may be set. This is      /* If the pattern was studied, a minimum subject length may be set. This is
6398      a lower bound; no actual string of that length may actually match the      a lower bound; no actual string of that length may actually match the
# Line 6379  for(;;) Line 6406  for(;;)
6406        break;        break;
6407        }        }
6408    
6409      /* If req_byte is set, we know that that character must appear in the      /* If req_char is set, we know that that character must appear in the
6410      subject for the match to succeed. If the first character is set, req_byte      subject for the match to succeed. If the first character is set, req_char
6411      must be later in the subject; otherwise the test starts at the match point.      must be later in the subject; otherwise the test starts at the match point.
6412      This optimization can save a huge amount of backtracking in patterns with      This optimization can save a huge amount of backtracking in patterns with
6413      nested unlimited repeats that aren't going to match. Writing separate code      nested unlimited repeats that aren't going to match. Writing separate code
# Line 6393  for(;;) Line 6420  for(;;)
6420      32-megabyte string... so we don't do this when the string is sufficiently      32-megabyte string... so we don't do this when the string is sufficiently
6421      long. */      long. */
6422    
6423      if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)      if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6424        {        {
6425        register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);        register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6426    
6427        /* We don't need to repeat the search if we haven't yet reached the        /* We don't need to repeat the search if we haven't yet reached the
6428        place we found it at last time. */        place we found it at last time. */
6429    
6430        if (p > req_byte_ptr)        if (p > req_char_ptr)
6431          {          {
6432          if (req_byte_caseless)          if (req_char != req_char2)
6433            {            {
6434            while (p < end_subject)            while (p < end_subject)
6435              {              {
6436              register int pp = *p++;              register int pp = *p++;
6437              if (pp == req_byte || pp == req_byte2) { p--; break; }              if (pp == req_char || pp == req_char2) { p--; break; }
6438              }              }
6439            }            }
6440          else          else
6441            {            {
6442            while (p < end_subject)            while (p < end_subject)
6443              {              {
6444              if (*p++ == req_byte) { p--; break; }              if (*p++ == req_char) { p--; break; }
6445              }              }
6446            }            }
6447    
# Line 6431  for(;;) Line 6458  for(;;)
6458          found it, so that we don't search again next time round the loop if          found it, so that we don't search again next time round the loop if
6459          the start hasn't passed this character yet. */          the start hasn't passed this character yet. */
6460    
6461          req_byte_ptr = p;          req_char_ptr = p;
6462          }          }
6463        }        }
6464      }      }
# Line 6480  for(;;) Line 6507  for(;;)
6507      case MATCH_THEN:      case MATCH_THEN:
6508      new_start_match = start_match + 1;      new_start_match = start_match + 1;
6509  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
6510      if (utf8)      if (utf)
6511        while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)        while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
6512          new_start_match++;          new_start_match++;
6513  #endif  #endif
# Line 6635  if (start_partial != NULL) Line 6662  if (start_partial != NULL)
6662    md->mark = NULL;    md->mark = NULL;
6663    if (offsetcount > 1)    if (offsetcount > 1)
6664      {      {
6665      offsets[0] = (int)(start_partial - (USPTR)subject);      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
6666      offsets[1] = (int)(end_subject - (USPTR)subject);      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
6667      }      }
6668    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
6669    }    }

Legend:
Removed from v.723  
changed lines
  Added in v.781

  ViewVC Help
Powered by ViewVC 1.1.5