/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 778 by ph10, Thu Dec 1 17:38:47 2011 UTC revision 850 by zherczeg, Wed Jan 4 17:29:11 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 113  Returns:     nothing Line 113  Returns:     nothing
113  */  */
114    
115  static void  static void
116  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
117  {  {
118  unsigned int c;  unsigned int c;
119  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 144  Returns:      < 0 if not matched, otherw Line 144  Returns:      < 0 if not matched, otherw
144  */  */
145    
146  static int  static int
147  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
148    BOOL caseless)    BOOL caseless)
149  {  {
150  USPTR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
151  register USPTR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
152    
153  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
154  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 173  ASCII characters. */ Line 173  ASCII characters. */
173    
174  if (caseless)  if (caseless)
175    {    {
176  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
177  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
178    if (md->utf8)    if (md->utf)
179      {      {
180      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
181      bytes matched may differ, because there are some characters whose upper and      bytes matched may differ, because there are some characters whose upper and
# Line 185  if (caseless) Line 185  if (caseless)
185      the latter. It is important, therefore, to check the length along the      the latter. It is important, therefore, to check the length along the
186      reference, not along the subject (earlier code did this wrong). */      reference, not along the subject (earlier code did this wrong). */
187    
188      USPTR endptr = p + length;      PCRE_PUCHAR endptr = p + length;
189      while (p < endptr)      while (p < endptr)
190        {        {
191        int c, d;        int c, d;
# Line 204  if (caseless) Line 204  if (caseless)
204      {      {
205      if (eptr + length > md->end_subject) return -1;      if (eptr + length > md->end_subject) return -1;
206      while (length-- > 0)      while (length-- > 0)
207        { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }        {
208          if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
209          p++;
210          eptr++;
211          }
212      }      }
213    }    }
214    
# Line 217  else Line 221  else
221    while (length-- > 0) if (*p++ != *eptr++) return -1;    while (length-- > 0) if (*p++ != *eptr++) return -1;
222    }    }
223    
224  return eptr - eptr_start;  return (int)(eptr - eptr_start);
225  }  }
226    
227    
# Line 307  argument of match(), which never changes Line 311  argument of match(), which never changes
311    
312  #define RMATCH(ra,rb,rc,rd,re,rw)\  #define RMATCH(ra,rb,rc,rd,re,rw)\
313    {\    {\
314    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
315    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
316    frame->Xwhere = rw; \    frame->Xwhere = rw; \
317    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
# Line 328  argument of match(), which never changes Line 332  argument of match(), which never changes
332    {\    {\
333    heapframe *oldframe = frame;\    heapframe *oldframe = frame;\
334    frame = oldframe->Xprevframe;\    frame = oldframe->Xprevframe;\
335    (pcre_stack_free)(oldframe);\    (PUBL(stack_free))(oldframe);\
336    if (frame != NULL)\    if (frame != NULL)\
337      {\      {\
338      rrc = ra;\      rrc = ra;\
# Line 345  typedef struct heapframe { Line 349  typedef struct heapframe {
349    
350    /* Function arguments that may change */    /* Function arguments that may change */
351    
352    USPTR Xeptr;    PCRE_PUCHAR Xeptr;
353    const uschar *Xecode;    const pcre_uchar *Xecode;
354    USPTR Xmstart;    PCRE_PUCHAR Xmstart;
355    int Xoffset_top;    int Xoffset_top;
356    eptrblock *Xeptrb;    eptrblock *Xeptrb;
357    unsigned int Xrdepth;    unsigned int Xrdepth;
358    
359    /* Function local variables */    /* Function local variables */
360    
361    USPTR Xcallpat;    PCRE_PUCHAR Xcallpat;
362  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
363    USPTR Xcharptr;    PCRE_PUCHAR Xcharptr;
364  #endif  #endif
365    USPTR Xdata;    PCRE_PUCHAR Xdata;
366    USPTR Xnext;    PCRE_PUCHAR Xnext;
367    USPTR Xpp;    PCRE_PUCHAR Xpp;
368    USPTR Xprev;    PCRE_PUCHAR Xprev;
369    USPTR Xsaved_eptr;    PCRE_PUCHAR Xsaved_eptr;
370    
371    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
372    
# Line 375  typedef struct heapframe { Line 379  typedef struct heapframe {
379    int Xprop_value;    int Xprop_value;
380    int Xprop_fail_result;    int Xprop_fail_result;
381    int Xoclength;    int Xoclength;
382    uschar Xocchars[8];    pcre_uchar Xocchars[6];
383  #endif  #endif
384    
385    int Xcodelink;    int Xcodelink;
# Line 440  the subject. */ Line 444  the subject. */
444    
445    
446  /* Performance note: It might be tempting to extract commonly used fields from  /* Performance note: It might be tempting to extract commonly used fields from
447  the md structure (e.g. utf8, end_subject) into individual variables to improve  the md structure (e.g. utf, end_subject) into individual variables to improve
448  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
449  made performance worse.  made performance worse.
450    
# Line 463  Returns:       MATCH_MATCH if matched Line 467  Returns:       MATCH_MATCH if matched
467  */  */
468    
469  static int  static int
470  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
471    int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)    PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
472      unsigned int rdepth)
473  {  {
474  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
475  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 473  so they can be ordinary variables in all Line 478  so they can be ordinary variables in all
478  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
479  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
480  register unsigned int c;   /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
481  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf;         /* Local copy of UTF flag for speed */
482    
483  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
484  BOOL caseless;  BOOL caseless;
# Line 485  heap storage. Set up the top-level frame Line 490  heap storage. Set up the top-level frame
490  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  heap whenever RMATCH() does a "recursion". See the macro definitions above. */
491    
492  #ifdef NO_RECURSE  #ifdef NO_RECURSE
493  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));
494  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
495  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
496    
# Line 513  HEAP_RECURSE: Line 518  HEAP_RECURSE:
518    
519  /* Ditto for the local variables */  /* Ditto for the local variables */
520    
521  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
522  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
523  #endif  #endif
524  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
# Line 571  declarations can be cut out in a block. Line 576  declarations can be cut out in a block.
576  below are for variables that do not have to be preserved over a recursive call  below are for variables that do not have to be preserved over a recursive call
577  to RMATCH(). */  to RMATCH(). */
578    
579  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
580  const uschar *charptr;  const pcre_uchar *charptr;
581  #endif  #endif
582  const uschar *callpat;  const pcre_uchar *callpat;
583  const uschar *data;  const pcre_uchar *data;
584  const uschar *next;  const pcre_uchar *next;
585  USPTR         pp;  PCRE_PUCHAR       pp;
586  const uschar *prev;  const pcre_uchar *prev;
587  USPTR         saved_eptr;  PCRE_PUCHAR       saved_eptr;
588    
589  recursion_info new_recursive;  recursion_info new_recursive;
590    
# Line 592  int prop_type; Line 597  int prop_type;
597  int prop_value;  int prop_value;
598  int prop_fail_result;  int prop_fail_result;
599  int oclength;  int oclength;
600  uschar occhars[8];  pcre_uchar occhars[6];
601  #endif  #endif
602    
603  int codelink;  int codelink;
# Line 620  the alternative names that are used. */ Line 625  the alternative names that are used. */
625  #define code_offset   codelink  #define code_offset   codelink
626  #define condassert    condition  #define condassert    condition
627  #define matched_once  prev_is_word  #define matched_once  prev_is_word
628    #define foc           number
629    
630  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
631  variables. */  variables. */
# Line 645  defined). However, RMATCH isn't like a f Line 651  defined). However, RMATCH isn't like a f
651  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
652  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
653    
654  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
655  utf8 = md->utf8;       /* Local copy of the flag */  utf = md->utf;       /* Local copy of the flag */
656  #else  #else
657  utf8 = FALSE;  utf = FALSE;
658  #endif  #endif
659    
660  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
# Line 689  for (;;) Line 695  for (;;)
695      case OP_MARK:      case OP_MARK:
696      md->nomatch_mark = ecode + 2;      md->nomatch_mark = ecode + 2;
697      md->mark = NULL;    /* In case previously set by assertion */      md->mark = NULL;    /* In case previously set by assertion */
698      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
699        eptrb, RM55);        eptrb, RM55);
700      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
701           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
# Line 702  for (;;) Line 708  for (;;)
708      unaltered. */      unaltered. */
709    
710      else if (rrc == MATCH_SKIP_ARG &&      else if (rrc == MATCH_SKIP_ARG &&
711          strcmp((char *)(ecode + 2), (char *)(md->start_match_ptr)) == 0)          STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
712        {        {
713        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
714        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
# Line 715  for (;;) Line 721  for (;;)
721      /* COMMIT overrides PRUNE, SKIP, and THEN */      /* COMMIT overrides PRUNE, SKIP, and THEN */
722    
723      case OP_COMMIT:      case OP_COMMIT:
724      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
725        eptrb, RM52);        eptrb, RM52);
726      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
727          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
# Line 726  for (;;) Line 732  for (;;)
732      /* PRUNE overrides THEN */      /* PRUNE overrides THEN */
733    
734      case OP_PRUNE:      case OP_PRUNE:
735      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
736        eptrb, RM51);        eptrb, RM51);
737      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
738      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
# Line 734  for (;;) Line 740  for (;;)
740      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
741      md->nomatch_mark = ecode + 2;      md->nomatch_mark = ecode + 2;
742      md->mark = NULL;    /* In case previously set by assertion */      md->mark = NULL;    /* In case previously set by assertion */
743      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
744        eptrb, RM56);        eptrb, RM56);
745      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
746           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
# Line 744  for (;;) Line 750  for (;;)
750      /* SKIP overrides PRUNE and THEN */      /* SKIP overrides PRUNE and THEN */
751    
752      case OP_SKIP:      case OP_SKIP:
753      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
754        eptrb, RM53);        eptrb, RM53);
755      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
756        RRETURN(rrc);        RRETURN(rrc);
# Line 752  for (;;) Line 758  for (;;)
758      RRETURN(MATCH_SKIP);      RRETURN(MATCH_SKIP);
759    
760      /* Note that, for Perl compatibility, SKIP with an argument does NOT set      /* Note that, for Perl compatibility, SKIP with an argument does NOT set
761      nomatch_mark. There is a flag that disables this opcode when re-matching a      nomatch_mark. There is a flag that disables this opcode when re-matching a
762      pattern that ended with a SKIP for which there was not a matching MARK. */      pattern that ended with a SKIP for which there was not a matching MARK. */
763    
764      case OP_SKIP_ARG:      case OP_SKIP_ARG:
765      if (md->ignore_skip_arg)      if (md->ignore_skip_arg)
766        {        {
767        ecode += _pcre_OP_lengths[*ecode] + ecode[1];        ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
768        break;        break;
769        }        }
770      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
771        eptrb, RM57);        eptrb, RM57);
772      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
773        RRETURN(rrc);        RRETURN(rrc);
774    
775      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
776      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
777      caught by a matching MARK, or get to the top, where it causes a rematch      caught by a matching MARK, or get to the top, where it causes a rematch
778      with the md->ignore_skip_arg flag set. */      with the md->ignore_skip_arg flag set. */
779    
780      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
# Line 779  for (;;) Line 785  for (;;)
785      match pointer to do this. */      match pointer to do this. */
786    
787      case OP_THEN:      case OP_THEN:
788      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
789        eptrb, RM54);        eptrb, RM54);
790      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
791      md->start_match_ptr = ecode;      md->start_match_ptr = ecode;
# Line 788  for (;;) Line 794  for (;;)
794      case OP_THEN_ARG:      case OP_THEN_ARG:
795      md->nomatch_mark = ecode + 2;      md->nomatch_mark = ecode + 2;
796      md->mark = NULL;    /* In case previously set by assertion */      md->mark = NULL;    /* In case previously set by assertion */
797      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
798        md, eptrb, RM58);        md, eptrb, RM58);
799      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
800           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
# Line 917  for (;;) Line 923  for (;;)
923        for (;;)        for (;;)
924          {          {
925          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
926          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
927            eptrb, RM1);            eptrb, RM1);
928          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
929    
# Line 1004  for (;;) Line 1010  for (;;)
1010    
1011        else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)        else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1012          {          {
1013          ecode += _pcre_OP_lengths[*ecode];          ecode += PRIV(OP_lengths)[*ecode];
1014          goto TAIL_RECURSE;          goto TAIL_RECURSE;
1015          }          }
1016    
1017        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1018    
1019        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1020          RM2);          RM2);
1021    
1022        /* See comment in the code for capturing groups above about handling        /* See comment in the code for capturing groups above about handling
# Line 1028  for (;;) Line 1034  for (;;)
1034          {          {
1035          if (rrc == MATCH_ONCE)          if (rrc == MATCH_ONCE)
1036            {            {
1037            const uschar *scode = ecode;            const pcre_uchar *scode = ecode;
1038            if (*scode != OP_ONCE)           /* If not at start, find it */            if (*scode != OP_ONCE)           /* If not at start, find it */
1039              {              {
1040              while (*scode == OP_ALT) scode += GET(scode, 1);              while (*scode == OP_ALT) scode += GET(scode, 1);
# Line 1070  for (;;) Line 1076  for (;;)
1076      if (offset < md->offset_max)      if (offset < md->offset_max)
1077        {        {
1078        matched_once = FALSE;        matched_once = FALSE;
1079        code_offset = ecode - md->start_code;        code_offset = (int)(ecode - md->start_code);
1080    
1081        save_offset1 = md->offset_vector[offset];        save_offset1 = md->offset_vector[offset];
1082        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
# Line 1093  for (;;) Line 1099  for (;;)
1099          md->offset_vector[md->offset_end - number] =          md->offset_vector[md->offset_end - number] =
1100            (int)(eptr - md->start_subject);            (int)(eptr - md->start_subject);
1101          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1102          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1103            eptrb, RM63);            eptrb, RM63);
1104          if (rrc == MATCH_KETRPOS)          if (rrc == MATCH_KETRPOS)
1105            {            {
# Line 1160  for (;;) Line 1166  for (;;)
1166    
1167      POSSESSIVE_NON_CAPTURE:      POSSESSIVE_NON_CAPTURE:
1168      matched_once = FALSE;      matched_once = FALSE;
1169      code_offset = ecode - md->start_code;      code_offset = (int)(ecode - md->start_code);
1170    
1171      for (;;)      for (;;)
1172        {        {
1173        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1174        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1175          eptrb, RM48);          eptrb, RM48);
1176        if (rrc == MATCH_KETRPOS)        if (rrc == MATCH_KETRPOS)
1177          {          {
# Line 1215  for (;;) Line 1221  for (;;)
1221    
1222      if (ecode[LINK_SIZE+1] == OP_CALLOUT)      if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1223        {        {
1224        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
1225          {          {
1226          pcre_callout_block cb;          PUBL(callout_block) cb;
1227          cb.version          = 2;   /* Version 2 of the callout block */          cb.version          = 2;   /* Version 2 of the callout block */
1228          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1229          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
# Line 1231  for (;;) Line 1237  for (;;)
1237          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1238          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1239          cb.mark             = md->nomatch_mark;          cb.mark             = md->nomatch_mark;
1240          if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1241          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1242          }          }
1243        ecode += _pcre_OP_lengths[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1244        }        }
1245    
1246      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1260  for (;;) Line 1266  for (;;)
1266    
1267          if (!condition && condcode == OP_NRREF)          if (!condition && condcode == OP_NRREF)
1268            {            {
1269            uschar *slotA = md->name_table;            pcre_uchar *slotA = md->name_table;
1270            for (i = 0; i < md->name_count; i++)            for (i = 0; i < md->name_count; i++)
1271              {              {
1272              if (GET2(slotA, 0) == recno) break;              if (GET2(slotA, 0) == recno) break;
# Line 1273  for (;;) Line 1279  for (;;)
1279    
1280            if (i < md->name_count)            if (i < md->name_count)
1281              {              {
1282              uschar *slotB = slotA;              pcre_uchar *slotB = slotA;
1283              while (slotB > md->name_table)              while (slotB > md->name_table)
1284                {                {
1285                slotB -= md->name_entry_size;                slotB -= md->name_entry_size;
1286                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1287                  {                  {
1288                  condition = GET2(slotB, 0) == md->recursive->group_num;                  condition = GET2(slotB, 0) == md->recursive->group_num;
1289                  if (condition) break;                  if (condition) break;
# Line 1293  for (;;) Line 1299  for (;;)
1299                for (i++; i < md->name_count; i++)                for (i++; i < md->name_count; i++)
1300                  {                  {
1301                  slotB += md->name_entry_size;                  slotB += md->name_entry_size;
1302                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                  if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1303                    {                    {
1304                    condition = GET2(slotB, 0) == md->recursive->group_num;                    condition = GET2(slotB, 0) == md->recursive->group_num;
1305                    if (condition) break;                    if (condition) break;
# Line 1306  for (;;) Line 1312  for (;;)
1312    
1313          /* Choose branch according to the condition */          /* Choose branch according to the condition */
1314    
1315          ecode += condition? 3 : GET(ecode, 1);          ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1316          }          }
1317        }        }
1318    
# Line 1323  for (;;) Line 1329  for (;;)
1329        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
1330          {          {
1331          int refno = offset >> 1;          int refno = offset >> 1;
1332          uschar *slotA = md->name_table;          pcre_uchar *slotA = md->name_table;
1333    
1334          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
1335            {            {
# Line 1337  for (;;) Line 1343  for (;;)
1343    
1344          if (i < md->name_count)          if (i < md->name_count)
1345            {            {
1346            uschar *slotB = slotA;            pcre_uchar *slotB = slotA;
1347            while (slotB > md->name_table)            while (slotB > md->name_table)
1348              {              {
1349              slotB -= md->name_entry_size;              slotB -= md->name_entry_size;
1350              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1351                {                {
1352                offset = GET2(slotB, 0) << 1;                offset = GET2(slotB, 0) << 1;
1353                condition = offset < offset_top &&                condition = offset < offset_top &&
# Line 1359  for (;;) Line 1365  for (;;)
1365              for (i++; i < md->name_count; i++)              for (i++; i < md->name_count; i++)
1366                {                {
1367                slotB += md->name_entry_size;                slotB += md->name_entry_size;
1368                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1369                  {                  {
1370                  offset = GET2(slotB, 0) << 1;                  offset = GET2(slotB, 0) << 1;
1371                  condition = offset < offset_top &&                  condition = offset < offset_top &&
# Line 1374  for (;;) Line 1380  for (;;)
1380    
1381        /* Choose branch according to the condition */        /* Choose branch according to the condition */
1382    
1383        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1384        }        }
1385    
1386      else if (condcode == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
# Line 1466  for (;;) Line 1472  for (;;)
1472        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1473        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1474        }        }
1475      ecode += 3;      ecode += 1 + IMM2_SIZE;
1476      break;      break;
1477    
1478    
# Line 1593  for (;;) Line 1599  for (;;)
1599      back a number of characters, not bytes. */      back a number of characters, not bytes. */
1600    
1601      case OP_REVERSE:      case OP_REVERSE:
1602  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1603      if (utf8)      if (utf)
1604        {        {
1605        i = GET(ecode, 1);        i = GET(ecode, 1);
1606        while (i-- > 0)        while (i-- > 0)
# Line 1625  for (;;) Line 1631  for (;;)
1631      function is able to force a failure. */      function is able to force a failure. */
1632    
1633      case OP_CALLOUT:      case OP_CALLOUT:
1634      if (pcre_callout != NULL)      if (PUBL(callout) != NULL)
1635        {        {
1636        pcre_callout_block cb;        PUBL(callout_block) cb;
1637        cb.version          = 2;   /* Version 2 of the callout block */        cb.version          = 2;   /* Version 2 of the callout block */
1638        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1639        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
# Line 1641  for (;;) Line 1647  for (;;)
1647        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1648        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1649        cb.mark             = md->nomatch_mark;        cb.mark             = md->nomatch_mark;
1650        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1651        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1652        }        }
1653      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 1700  for (;;) Line 1706  for (;;)
1706        else        else
1707          {          {
1708          new_recursive.offset_save =          new_recursive.offset_save =
1709            (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));            (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1710          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1711          }          }
1712        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
# Line 1715  for (;;) Line 1721  for (;;)
1721        do        do
1722          {          {
1723          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1724          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1725            md, eptrb, RM6);            md, eptrb, RM6);
1726          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1727              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
# Line 1724  for (;;) Line 1730  for (;;)
1730            {            {
1731            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1732            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1733              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1734    
1735            /* Set where we got to in the subject, and reset the start in case            /* Set where we got to in the subject, and reset the start in case
1736            it was changed by \K. This *is* propagated back out of a recursion,            it was changed by \K. This *is* propagated back out of a recursion,
# Line 1742  for (;;) Line 1748  for (;;)
1748            {            {
1749            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1750            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1751              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1752            RRETURN(rrc);            RRETURN(rrc);
1753            }            }
1754    
# Line 1754  for (;;) Line 1760  for (;;)
1760        DPRINTF(("Recursion didn't match\n"));        DPRINTF(("Recursion didn't match\n"));
1761        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1762        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1763          (pcre_free)(new_recursive.offset_save);          (PUBL(free))(new_recursive.offset_save);
1764        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1765        }        }
1766    
# Line 2066  for (;;) Line 2072  for (;;)
2072        be "non-word" characters. Remember the earliest consulted character for        be "non-word" characters. Remember the earliest consulted character for
2073        partial matching. */        partial matching. */
2074    
2075  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2076        if (utf8)        if (utf)
2077          {          {
2078          /* Get status of previous character */          /* Get status of previous character */
2079    
2080          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
2081            {            {
2082            USPTR lastptr = eptr - 1;            PCRE_PUCHAR lastptr = eptr - 1;
2083            while((*lastptr & 0xc0) == 0x80) lastptr--;            BACKCHAR(lastptr);
2084            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2085            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
2086  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2139  for (;;) Line 2145  for (;;)
2145              }              }
2146            else            else
2147  #endif  #endif
2148            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = MAX_255(eptr[-1])
2149                && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2150            }            }
2151    
2152          /* Get status of next character */          /* Get status of next character */
# Line 2162  for (;;) Line 2169  for (;;)
2169            }            }
2170          else          else
2171  #endif  #endif
2172          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          cur_is_word = MAX_255(*eptr)
2173              && ((md->ctypes[*eptr] & ctype_word) != 0);
2174          }          }
2175    
2176        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
# Line 2186  for (;;) Line 2194  for (;;)
2194        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2195        }        }
2196      eptr++;      eptr++;
2197      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  #ifdef SUPPORT_UTF
2198        if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2199    #endif
2200      ecode++;      ecode++;
2201      break;      break;
2202    
# Line 2211  for (;;) Line 2221  for (;;)
2221        }        }
2222      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2223      if (      if (
2224  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2225         c < 256 &&         c < 256 &&
2226  #endif  #endif
2227         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
# Line 2228  for (;;) Line 2238  for (;;)
2238        }        }
2239      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2240      if (      if (
2241  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2242         c >= 256 ||         c > 255 ||
2243  #endif  #endif
2244         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
2245         )         )
# Line 2245  for (;;) Line 2255  for (;;)
2255        }        }
2256      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2257      if (      if (
2258  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2259         c < 256 &&         c < 256 &&
2260  #endif  #endif
2261         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
# Line 2262  for (;;) Line 2272  for (;;)
2272        }        }
2273      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2274      if (      if (
2275  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2276         c >= 256 ||         c > 255 ||
2277  #endif  #endif
2278         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
2279         )         )
# Line 2279  for (;;) Line 2289  for (;;)
2289        }        }
2290      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2291      if (      if (
2292  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2293         c < 256 &&         c < 256 &&
2294  #endif  #endif
2295         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
# Line 2296  for (;;) Line 2306  for (;;)
2306        }        }
2307      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2308      if (      if (
2309  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2310         c >= 256 ||         c > 255 ||
2311  #endif  #endif
2312         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
2313         )         )
# Line 2475  for (;;) Line 2485  for (;;)
2485          break;          break;
2486    
2487          case PT_GC:          case PT_GC:
2488          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2489            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2490          break;          break;
2491    
# Line 2492  for (;;) Line 2502  for (;;)
2502          /* These are specials */          /* These are specials */
2503    
2504          case PT_ALNUM:          case PT_ALNUM:
2505          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2506               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2507            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2508          break;          break;
2509    
2510          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2511          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2512               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2513                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2514            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2515          break;          break;
2516    
2517          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2518          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2519               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2520               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2521                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
# Line 2513  for (;;) Line 2523  for (;;)
2523          break;          break;
2524    
2525          case PT_WORD:          case PT_WORD:
2526          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2527               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2528               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2529            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2530          break;          break;
# Line 2543  for (;;) Line 2553  for (;;)
2553      while (eptr < md->end_subject)      while (eptr < md->end_subject)
2554        {        {
2555        int len = 1;        int len = 1;
2556        if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }        if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2557        if (UCD_CATEGORY(c) != ucp_M) break;        if (UCD_CATEGORY(c) != ucp_M) break;
2558        eptr += len;        eptr += len;
2559        }        }
# Line 2564  for (;;) Line 2574  for (;;)
2574      case OP_REFI:      case OP_REFI:
2575      caseless = op == OP_REFI;      caseless = op == OP_REFI;
2576      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2577      ecode += 3;      ecode += 1 + IMM2_SIZE;
2578    
2579      /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2580    
# Line 2604  for (;;) Line 2614  for (;;)
2614        case OP_CRMINRANGE:        case OP_CRMINRANGE:
2615        minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2616        min = GET2(ecode, 1);        min = GET2(ecode, 1);
2617        max = GET2(ecode, 3);        max = GET2(ecode, 1 + IMM2_SIZE);
2618        if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2619        ecode += 5;        ecode += 1 + 2 * IMM2_SIZE;
2620        break;        break;
2621    
2622        default:               /* No repeat follows */        default:               /* No repeat follows */
# Line 2620  for (;;) Line 2630  for (;;)
2630        }        }
2631    
2632      /* Handle repeated back references. If the length of the reference is      /* Handle repeated back references. If the length of the reference is
2633      zero, just continue with the main loop. */      zero, just continue with the main loop. If the length is negative, it
2634        means the reference is unset in non-Java-compatible mode. If the minimum is
2635        zero, we can continue at the same level without recursion. For any other
2636        minimum, carrying on will result in NOMATCH. */
2637    
2638      if (length == 0) continue;      if (length == 0) continue;
2639        if (length < 0 && min == 0) continue;
2640    
2641      /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2642      the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
# Line 2703  for (;;) Line 2717  for (;;)
2717      case OP_NCLASS:      case OP_NCLASS:
2718      case OP_CLASS:      case OP_CLASS:
2719        {        {
2720          /* The data variable is saved across frames, so the byte map needs to
2721          be stored there. */
2722    #define BYTE_MAP ((pcre_uint8 *)data)
2723        data = ecode + 1;                /* Save for matching */        data = ecode + 1;                /* Save for matching */
2724        ecode += 33;                     /* Advance past the item */        ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2725    
2726        switch (*ecode)        switch (*ecode)
2727          {          {
# Line 2725  for (;;) Line 2742  for (;;)
2742          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2743          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2744          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2745          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2746          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2747          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2748          break;          break;
2749    
2750          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2737  for (;;) Line 2754  for (;;)
2754    
2755        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
2756    
2757  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2758        /* UTF-8 mode */        if (utf)
       if (utf8)  
2759          {          {
2760          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2761            {            {
# Line 2754  for (;;) Line 2770  for (;;)
2770              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2771              }              }
2772            else            else
2773              {              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  
             }  
2774            }            }
2775          }          }
2776        else        else
2777  #endif  #endif
2778        /* Not UTF-8 mode */        /* Not UTF mode */
2779          {          {
2780          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2781            {            {
# Line 2771  for (;;) Line 2785  for (;;)
2785              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2786              }              }
2787            c = *eptr++;            c = *eptr++;
2788            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2789              if (c > 255)
2790                {
2791                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2792                }
2793              else
2794    #endif
2795                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2796            }            }
2797          }          }
2798    
# Line 2785  for (;;) Line 2806  for (;;)
2806    
2807        if (minimize)        if (minimize)
2808          {          {
2809  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2810          /* UTF-8 mode */          if (utf)
         if (utf8)  
2811            {            {
2812            for (fi = min;; fi++)            for (fi = min;; fi++)
2813              {              {
# Line 2805  for (;;) Line 2825  for (;;)
2825                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2826                }                }
2827              else              else
2828                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  
               }  
2829              }              }
2830            }            }
2831          else          else
2832  #endif  #endif
2833          /* Not UTF-8 mode */          /* Not UTF mode */
2834            {            {
2835            for (fi = min;; fi++)            for (fi = min;; fi++)
2836              {              {
# Line 2825  for (;;) Line 2843  for (;;)
2843                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2844                }                }
2845              c = *eptr++;              c = *eptr++;
2846              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2847                if (c > 255)
2848                  {
2849                  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2850                  }
2851                else
2852    #endif
2853                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2854              }              }
2855            }            }
2856          /* Control never gets here */          /* Control never gets here */
# Line 2837  for (;;) Line 2862  for (;;)
2862          {          {
2863          pp = eptr;          pp = eptr;
2864    
2865  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2866          /* UTF-8 mode */          if (utf)
         if (utf8)  
2867            {            {
2868            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2869              {              {
# Line 2855  for (;;) Line 2879  for (;;)
2879                if (op == OP_CLASS) break;                if (op == OP_CLASS) break;
2880                }                }
2881              else              else
2882                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
               if ((data[c/8] & (1 << (c&7))) == 0) break;  
               }  
2883              eptr += len;              eptr += len;
2884              }              }
2885            for (;;)            for (;;)
# Line 2870  for (;;) Line 2892  for (;;)
2892            }            }
2893          else          else
2894  #endif  #endif
2895            /* Not UTF-8 mode */            /* Not UTF mode */
2896            {            {
2897            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2898              {              {
# Line 2880  for (;;) Line 2902  for (;;)
2902                break;                break;
2903                }                }
2904              c = *eptr;              c = *eptr;
2905              if ((data[c/8] & (1 << (c&7))) == 0) break;  #ifndef COMPILE_PCRE8
2906                if (c > 255)
2907                  {
2908                  if (op == OP_CLASS) break;
2909                  }
2910                else
2911    #endif
2912                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2913              eptr++;              eptr++;
2914              }              }
2915            while (eptr >= pp)            while (eptr >= pp)
# Line 2893  for (;;) Line 2922  for (;;)
2922    
2923          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2924          }          }
2925    #undef BYTE_MAP
2926        }        }
2927      /* Control never gets here */      /* Control never gets here */
2928    
# Line 2901  for (;;) Line 2931  for (;;)
2931      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2932      mode, because Unicode properties are supported in non-UTF-8 mode. */      mode, because Unicode properties are supported in non-UTF-8 mode. */
2933    
2934  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2935      case OP_XCLASS:      case OP_XCLASS:
2936        {        {
2937        data = ecode + 1 + LINK_SIZE;                /* Save for matching */        data = ecode + 1 + LINK_SIZE;                /* Save for matching */
# Line 2926  for (;;) Line 2956  for (;;)
2956          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2957          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2958          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2959          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2960          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2961          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2962          break;          break;
2963    
2964          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2946  for (;;) Line 2976  for (;;)
2976            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2977            }            }
2978          GETCHARINCTEST(c, eptr);          GETCHARINCTEST(c, eptr);
2979          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
2980          }          }
2981    
2982        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 2970  for (;;) Line 3000  for (;;)
3000              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3001              }              }
3002            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3003            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3004            }            }
3005          /* Control never gets here */          /* Control never gets here */
3006          }          }
# Line 2988  for (;;) Line 3018  for (;;)
3018              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3019              break;              break;
3020              }              }
3021    #ifdef SUPPORT_UTF
3022            GETCHARLENTEST(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
3023            if (!_pcre_xclass(c, data)) break;  #else
3024              c = *eptr;
3025    #endif
3026              if (!PRIV(xclass)(c, data, utf)) break;
3027            eptr += len;            eptr += len;
3028            }            }
3029          for(;;)          for(;;)
# Line 2997  for (;;) Line 3031  for (;;)
3031            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3032            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3033            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3034            if (utf8) BACKCHAR(eptr);  #ifdef SUPPORT_UTF
3035              if (utf) BACKCHAR(eptr);
3036    #endif
3037            }            }
3038          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3039          }          }
# Line 3009  for (;;) Line 3045  for (;;)
3045      /* Match a single character, casefully */      /* Match a single character, casefully */
3046    
3047      case OP_CHAR:      case OP_CHAR:
3048  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3049      if (utf8)      if (utf)
3050        {        {
3051        length = 1;        length = 1;
3052        ecode++;        ecode++;
# Line 3024  for (;;) Line 3060  for (;;)
3060        }        }
3061      else      else
3062  #endif  #endif
3063        /* Not UTF mode */
     /* Non-UTF-8 mode */  
3064        {        {
3065        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
3066          {          {
# Line 3037  for (;;) Line 3072  for (;;)
3072        }        }
3073      break;      break;
3074    
3075      /* Match a single character, caselessly. If we are at the end of the      /* Match a single character, caselessly. If we are at the end of the
3076      subject, give up immediately. */      subject, give up immediately. */
3077    
3078      case OP_CHARI:      case OP_CHARI:
3079      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
3080        {        {
3081        SCHECK_PARTIAL();        SCHECK_PARTIAL();
3082        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
3083        }        }
3084    
3085  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3086      if (utf8)      if (utf)
3087        {        {
3088        length = 1;        length = 1;
3089        ecode++;        ecode++;
3090        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
3091    
3092        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
3093        we know that its other case must also be one byte long, so we can use the        we know that its other case must also be one byte long, so we can use the
3094        fast lookup table. We know that there is at least one byte left in the        fast lookup table. We know that there is at least one byte left in the
3095        subject. */        subject. */
3096    
3097        if (fc < 128)        if (fc < 128)
3098          {          {
3099          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[fc]
3100                != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3101            ecode++;
3102            eptr++;
3103          }          }
3104    
3105        /* Otherwise we must pick up the subject character. Note that we cannot        /* Otherwise we must pick up the subject character. Note that we cannot
# Line 3087  for (;;) Line 3125  for (;;)
3125          }          }
3126        }        }
3127      else      else
3128  #endif   /* SUPPORT_UTF8 */  #endif   /* SUPPORT_UTF */
3129    
3130      /* Non-UTF-8 mode */      /* Not UTF mode */
3131        {        {
3132        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);        if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3133              != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3134          eptr++;
3135        ecode += 2;        ecode += 2;
3136        }        }
3137      break;      break;
# Line 3101  for (;;) Line 3141  for (;;)
3141      case OP_EXACT:      case OP_EXACT:
3142      case OP_EXACTI:      case OP_EXACTI:
3143      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3144      ecode += 3;      ecode += 1 + IMM2_SIZE;
3145      goto REPEATCHAR;      goto REPEATCHAR;
3146    
3147      case OP_POSUPTO:      case OP_POSUPTO:
# Line 3116  for (;;) Line 3156  for (;;)
3156      min = 0;      min = 0;
3157      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3158      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3159      ecode += 3;      ecode += 1 + IMM2_SIZE;
3160      goto REPEATCHAR;      goto REPEATCHAR;
3161    
3162      case OP_POSSTAR:      case OP_POSSTAR:
# Line 3164  for (;;) Line 3204  for (;;)
3204      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. */
3205    
3206      REPEATCHAR:      REPEATCHAR:
3207  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3208      if (utf8)      if (utf)
3209        {        {
3210        length = 1;        length = 1;
3211        charptr = ecode;        charptr = ecode;
# Line 3181  for (;;) Line 3221  for (;;)
3221          unsigned int othercase;          unsigned int othercase;
3222          if (op >= OP_STARI &&     /* Caseless */          if (op >= OP_STARI &&     /* Caseless */
3223              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
3224            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = PRIV(ord2utf)(othercase, occhars);
3225          else oclength = 0;          else oclength = 0;
3226  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3227    
3228          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3229            {            {
3230            if (eptr <= md->end_subject - length &&            if (eptr <= md->end_subject - length &&
3231              memcmp(eptr, charptr, length) == 0) eptr += length;              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3232  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3233            else if (oclength > 0 &&            else if (oclength > 0 &&
3234                     eptr <= md->end_subject - oclength &&                     eptr <= md->end_subject - oclength &&
3235                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3236  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3237            else            else
3238              {              {
# Line 3211  for (;;) Line 3251  for (;;)
3251              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3252              if (fi >= max) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3253              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3254                memcmp(eptr, charptr, length) == 0) eptr += length;                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3255  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3256              else if (oclength > 0 &&              else if (oclength > 0 &&
3257                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3258                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3259  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3260              else              else
3261                {                {
# Line 3232  for (;;) Line 3272  for (;;)
3272            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3273              {              {
3274              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3275                  memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3276  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3277              else if (oclength > 0 &&              else if (oclength > 0 &&
3278                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3279                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3280  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3281              else              else
3282                {                {
# Line 3268  for (;;) Line 3308  for (;;)
3308        value of fc will always be < 128. */        value of fc will always be < 128. */
3309        }        }
3310      else      else
3311  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3312          /* When not in UTF-8 mode, load a single-byte character. */
3313      /* When not in UTF-8 mode, load a single-byte character. */        fc = *ecode++;
   
     fc = *ecode++;  
3314    
3315      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always one character, though we may
3316      may not be in UTF-8 mode. The code is duplicated for the caseless and      or may not be in UTF mode. The code is duplicated for the caseless and
3317      caseful cases, for speed, since matching characters is likely to be quite      caseful cases, for speed, since matching characters is likely to be quite
3318      common. First, ensure the minimum number of matches are present. If min =      common. First, ensure the minimum number of matches are present. If min =
3319      max, continue at the same level without recursing. Otherwise, if      max, continue at the same level without recursing. Otherwise, if
# Line 3288  for (;;) Line 3326  for (;;)
3326    
3327      if (op >= OP_STARI)  /* Caseless */      if (op >= OP_STARI)  /* Caseless */
3328        {        {
3329        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3330          /* fc must be < 128 if UTF is enabled. */
3331          foc = md->fcc[fc];
3332    #else
3333    #ifdef SUPPORT_UTF
3334    #ifdef SUPPORT_UCP
3335          if (utf && fc > 127)
3336            foc = UCD_OTHERCASE(fc);
3337    #else
3338          if (utf && fc > 127)
3339            foc = fc;
3340    #endif /* SUPPORT_UCP */
3341          else
3342    #endif /* SUPPORT_UTF */
3343            foc = TABLE_GET(fc, md->fcc, fc);
3344    #endif /* COMPILE_PCRE8 */
3345    
3346        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3347          {          {
3348          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
# Line 3296  for (;;) Line 3350  for (;;)
3350            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3351            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3352            }            }
3353          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3354            eptr++;
3355          }          }
3356        if (min == max) continue;        if (min == max) continue;
3357        if (minimize)        if (minimize)
# Line 3311  for (;;) Line 3366  for (;;)
3366              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3367              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3368              }              }
3369            if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3370              eptr++;
3371            }            }
3372          /* Control never gets here */          /* Control never gets here */
3373          }          }
# Line 3325  for (;;) Line 3381  for (;;)
3381              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3382              break;              break;
3383              }              }
3384            if (fc != md->lcc[*eptr]) break;            if (fc != *eptr && foc != *eptr) break;
3385            eptr++;            eptr++;
3386            }            }
3387    
# Line 3414  for (;;) Line 3470  for (;;)
3470      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3471      if (op == OP_NOTI)         /* The caseless case */      if (op == OP_NOTI)         /* The caseless case */
3472        {        {
3473  #ifdef SUPPORT_UTF8        register int ch, och;
3474        if (c < 256)        ch = *ecode++;
3475  #endif  #ifdef COMPILE_PCRE8
3476        c = md->lcc[c];        /* ch must be < 128 if UTF is enabled. */
3477        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        och = md->fcc[ch];
3478    #else
3479    #ifdef SUPPORT_UTF
3480    #ifdef SUPPORT_UCP
3481          if (utf && ch > 127)
3482            och = UCD_OTHERCASE(ch);
3483    #else
3484          if (utf && ch > 127)
3485            och = ch;
3486    #endif /* SUPPORT_UCP */
3487          else
3488    #endif /* SUPPORT_UTF */
3489            och = TABLE_GET(ch, md->fcc, ch);
3490    #endif /* COMPILE_PCRE8 */
3491          if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3492        }        }
3493      else    /* Caseful */      else    /* Caseful */
3494        {        {
# Line 3436  for (;;) Line 3506  for (;;)
3506      case OP_NOTEXACT:      case OP_NOTEXACT:
3507      case OP_NOTEXACTI:      case OP_NOTEXACTI:
3508      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3509      ecode += 3;      ecode += 1 + IMM2_SIZE;
3510      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3511    
3512      case OP_NOTUPTO:      case OP_NOTUPTO:
# Line 3446  for (;;) Line 3516  for (;;)
3516      min = 0;      min = 0;
3517      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3518      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3519      ecode += 3;      ecode += 1 + IMM2_SIZE;
3520      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3521    
3522      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
# Line 3478  for (;;) Line 3548  for (;;)
3548      possessive = TRUE;      possessive = TRUE;
3549      min = 0;      min = 0;
3550      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3551      ecode += 3;      ecode += 1 + IMM2_SIZE;
3552      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3553    
3554      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 3517  for (;;) Line 3587  for (;;)
3587    
3588      if (op >= OP_NOTSTARI)     /* Caseless */      if (op >= OP_NOTSTARI)     /* Caseless */
3589        {        {
3590        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3591          /* fc must be < 128 if UTF is enabled. */
3592          foc = md->fcc[fc];
3593    #else
3594    #ifdef SUPPORT_UTF
3595    #ifdef SUPPORT_UCP
3596          if (utf && fc > 127)
3597            foc = UCD_OTHERCASE(fc);
3598    #else
3599          if (utf && fc > 127)
3600            foc = fc;
3601    #endif /* SUPPORT_UCP */
3602          else
3603    #endif /* SUPPORT_UTF */
3604            foc = TABLE_GET(fc, md->fcc, fc);
3605    #endif /* COMPILE_PCRE8 */
3606    
3607  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3608        /* UTF-8 mode */        if (utf)
       if (utf8)  
3609          {          {
3610          register unsigned int d;          register unsigned int d;
3611          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3532  for (;;) Line 3616  for (;;)
3616              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3617              }              }
3618            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3619            if (d < 256) d = md->lcc[d];            if (fc == d || foc == d) RRETURN(MATCH_NOMATCH);
           if (fc == d) RRETURN(MATCH_NOMATCH);  
3620            }            }
3621          }          }
3622        else        else
3623  #endif  #endif
3624          /* Not UTF mode */
       /* Not UTF-8 mode */  
3625          {          {
3626          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3627            {            {
# Line 3548  for (;;) Line 3630  for (;;)
3630              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3631              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3632              }              }
3633            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3634              eptr++;
3635            }            }
3636          }          }
3637    
# Line 3556  for (;;) Line 3639  for (;;)
3639    
3640        if (minimize)        if (minimize)
3641          {          {
3642  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3643          /* UTF-8 mode */          if (utf)
         if (utf8)  
3644            {            {
3645            register unsigned int d;            register unsigned int d;
3646            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3572  for (;;) Line 3654  for (;;)
3654                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3655                }                }
3656              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3657              if (d < 256) d = md->lcc[d];              if (fc == d || foc == d) RRETURN(MATCH_NOMATCH);
             if (fc == d) RRETURN(MATCH_NOMATCH);  
3658              }              }
3659            }            }
3660          else          else
3661  #endif  #endif
3662          /* Not UTF-8 mode */          /* Not UTF mode */
3663            {            {
3664            for (fi = min;; fi++)            for (fi = min;; fi++)
3665              {              {
# Line 3590  for (;;) Line 3671  for (;;)
3671                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3672                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3673                }                }
3674              if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);              if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3675                eptr++;
3676              }              }
3677            }            }
3678          /* Control never gets here */          /* Control never gets here */
# Line 3602  for (;;) Line 3684  for (;;)
3684          {          {
3685          pp = eptr;          pp = eptr;
3686    
3687  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3688          /* UTF-8 mode */          if (utf)
         if (utf8)  
3689            {            {
3690            register unsigned int d;            register unsigned int d;
3691            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3616  for (;;) Line 3697  for (;;)
3697                break;                break;
3698                }                }
3699              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3700              if (d < 256) d = md->lcc[d];              if (fc == d || foc == d) break;
             if (fc == d) break;  
3701              eptr += len;              eptr += len;
3702              }              }
3703          if (possessive) continue;          if (possessive) continue;
# Line 3631  for (;;) Line 3711  for (;;)
3711            }            }
3712          else          else
3713  #endif  #endif
3714          /* Not UTF-8 mode */          /* Not UTF mode */
3715            {            {
3716            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3717              {              {
# Line 3640  for (;;) Line 3720  for (;;)
3720                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3721                break;                break;
3722                }                }
3723              if (fc == md->lcc[*eptr]) break;              if (fc == *eptr || foc == *eptr) break;
3724              eptr++;              eptr++;
3725              }              }
3726            if (possessive) continue;            if (possessive) continue;
# Line 3661  for (;;) Line 3741  for (;;)
3741    
3742      else      else
3743        {        {
3744  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3745        /* UTF-8 mode */        if (utf)
       if (utf8)  
3746          {          {
3747          register unsigned int d;          register unsigned int d;
3748          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3679  for (;;) Line 3758  for (;;)
3758          }          }
3759        else        else
3760  #endif  #endif
3761        /* Not UTF-8 mode */        /* Not UTF mode */
3762          {          {
3763          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3764            {            {
# Line 3696  for (;;) Line 3775  for (;;)
3775    
3776        if (minimize)        if (minimize)
3777          {          {
3778  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3779          /* UTF-8 mode */          if (utf)
         if (utf8)  
3780            {            {
3781            register unsigned int d;            register unsigned int d;
3782            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3717  for (;;) Line 3795  for (;;)
3795            }            }
3796          else          else
3797  #endif  #endif
3798          /* Not UTF-8 mode */          /* Not UTF mode */
3799            {            {
3800            for (fi = min;; fi++)            for (fi = min;; fi++)
3801              {              {
# Line 3741  for (;;) Line 3819  for (;;)
3819          {          {
3820          pp = eptr;          pp = eptr;
3821    
3822  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3823          /* UTF-8 mode */          if (utf)
         if (utf8)  
3824            {            {
3825            register unsigned int d;            register unsigned int d;
3826            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3769  for (;;) Line 3846  for (;;)
3846            }            }
3847          else          else
3848  #endif  #endif
3849          /* Not UTF-8 mode */          /* Not UTF mode */
3850            {            {
3851            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3852              {              {
# Line 3802  for (;;) Line 3879  for (;;)
3879      case OP_TYPEEXACT:      case OP_TYPEEXACT:
3880      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3881      minimize = TRUE;      minimize = TRUE;
3882      ecode += 3;      ecode += 1 + IMM2_SIZE;
3883      goto REPEATTYPE;      goto REPEATTYPE;
3884    
3885      case OP_TYPEUPTO:      case OP_TYPEUPTO:
# Line 3810  for (;;) Line 3887  for (;;)
3887      min = 0;      min = 0;
3888      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3889      minimize = *ecode == OP_TYPEMINUPTO;      minimize = *ecode == OP_TYPEMINUPTO;
3890      ecode += 3;      ecode += 1 + IMM2_SIZE;
3891      goto REPEATTYPE;      goto REPEATTYPE;
3892    
3893      case OP_TYPEPOSSTAR:      case OP_TYPEPOSSTAR:
# Line 3838  for (;;) Line 3915  for (;;)
3915      possessive = TRUE;      possessive = TRUE;
3916      min = 0;      min = 0;
3917      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3918      ecode += 3;      ecode += 1 + IMM2_SIZE;
3919      goto REPEATTYPE;      goto REPEATTYPE;
3920    
3921      case OP_TYPESTAR:      case OP_TYPESTAR:
# Line 4045  for (;;) Line 4122  for (;;)
4122            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4123              {              {
4124              int len = 1;              int len = 1;
4125              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4126              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
4127              eptr += len;              eptr += len;
4128              }              }
# Line 4057  for (;;) Line 4134  for (;;)
4134    
4135  /* Handle all other cases when the coding is UTF-8 */  /* Handle all other cases when the coding is UTF-8 */
4136    
4137  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4138        if (utf8) switch(ctype)        if (utf) switch(ctype)
4139          {          {
4140          case OP_ANY:          case OP_ANY:
4141          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 4070  for (;;) Line 4147  for (;;)
4147              }              }
4148            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4149            eptr++;            eptr++;
4150            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4151            }            }
4152          break;          break;
4153    
# Line 4083  for (;;) Line 4160  for (;;)
4160              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4161              }              }
4162            eptr++;            eptr++;
4163            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4164            }            }
4165          break;          break;
4166    
# Line 4265  for (;;) Line 4342  for (;;)
4342              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4343              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4344              }              }
4345            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
4346              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4347              eptr++;
4348            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4349            }            }
4350          break;          break;
# Line 4281  for (;;) Line 4359  for (;;)
4359              }              }
4360            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
4361              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4362            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            eptr++;
4363              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4364            }            }
4365          break;          break;
4366    
# Line 4293  for (;;) Line 4372  for (;;)
4372              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4373              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4374              }              }
4375            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
4376              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4377              eptr++;
4378            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4379            }            }
4380          break;          break;
# Line 4309  for (;;) Line 4389  for (;;)
4389              }              }
4390            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
4391              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4392            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            eptr++;
4393              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4394            }            }
4395          break;          break;
4396    
# Line 4321  for (;;) Line 4402  for (;;)
4402              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4403              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4404              }              }
4405            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
4406              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4407              eptr++;
4408            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4409            }            }
4410          break;          break;
# Line 4332  for (;;) Line 4414  for (;;)
4414          }  /* End switch(ctype) */          }  /* End switch(ctype) */
4415    
4416        else        else
4417  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF */
4418    
4419        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
4420        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. */
# Line 4392  for (;;) Line 4474  for (;;)
4474              case 0x000b:              case 0x000b:
4475              case 0x000c:              case 0x000c:
4476              case 0x0085:              case 0x0085:
4477    #ifdef COMPILE_PCRE16
4478                case 0x2028:
4479                case 0x2029:
4480    #endif
4481              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4482              break;              break;
4483              }              }
# Line 4412  for (;;) Line 4498  for (;;)
4498              case 0x09:      /* HT */              case 0x09:      /* HT */
4499              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4500              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
4501    #ifdef COMPILE_PCRE16
4502                case 0x1680:    /* OGHAM SPACE MARK */
4503                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4504                case 0x2000:    /* EN QUAD */
4505                case 0x2001:    /* EM QUAD */
4506                case 0x2002:    /* EN SPACE */
4507                case 0x2003:    /* EM SPACE */
4508                case 0x2004:    /* THREE-PER-EM SPACE */
4509                case 0x2005:    /* FOUR-PER-EM SPACE */
4510                case 0x2006:    /* SIX-PER-EM SPACE */
4511                case 0x2007:    /* FIGURE SPACE */
4512                case 0x2008:    /* PUNCTUATION SPACE */
4513                case 0x2009:    /* THIN SPACE */
4514                case 0x200A:    /* HAIR SPACE */
4515                case 0x202f:    /* NARROW NO-BREAK SPACE */
4516                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4517                case 0x3000:    /* IDEOGRAPHIC SPACE */
4518    #endif
4519              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4520              }              }
4521            }            }
# Line 4431  for (;;) Line 4535  for (;;)
4535              case 0x09:      /* HT */              case 0x09:      /* HT */
4536              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4537              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
4538    #ifdef COMPILE_PCRE16
4539                case 0x1680:    /* OGHAM SPACE MARK */
4540                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4541                case 0x2000:    /* EN QUAD */
4542                case 0x2001:    /* EM QUAD */
4543                case 0x2002:    /* EN SPACE */
4544                case 0x2003:    /* EM SPACE */
4545                case 0x2004:    /* THREE-PER-EM SPACE */
4546                case 0x2005:    /* FOUR-PER-EM SPACE */
4547                case 0x2006:    /* SIX-PER-EM SPACE */
4548                case 0x2007:    /* FIGURE SPACE */
4549                case 0x2008:    /* PUNCTUATION SPACE */
4550                case 0x2009:    /* THIN SPACE */
4551                case 0x200A:    /* HAIR SPACE */
4552                case 0x202f:    /* NARROW NO-BREAK SPACE */
4553                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4554                case 0x3000:    /* IDEOGRAPHIC SPACE */
4555    #endif
4556              break;              break;
4557              }              }
4558            }            }
# Line 4452  for (;;) Line 4574  for (;;)
4574              case 0x0c:      /* FF */              case 0x0c:      /* FF */
4575              case 0x0d:      /* CR */              case 0x0d:      /* CR */
4576              case 0x85:      /* NEL */              case 0x85:      /* NEL */
4577    #ifdef COMPILE_PCRE16
4578                case 0x2028:    /* LINE SEPARATOR */
4579                case 0x2029:    /* PARAGRAPH SEPARATOR */
4580    #endif
4581              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4582              }              }
4583            }            }
# Line 4473  for (;;) Line 4599  for (;;)
4599              case 0x0c:      /* FF */              case 0x0c:      /* FF */
4600              case 0x0d:      /* CR */              case 0x0d:      /* CR */
4601              case 0x85:      /* NEL */              case 0x85:      /* NEL */
4602    #ifdef COMPILE_PCRE16
4603                case 0x2028:    /* LINE SEPARATOR */
4604                case 0x2029:    /* PARAGRAPH SEPARATOR */
4605    #endif
4606              break;              break;
4607              }              }
4608            }            }
# Line 4486  for (;;) Line 4616  for (;;)
4616              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4617              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4618              }              }
4619            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4620                RRETURN(MATCH_NOMATCH);
4621              eptr++;
4622            }            }
4623          break;          break;
4624    
# Line 4498  for (;;) Line 4630  for (;;)
4630              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4631              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4632              }              }
4633            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4634                RRETURN(MATCH_NOMATCH);
4635              eptr++;
4636            }            }
4637          break;          break;
4638    
# Line 4510  for (;;) Line 4644  for (;;)
4644              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4645              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4646              }              }
4647            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4648                RRETURN(MATCH_NOMATCH);
4649              eptr++;
4650            }            }
4651          break;          break;
4652    
# Line 4522  for (;;) Line 4658  for (;;)
4658              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4659              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4660              }              }
4661            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4662                RRETURN(MATCH_NOMATCH);
4663              eptr++;
4664            }            }
4665          break;          break;
4666    
# Line 4534  for (;;) Line 4672  for (;;)
4672              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4673              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4674              }              }
4675            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4676              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4677              eptr++;
4678            }            }
4679          break;          break;
4680    
# Line 4547  for (;;) Line 4686  for (;;)
4686              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4687              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4688              }              }
4689            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4690              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4691              eptr++;
4692            }            }
4693          break;          break;
4694    
# Line 4766  for (;;) Line 4906  for (;;)
4906            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4907              {              {
4908              int len = 1;              int len = 1;
4909              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4910              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
4911              eptr += len;              eptr += len;
4912              }              }
# Line 4775  for (;;) Line 4915  for (;;)
4915        else        else
4916  #endif     /* SUPPORT_UCP */  #endif     /* SUPPORT_UCP */
4917    
4918  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4919        /* UTF-8 mode */        if (utf)
       if (utf8)  
4920          {          {
4921          for (fi = min;; fi++)          for (fi = min;; fi++)
4922            {            {
# Line 4919  for (;;) Line 5058  for (;;)
5058              break;              break;
5059    
5060              case OP_WHITESPACE:              case OP_WHITESPACE:
5061              if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)              if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5062                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5063              break;              break;
5064    
# Line 4940  for (;;) Line 5079  for (;;)
5079          }          }
5080        else        else
5081  #endif  #endif
5082        /* Not UTF-8 mode */        /* Not UTF mode */
5083          {          {
5084          for (fi = min;; fi++)          for (fi = min;; fi++)
5085            {            {
# Line 4976  for (;;) Line 5115  for (;;)
5115                case 0x000b:                case 0x000b:
5116                case 0x000c:                case 0x000c:
5117                case 0x0085:                case 0x0085:
5118    #ifdef COMPILE_PCRE16
5119                  case 0x2028:
5120                  case 0x2029:
5121    #endif
5122                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5123                break;                break;
5124                }                }
# Line 4988  for (;;) Line 5131  for (;;)
5131                case 0x09:      /* HT */                case 0x09:      /* HT */
5132                case 0x20:      /* SPACE */                case 0x20:      /* SPACE */
5133                case 0xa0:      /* NBSP */                case 0xa0:      /* NBSP */
5134    #ifdef COMPILE_PCRE16
5135                  case 0x1680:    /* OGHAM SPACE MARK */
5136                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5137                  case 0x2000:    /* EN QUAD */
5138                  case 0x2001:    /* EM QUAD */
5139                  case 0x2002:    /* EN SPACE */
5140                  case 0x2003:    /* EM SPACE */
5141                  case 0x2004:    /* THREE-PER-EM SPACE */
5142                  case 0x2005:    /* FOUR-PER-EM SPACE */
5143                  case 0x2006:    /* SIX-PER-EM SPACE */
5144                  case 0x2007:    /* FIGURE SPACE */
5145                  case 0x2008:    /* PUNCTUATION SPACE */
5146                  case 0x2009:    /* THIN SPACE */
5147                  case 0x200A:    /* HAIR SPACE */
5148                  case 0x202f:    /* NARROW NO-BREAK SPACE */
5149                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5150                  case 0x3000:    /* IDEOGRAPHIC SPACE */
5151    #endif
5152                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5153                }                }
5154              break;              break;
# Line 4999  for (;;) Line 5160  for (;;)
5160                case 0x09:      /* HT */                case 0x09:      /* HT */
5161                case 0x20:      /* SPACE */                case 0x20:      /* SPACE */
5162                case 0xa0:      /* NBSP */                case 0xa0:      /* NBSP */
5163    #ifdef COMPILE_PCRE16
5164                  case 0x1680:    /* OGHAM SPACE MARK */
5165                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5166                  case 0x2000:    /* EN QUAD */
5167                  case 0x2001:    /* EM QUAD */
5168                  case 0x2002:    /* EN SPACE */
5169                  case 0x2003:    /* EM SPACE */
5170                  case 0x2004:    /* THREE-PER-EM SPACE */
5171                  case 0x2005:    /* FOUR-PER-EM SPACE */
5172                  case 0x2006:    /* SIX-PER-EM SPACE */
5173                  case 0x2007:    /* FIGURE SPACE */
5174                  case 0x2008:    /* PUNCTUATION SPACE */
5175                  case 0x2009:    /* THIN SPACE */
5176                  case 0x200A:    /* HAIR SPACE */
5177                  case 0x202f:    /* NARROW NO-BREAK SPACE */
5178                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5179                  case 0x3000:    /* IDEOGRAPHIC SPACE */
5180    #endif
5181                break;                break;
5182                }                }
5183              break;              break;
# Line 5012  for (;;) Line 5191  for (;;)
5191                case 0x0c:      /* FF */                case 0x0c:      /* FF */
5192                case 0x0d:      /* CR */                case 0x0d:      /* CR */
5193                case 0x85:      /* NEL */                case 0x85:      /* NEL */
5194    #ifdef COMPILE_PCRE16
5195                  case 0x2028:    /* LINE SEPARATOR */
5196                  case 0x2029:    /* PARAGRAPH SEPARATOR */
5197    #endif
5198                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5199                }                }
5200              break;              break;
# Line 5025  for (;;) Line 5208  for (;;)
5208                case 0x0c:      /* FF */                case 0x0c:      /* FF */
5209                case 0x0d:      /* CR */                case 0x0d:      /* CR */
5210                case 0x85:      /* NEL */                case 0x85:      /* NEL */
5211    #ifdef COMPILE_PCRE16
5212                  case 0x2028:    /* LINE SEPARATOR */
5213                  case 0x2029:    /* PARAGRAPH SEPARATOR */
5214    #endif
5215                break;                break;
5216                }                }
5217              break;              break;
5218    
5219              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
5220              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5221              break;              break;
5222    
5223              case OP_DIGIT:              case OP_DIGIT:
5224              if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);              if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5225              break;              break;
5226    
5227              case OP_NOT_WHITESPACE:              case OP_NOT_WHITESPACE:
5228              if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);              if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5229              break;              break;
5230    
5231              case OP_WHITESPACE:              case OP_WHITESPACE:
5232              if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);              if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5233              break;              break;
5234    
5235              case OP_NOT_WORDCHAR:              case OP_NOT_WORDCHAR:
5236              if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);              if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5237              break;              break;
5238    
5239              case OP_WORDCHAR:              case OP_WORDCHAR:
5240              if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);              if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5241              break;              break;
5242    
5243              default:              default:
# Line 5239  for (;;) Line 5426  for (;;)
5426            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5427            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5428            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5429            if (utf8) BACKCHAR(eptr);            if (utf) BACKCHAR(eptr);
5430            }            }
5431          }          }
5432    
# Line 5256  for (;;) Line 5443  for (;;)
5443              SCHECK_PARTIAL();              SCHECK_PARTIAL();
5444              break;              break;
5445              }              }
5446            if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5447            if (UCD_CATEGORY(c) == ucp_M) break;            if (UCD_CATEGORY(c) == ucp_M) break;
5448            eptr += len;            eptr += len;
5449            while (eptr < md->end_subject)            while (eptr < md->end_subject)
5450              {              {
5451              len = 1;              len = 1;
5452              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5453              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
5454              eptr += len;              eptr += len;
5455              }              }
# Line 5279  for (;;) Line 5466  for (;;)
5466            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5467            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
5468              {              {
5469              if (!utf8) c = *eptr; else              if (!utf) c = *eptr; else
5470                {                {
5471                BACKCHAR(eptr);                BACKCHAR(eptr);
5472                GETCHAR(c, eptr);                GETCHAR(c, eptr);
# Line 5293  for (;;) Line 5480  for (;;)
5480        else        else
5481  #endif   /* SUPPORT_UCP */  #endif   /* SUPPORT_UCP */
5482    
5483  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
5484        /* UTF-8 mode */        if (utf)
   
       if (utf8)  
5485          {          {
5486          switch(ctype)          switch(ctype)
5487            {            {
# Line 5312  for (;;) Line 5497  for (;;)
5497                  }                  }
5498                if (IS_NEWLINE(eptr)) break;                if (IS_NEWLINE(eptr)) break;
5499                eptr++;                eptr++;
5500                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5501                }                }
5502              }              }
5503    
# Line 5329  for (;;) Line 5514  for (;;)
5514                  }                  }
5515                if (IS_NEWLINE(eptr)) break;                if (IS_NEWLINE(eptr)) break;
5516                eptr++;                eptr++;
5517                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5518                }                }
5519              }              }
5520            break;            break;
# Line 5345  for (;;) Line 5530  for (;;)
5530                  break;                  break;
5531                  }                  }
5532                eptr++;                eptr++;
5533                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5534                }                }
5535              }              }
5536            else            else
# Line 5578  for (;;) Line 5763  for (;;)
5763            }            }
5764          }          }
5765        else        else
5766  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
5767          /* Not UTF mode */
       /* Not UTF-8 mode */  
5768          {          {
5769          switch(ctype)          switch(ctype)
5770            {            {
# Line 5624  for (;;) Line 5808  for (;;)
5808                }                }
5809              else              else
5810                {                {
5811                if (c != 0x000a &&                if (c != 0x000a && (md->bsr_anycrlf ||
5812                    (md->bsr_anycrlf ||                  (c != 0x000b && c != 0x000c && c != 0x0085
5813                      (c != 0x000b && c != 0x000c && c != 0x0085)))  #ifdef COMPILE_PCRE16
5814                  break;                  && c != 0x2028 && c != 0x2029
5815    #endif
5816                    ))) break;
5817                eptr++;                eptr++;
5818                }                }
5819              }              }
# Line 5642  for (;;) Line 5828  for (;;)
5828                break;                break;
5829                }                }
5830              c = *eptr;              c = *eptr;
5831              if (c == 0x09 || c == 0x20 || c == 0xa0) break;              if (c == 0x09 || c == 0x20 || c == 0xa0
5832    #ifdef COMPILE_PCRE16
5833                  || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)
5834                  || c == 0x202f || c == 0x205f || c == 0x3000
5835    #endif
5836                  ) break;
5837              eptr++;              eptr++;
5838              }              }
5839            break;            break;
# Line 5656  for (;;) Line 5847  for (;;)
5847                break;                break;
5848                }                }
5849              c = *eptr;              c = *eptr;
5850              if (c != 0x09 && c != 0x20 && c != 0xa0) break;              if (c != 0x09 && c != 0x20 && c != 0xa0
5851    #ifdef COMPILE_PCRE16
5852                  && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)
5853                  && c != 0x202f && c != 0x205f && c != 0x3000
5854    #endif
5855                  ) break;
5856              eptr++;              eptr++;
5857              }              }
5858            break;            break;
# Line 5670  for (;;) Line 5866  for (;;)
5866                break;                break;
5867                }                }
5868              c = *eptr;              c = *eptr;
5869              if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)              if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85
5870                break;  #ifdef COMPILE_PCRE16
5871                  || c == 0x2028 || c == 0x2029
5872    #endif
5873                  ) break;
5874              eptr++;              eptr++;
5875              }              }
5876            break;            break;
# Line 5685  for (;;) Line 5884  for (;;)
5884                break;                break;
5885                }                }
5886              c = *eptr;              c = *eptr;
5887              if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)              if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85
5888                break;  #ifdef COMPILE_PCRE16
5889                  && c != 0x2028 && c != 0x2029
5890    #endif
5891                  ) break;
5892              eptr++;              eptr++;
5893              }              }
5894            break;            break;
# Line 5699  for (;;) Line 5901  for (;;)
5901                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5902                break;                break;
5903                }                }
5904              if ((md->ctypes[*eptr] & ctype_digit) != 0) break;              if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
5905              eptr++;              eptr++;
5906              }              }
5907            break;            break;
# Line 5712  for (;;) Line 5914  for (;;)
5914                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5915                break;                break;
5916                }                }
5917              if ((md->ctypes[*eptr] & ctype_digit) == 0) break;              if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
5918              eptr++;              eptr++;
5919              }              }
5920            break;            break;
# Line 5725  for (;;) Line 5927  for (;;)
5927                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5928                break;                break;
5929                }                }
5930              if ((md->ctypes[*eptr] & ctype_space) != 0) break;              if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
5931              eptr++;              eptr++;
5932              }              }
5933            break;            break;
# Line 5738  for (;;) Line 5940  for (;;)
5940                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5941                break;                break;
5942                }                }
5943              if ((md->ctypes[*eptr] & ctype_space) == 0) break;              if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
5944              eptr++;              eptr++;
5945              }              }
5946            break;            break;
# Line 5751  for (;;) Line 5953  for (;;)
5953                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5954                break;                break;
5955                }                }
5956              if ((md->ctypes[*eptr] & ctype_word) != 0) break;              if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
5957              eptr++;              eptr++;
5958              }              }
5959            break;            break;
# Line 5764  for (;;) Line 5966  for (;;)
5966                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5967                break;                break;
5968                }                }
5969              if ((md->ctypes[*eptr] & ctype_word) == 0) break;              if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
5970              eptr++;              eptr++;
5971              }              }
5972            break;            break;
# Line 5827  switch (frame->Xwhere) Line 6029  switch (frame->Xwhere)
6029    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6030    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6031    LBL(65) LBL(66)    LBL(65) LBL(66)
6032  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6033    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(21)
6034    #endif
6035    #ifdef SUPPORT_UTF
6036      LBL(16) LBL(18) LBL(20)
6037      LBL(22) LBL(23) LBL(28) LBL(30)
6038    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
6039  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6040    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6041    LBL(59) LBL(60) LBL(61) LBL(62)    LBL(59) LBL(60) LBL(61) LBL(62)
6042  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
6043  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
6044    default:    default:
6045    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6046    
6047    printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
6048    
6049    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
6050    }    }
6051  #undef LBL  #undef LBL
# Line 5923  Returns:          > 0 => success; value Line 6132  Returns:          > 0 => success; value
6132                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
6133  */  */
6134    
6135    #ifdef COMPILE_PCRE8
6136  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6137  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6138    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6139    int offsetcount)    int offsetcount)
6140    #else
6141    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6142    pcre16_exec(const pcre *argument_re, const pcre16_extra *extra_data,
6143      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6144      int offsetcount)
6145    #endif
6146  {  {
6147  int rc, ocount, arg_offset_max;  int rc, ocount, arg_offset_max;
 int first_byte = -1;  
 int req_byte = -1;  
 int req_byte2 = -1;  
6148  int newline;  int newline;
6149  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
6150  BOOL anchored;  BOOL anchored;
6151  BOOL startline;  BOOL startline;
6152  BOOL firstline;  BOOL firstline;
6153  BOOL first_byte_caseless = FALSE;  BOOL utf;
6154  BOOL req_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
6155  BOOL utf8;  BOOL has_req_char = FALSE;
6156    pcre_uchar first_char = 0;
6157    pcre_uchar first_char2 = 0;
6158    pcre_uchar req_char = 0;
6159    pcre_uchar req_char2 = 0;
6160  match_data match_block;  match_data match_block;
6161  match_data *md = &match_block;  match_data *md = &match_block;
6162  const uschar *tables;  const pcre_uint8 *tables;
6163  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
6164  USPTR start_match = (USPTR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6165  USPTR end_subject;  PCRE_PUCHAR end_subject;
6166  USPTR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
6167  USPTR req_byte_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6168    
 pcre_study_data internal_study;  
6169  const pcre_study_data *study;  const pcre_study_data *study;
   
 real_pcre internal_re;  
6170  const real_pcre *external_re = (const real_pcre *)argument_re;  const real_pcre *external_re = (const real_pcre *)argument_re;
6171  const real_pcre *re = external_re;  const real_pcre *re = external_re;
6172    
# Line 5969  follows immediately afterwards. Other va Line 6183  follows immediately afterwards. Other va
6183  during "normal" pcre_exec() processing, not when the JIT support is in use,  during "normal" pcre_exec() processing, not when the JIT support is in use,
6184  so they are set up later. */  so they are set up later. */
6185    
6186  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6187    utf = md->utf = (re->options & PCRE_UTF8) != 0;
6188  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6189                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6190    
6191  /* Check a UTF-8 string if required. Pass back the character offset and error  /* Check a UTF-8 string if required. Pass back the character offset and error
6192  code for an invalid string if a results vector is available. */  code for an invalid string if a results vector is available. */
6193    
6194  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6195  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6196    {    {
6197    int erroroffset;    int erroroffset;
6198    int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);    int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6199    if (errorcode != 0)    if (errorcode != 0)
6200      {      {
6201      if (offsetcount >= 2)      if (offsetcount >= 2)
# Line 5988  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 6203  if (utf8 && (options & PCRE_NO_UTF8_CHEC
6203        offsets[0] = erroroffset;        offsets[0] = erroroffset;
6204        offsets[1] = errorcode;        offsets[1] = errorcode;
6205        }        }
6206    #ifdef COMPILE_PCRE16
6207        return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6208          PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6209    #else
6210      return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?      return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6211        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6212    #endif
6213      }      }
6214    
6215    /* Check that a start_offset points to the start of a UTF-8 character. */    /* Check that a start_offset points to the start of a UTF character. */
6216    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
6217        (((USPTR)subject)[start_offset] & 0xc0) == 0x80)        NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6218      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
6219    }    }
6220  #endif  #endif
# Line 6012  if (extra_data != NULL Line 6232  if (extra_data != NULL
6232      && (extra_data->flags & PCRE_EXTRA_TABLES) == 0      && (extra_data->flags & PCRE_EXTRA_TABLES) == 0
6233      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6234                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
6235    return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,    return PRIV(jit_exec)(re, extra_data->executable_jit,
6236      start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)      (const pcre_uchar *)subject, length, start_offset, options,
6237        ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
6238      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
6239  #endif  #endif
6240    
6241  /* Carry on with non-JIT matching. This information is for finding all the  /* Carry on with non-JIT matching. This information is for finding all the
6242  numbers associated with a given name, for condition testing. */  numbers associated with a given name, for condition testing. */
6243    
6244  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (pcre_uchar *)re + re->name_table_offset;
6245  md->name_count = re->name_count;  md->name_count = re->name_count;
6246  md->name_entry_size = re->name_entry_size;  md->name_entry_size = re->name_entry_size;
6247    
# Line 6054  if (extra_data != NULL) Line 6275  if (extra_data != NULL)
6275  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
6276  in other programs later. */  in other programs later. */
6277    
6278  if (tables == NULL) tables = _pcre_default_tables;  if (tables == NULL) tables = PRIV(default_tables);
6279    
6280  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
6281  test for a regex that was compiled on a host of opposite endianness. If this is  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6282  the case, flipped values are put in internal_re and internal_study if there was  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6283  study data too. */  means that the pattern is likely compiled with different endianness. */
6284    
6285  if (re->magic_number != MAGIC_NUMBER)  if (re->magic_number != MAGIC_NUMBER)
6286    {    return re->magic_number == REVERSED_MAGIC_NUMBER?
6287    re = _pcre_try_flipped(re, &internal_re, study, &internal_study);      PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6288    if (re == NULL) return PCRE_ERROR_BADMAGIC;  if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
   if (study != NULL) study = &internal_study;  
   }  
6289    
6290  /* Set up other data */  /* Set up other data */
6291    
# Line 6076  firstline = (re->options & PCRE_FIRSTLIN Line 6295  firstline = (re->options & PCRE_FIRSTLIN
6295    
6296  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
6297    
6298  md->start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const pcre_uchar *)external_re + re->name_table_offset +
6299    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
6300    
6301  md->start_subject = (USPTR)subject;  md->start_subject = (PCRE_PUCHAR)subject;
6302  md->start_offset = start_offset;  md->start_offset = start_offset;
6303  md->end_subject = md->start_subject + length;  md->end_subject = md->start_subject + length;
6304  end_subject = md->end_subject;  end_subject = md->end_subject;
# Line 6104  md->recursive = NULL; Line 6323  md->recursive = NULL;
6323  md->hasthen = (re->flags & PCRE_HASTHEN) != 0;  md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6324    
6325  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
6326    md->fcc = tables + fcc_offset;
6327  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
6328    
6329  /* Handle different \R options. */  /* Handle different \R options. */
# Line 6190  arg_offset_max = (2*ocount)/3; Line 6410  arg_offset_max = (2*ocount)/3;
6410  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
6411    {    {
6412    ocount = re->top_backref * 3 + 3;    ocount = re->top_backref * 3 + 3;
6413    md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6414    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6415    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
6416    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
# Line 6217  if (md->offset_vector != NULL) Line 6437  if (md->offset_vector != NULL)
6437    md->offset_vector[0] = md->offset_vector[1] = -1;    md->offset_vector[0] = md->offset_vector[1] = -1;
6438    }    }
6439    
6440  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_char value is
6441  never set for an anchored regular expression, but the anchoring may be forced  never set for an anchored regular expression, but the anchoring may be forced
6442  at run time, so we have to test for anchoring. The first char may be unset for  at run time, so we have to test for anchoring. The first char may be unset for
6443  an unanchored pattern, of course. If there's no first char and the pattern was  an unanchored pattern, of course. If there's no first char and the pattern was
# Line 6227  if (!anchored) Line 6447  if (!anchored)
6447    {    {
6448    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
6449      {      {
6450      first_byte = re->first_byte & 255;      has_first_char = TRUE;
6451      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = re->first_char;
6452        first_byte = md->lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
6453          {
6454          first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6455    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6456          if (utf && first_char > 127)
6457            first_char2 = UCD_OTHERCASE(first_char);
6458    #endif
6459          }
6460      }      }
6461    else    else
6462      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 6242  character" set. */ Line 6469  character" set. */
6469    
6470  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
6471    {    {
6472    req_byte = re->req_byte & 255;    has_req_char = TRUE;
6473    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = re->req_char;
6474    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
6475        {
6476        req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6477    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6478        if (utf && req_char > 127)
6479          req_char2 = UCD_OTHERCASE(req_char);
6480    #endif
6481        }
6482    }    }
6483    
6484    
   
   
6485  /* ==========================================================================*/  /* ==========================================================================*/
6486    
6487  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
# Line 6257  the loop runs just once. */ Line 6489  the loop runs just once. */
6489    
6490  for(;;)  for(;;)
6491    {    {
6492    USPTR save_end_subject = end_subject;    PCRE_PUCHAR save_end_subject = end_subject;
6493    USPTR new_start_match;    PCRE_PUCHAR new_start_match;
6494    
6495    /* If firstline is TRUE, the start of the match is constrained to the first    /* If firstline is TRUE, the start of the match is constrained to the first
6496    line of a multiline string. That is, the match must be before or at the first    line of a multiline string. That is, the match must be before or at the first
# Line 6268  for(;;) Line 6500  for(;;)
6500    
6501    if (firstline)    if (firstline)
6502      {      {
6503      USPTR t = start_match;      PCRE_PUCHAR t = start_match;
6504  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6505      if (utf8)      if (utf)
6506        {        {
6507        while (t < md->end_subject && !IS_NEWLINE(t))        while (t < md->end_subject && !IS_NEWLINE(t))
6508          {          {
6509          t++;          t++;
6510          while (t < end_subject && (*t & 0xc0) == 0x80) t++;          ACROSSCHAR(t < end_subject, *t, t++);
6511          }          }
6512        }        }
6513      else      else
# Line 6292  for(;;) Line 6524  for(;;)
6524    
6525    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6526      {      {
6527      /* Advance to a unique first byte if there is one. */      /* Advance to a unique first char if there is one. */
6528    
6529      if (first_byte >= 0)      if (has_first_char)
6530        {        {
6531        if (first_byte_caseless)        if (first_char != first_char2)
6532          while (start_match < end_subject && md->lcc[*start_match] != first_byte)          while (start_match < end_subject &&
6533                *start_match != first_char && *start_match != first_char2)
6534            start_match++;            start_match++;
6535        else        else
6536          while (start_match < end_subject && *start_match != first_byte)          while (start_match < end_subject && *start_match != first_char)
6537            start_match++;            start_match++;
6538        }        }
6539    
# Line 6310  for(;;) Line 6543  for(;;)
6543        {        {
6544        if (start_match > md->start_subject + start_offset)        if (start_match > md->start_subject + start_offset)
6545          {          {
6546  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6547          if (utf8)          if (utf)
6548            {            {
6549            while (start_match < end_subject && !WAS_NEWLINE(start_match))            while (start_match < end_subject && !WAS_NEWLINE(start_match))
6550              {              {
6551              start_match++;              start_match++;
6552              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)              ACROSSCHAR(start_match < end_subject, *start_match,
6553                start_match++;                start_match++);
6554              }              }
6555            }            }
6556          else          else
# Line 6344  for(;;) Line 6577  for(;;)
6577        while (start_match < end_subject)        while (start_match < end_subject)
6578          {          {
6579          register unsigned int c = *start_match;          register unsigned int c = *start_match;
6580    #ifndef COMPILE_PCRE8
6581            if (c > 255) c = 255;
6582    #endif
6583          if ((start_bits[c/8] & (1 << (c&7))) == 0)          if ((start_bits[c/8] & (1 << (c&7))) == 0)
6584            {            {
6585            start_match++;            start_match++;
6586  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6587            if (utf8)            /* In non 8-bit mode, the iteration will stop for
6588              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)            characters > 255 at the beginning or not stop at all. */
6589                start_match++;            if (utf)
6590                ACROSSCHAR(start_match < end_subject, *start_match,
6591                  start_match++);
6592  #endif  #endif
6593            }            }
6594          else break;          else break;
# Line 6379  for(;;) Line 6617  for(;;)
6617        break;        break;
6618        }        }
6619    
6620      /* If req_byte is set, we know that that character must appear in the      /* If req_char is set, we know that that character must appear in the
6621      subject for the match to succeed. If the first character is set, req_byte      subject for the match to succeed. If the first character is set, req_char
6622      must be later in the subject; otherwise the test starts at the match point.      must be later in the subject; otherwise the test starts at the match point.
6623      This optimization can save a huge amount of backtracking in patterns with      This optimization can save a huge amount of backtracking in patterns with
6624      nested unlimited repeats that aren't going to match. Writing separate code      nested unlimited repeats that aren't going to match. Writing separate code
# Line 6393  for(;;) Line 6631  for(;;)
6631      32-megabyte string... so we don't do this when the string is sufficiently      32-megabyte string... so we don't do this when the string is sufficiently
6632      long. */      long. */
6633    
6634      if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)      if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6635        {        {
6636        register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);        register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6637    
6638        /* We don't need to repeat the search if we haven't yet reached the        /* We don't need to repeat the search if we haven't yet reached the
6639        place we found it at last time. */        place we found it at last time. */
6640    
6641        if (p > req_byte_ptr)        if (p > req_char_ptr)
6642          {          {
6643          if (req_byte_caseless)          if (req_char != req_char2)
6644            {            {
6645            while (p < end_subject)            while (p < end_subject)
6646              {              {
6647              register int pp = *p++;              register int pp = *p++;
6648              if (pp == req_byte || pp == req_byte2) { p--; break; }              if (pp == req_char || pp == req_char2) { p--; break; }
6649              }              }
6650            }            }
6651          else          else
6652            {            {
6653            while (p < end_subject)            while (p < end_subject)
6654              {              {
6655              if (*p++ == req_byte) { p--; break; }              if (*p++ == req_char) { p--; break; }
6656              }              }
6657            }            }
6658    
# Line 6431  for(;;) Line 6669  for(;;)
6669          found it, so that we don't search again next time round the loop if          found it, so that we don't search again next time round the loop if
6670          the start hasn't passed this character yet. */          the start hasn't passed this character yet. */
6671    
6672          req_byte_ptr = p;          req_char_ptr = p;
6673          }          }
6674        }        }
6675      }      }
# Line 6456  for(;;) Line 6694  for(;;)
6694    switch(rc)    switch(rc)
6695      {      {
6696      /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched      /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6697      the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP      the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6698      entirely. The only way we can do that is to re-do the match at the same      entirely. The only way we can do that is to re-do the match at the same
6699      point, with a flag to force SKIP with an argument to be ignored. Just      point, with a flag to force SKIP with an argument to be ignored. Just
6700      treating this case as NOMATCH does not work because it does not check other      treating this case as NOMATCH does not work because it does not check other
6701      alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */      alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6702    
6703      case MATCH_SKIP_ARG:      case MATCH_SKIP_ARG:
6704      new_start_match = start_match;      new_start_match = start_match;
6705      md->ignore_skip_arg = TRUE;      md->ignore_skip_arg = TRUE;
6706      break;      break;
6707    
6708      /* SKIP passes back the next starting point explicitly, but if it is the      /* SKIP passes back the next starting point explicitly, but if it is the
6709      same as the match we have just done, treat it as NOMATCH. */      same as the match we have just done, treat it as NOMATCH. */
# Line 6486  for(;;) Line 6724  for(;;)
6724      case MATCH_THEN:      case MATCH_THEN:
6725      md->ignore_skip_arg = FALSE;      md->ignore_skip_arg = FALSE;
6726      new_start_match = start_match + 1;      new_start_match = start_match + 1;
6727  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6728      if (utf8)      if (utf)
6729        while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)        ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6730          new_start_match++;          new_start_match++);
6731  #endif  #endif
6732      break;      break;
6733    
# Line 6527  for(;;) Line 6765  for(;;)
6765    
6766    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does
6767    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
6768    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. In
6769      normal matching start_match will always be greater than the first position at
6770      this stage, but a failed *SKIP can cause a return at the same point, which is
6771      why the first test exists. */
6772    
6773    if (start_match[-1] == CHAR_CR &&    if (start_match > (PCRE_PUCHAR)subject + start_offset &&
6774          start_match[-1] == CHAR_CR &&
6775        start_match < end_subject &&        start_match < end_subject &&
6776        *start_match == CHAR_NL &&        *start_match == CHAR_NL &&
6777        (re->flags & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
# Line 6575  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6817  if (rc == MATCH_MATCH || rc == MATCH_ACC
6817        }        }
6818      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
6819      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
6820      (pcre_free)(md->offset_vector);      (PUBL(free))(md->offset_vector);
6821      }      }
6822    
6823    /* Set the return code to the number of captured strings, or 0 if there were    /* Set the return code to the number of captured strings, or 0 if there were
# Line 6614  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6856  if (rc == MATCH_MATCH || rc == MATCH_ACC
6856      }      }
6857    
6858    /* Return MARK data if requested */    /* Return MARK data if requested */
6859    
6860    if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)    if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6861      *(extra_data->mark) = (unsigned char *)(md->mark);      *(extra_data->mark) = (pcre_uchar *)md->mark;
6862    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
6863    return rc;    return rc;
6864    }    }
# Line 6627  attempt has failed at all permitted star Line 6869  attempt has failed at all permitted star
6869  if (using_temporary_offsets)  if (using_temporary_offsets)
6870    {    {
6871    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
6872    (pcre_free)(md->offset_vector);    (PUBL(free))(md->offset_vector);
6873    }    }
6874    
6875  /* For anything other than nomatch or partial match, just return the code. */  /* For anything other than nomatch or partial match, just return the code. */
# Line 6646  if (start_partial != NULL) Line 6888  if (start_partial != NULL)
6888    md->mark = NULL;    md->mark = NULL;
6889    if (offsetcount > 1)    if (offsetcount > 1)
6890      {      {
6891      offsets[0] = (int)(start_partial - (USPTR)subject);      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
6892      offsets[1] = (int)(end_subject - (USPTR)subject);      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
6893      }      }
6894    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
6895    }    }
# Line 6663  else Line 6905  else
6905  /* Return the MARK data if it has been requested. */  /* Return the MARK data if it has been requested. */
6906    
6907  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6908    *(extra_data->mark) = (unsigned char *)(md->nomatch_mark);    *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
6909  return rc;  return rc;
6910  }  }
6911    

Legend:
Removed from v.778  
changed lines
  Added in v.850

  ViewVC Help
Powered by ViewVC 1.1.5