/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 778 by ph10, Thu Dec 1 17:38:47 2011 UTC revision 888 by ph10, Tue Jan 17 14:43:23 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 113  Returns:     nothing Line 113  Returns:     nothing
113  */  */
114    
115  static void  static void
116  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
117  {  {
118  unsigned int c;  unsigned int c;
119  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 144  Returns:      < 0 if not matched, otherw Line 144  Returns:      < 0 if not matched, otherw
144  */  */
145    
146  static int  static int
147  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
148    BOOL caseless)    BOOL caseless)
149  {  {
150  USPTR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
151  register USPTR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
152    
153  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
154  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 173  ASCII characters. */ Line 173  ASCII characters. */
173    
174  if (caseless)  if (caseless)
175    {    {
176  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
177  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
178    if (md->utf8)    if (md->utf)
179      {      {
180      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
181      bytes matched may differ, because there are some characters whose upper and      bytes matched may differ, because there are some characters whose upper and
# Line 185  if (caseless) Line 185  if (caseless)
185      the latter. It is important, therefore, to check the length along the      the latter. It is important, therefore, to check the length along the
186      reference, not along the subject (earlier code did this wrong). */      reference, not along the subject (earlier code did this wrong). */
187    
188      USPTR endptr = p + length;      PCRE_PUCHAR endptr = p + length;
189      while (p < endptr)      while (p < endptr)
190        {        {
191        int c, d;        int c, d;
# Line 204  if (caseless) Line 204  if (caseless)
204      {      {
205      if (eptr + length > md->end_subject) return -1;      if (eptr + length > md->end_subject) return -1;
206      while (length-- > 0)      while (length-- > 0)
207        { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }        {
208          if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
209          p++;
210          eptr++;
211          }
212      }      }
213    }    }
214    
# Line 217  else Line 221  else
221    while (length-- > 0) if (*p++ != *eptr++) return -1;    while (length-- > 0) if (*p++ != *eptr++) return -1;
222    }    }
223    
224  return eptr - eptr_start;  return (int)(eptr - eptr_start);
225  }  }
226    
227    
# Line 307  argument of match(), which never changes Line 311  argument of match(), which never changes
311    
312  #define RMATCH(ra,rb,rc,rd,re,rw)\  #define RMATCH(ra,rb,rc,rd,re,rw)\
313    {\    {\
314    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
315    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
316    frame->Xwhere = rw; \    frame->Xwhere = rw; \
317    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
# Line 328  argument of match(), which never changes Line 332  argument of match(), which never changes
332    {\    {\
333    heapframe *oldframe = frame;\    heapframe *oldframe = frame;\
334    frame = oldframe->Xprevframe;\    frame = oldframe->Xprevframe;\
335    (pcre_stack_free)(oldframe);\    (PUBL(stack_free))(oldframe);\
336    if (frame != NULL)\    if (frame != NULL)\
337      {\      {\
338      rrc = ra;\      rrc = ra;\
# Line 345  typedef struct heapframe { Line 349  typedef struct heapframe {
349    
350    /* Function arguments that may change */    /* Function arguments that may change */
351    
352    USPTR Xeptr;    PCRE_PUCHAR Xeptr;
353    const uschar *Xecode;    const pcre_uchar *Xecode;
354    USPTR Xmstart;    PCRE_PUCHAR Xmstart;
355    int Xoffset_top;    int Xoffset_top;
356    eptrblock *Xeptrb;    eptrblock *Xeptrb;
357    unsigned int Xrdepth;    unsigned int Xrdepth;
358    
359    /* Function local variables */    /* Function local variables */
360    
361    USPTR Xcallpat;    PCRE_PUCHAR Xcallpat;
362  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
363    USPTR Xcharptr;    PCRE_PUCHAR Xcharptr;
364  #endif  #endif
365    USPTR Xdata;    PCRE_PUCHAR Xdata;
366    USPTR Xnext;    PCRE_PUCHAR Xnext;
367    USPTR Xpp;    PCRE_PUCHAR Xpp;
368    USPTR Xprev;    PCRE_PUCHAR Xprev;
369    USPTR Xsaved_eptr;    PCRE_PUCHAR Xsaved_eptr;
370    
371    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
372    
# Line 375  typedef struct heapframe { Line 379  typedef struct heapframe {
379    int Xprop_value;    int Xprop_value;
380    int Xprop_fail_result;    int Xprop_fail_result;
381    int Xoclength;    int Xoclength;
382    uschar Xocchars[8];    pcre_uchar Xocchars[6];
383  #endif  #endif
384    
385    int Xcodelink;    int Xcodelink;
# Line 440  the subject. */ Line 444  the subject. */
444    
445    
446  /* Performance note: It might be tempting to extract commonly used fields from  /* Performance note: It might be tempting to extract commonly used fields from
447  the md structure (e.g. utf8, end_subject) into individual variables to improve  the md structure (e.g. utf, end_subject) into individual variables to improve
448  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
449  made performance worse.  made performance worse.
450    
# Line 463  Returns:       MATCH_MATCH if matched Line 467  Returns:       MATCH_MATCH if matched
467  */  */
468    
469  static int  static int
470  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
471    int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)    PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
472      unsigned int rdepth)
473  {  {
474  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
475  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 473  so they can be ordinary variables in all Line 478  so they can be ordinary variables in all
478  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
479  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
480  register unsigned int c;   /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
481  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf;         /* Local copy of UTF flag for speed */
482    
483  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
484  BOOL caseless;  BOOL caseless;
# Line 485  heap storage. Set up the top-level frame Line 490  heap storage. Set up the top-level frame
490  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  heap whenever RMATCH() does a "recursion". See the macro definitions above. */
491    
492  #ifdef NO_RECURSE  #ifdef NO_RECURSE
493  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));
494  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
495  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
496    
# Line 513  HEAP_RECURSE: Line 518  HEAP_RECURSE:
518    
519  /* Ditto for the local variables */  /* Ditto for the local variables */
520    
521  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
522  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
523  #endif  #endif
524  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
# Line 571  declarations can be cut out in a block. Line 576  declarations can be cut out in a block.
576  below are for variables that do not have to be preserved over a recursive call  below are for variables that do not have to be preserved over a recursive call
577  to RMATCH(). */  to RMATCH(). */
578    
579  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
580  const uschar *charptr;  const pcre_uchar *charptr;
581  #endif  #endif
582  const uschar *callpat;  const pcre_uchar *callpat;
583  const uschar *data;  const pcre_uchar *data;
584  const uschar *next;  const pcre_uchar *next;
585  USPTR         pp;  PCRE_PUCHAR       pp;
586  const uschar *prev;  const pcre_uchar *prev;
587  USPTR         saved_eptr;  PCRE_PUCHAR       saved_eptr;
588    
589  recursion_info new_recursive;  recursion_info new_recursive;
590    
# Line 592  int prop_type; Line 597  int prop_type;
597  int prop_value;  int prop_value;
598  int prop_fail_result;  int prop_fail_result;
599  int oclength;  int oclength;
600  uschar occhars[8];  pcre_uchar occhars[6];
601  #endif  #endif
602    
603  int codelink;  int codelink;
# Line 620  the alternative names that are used. */ Line 625  the alternative names that are used. */
625  #define code_offset   codelink  #define code_offset   codelink
626  #define condassert    condition  #define condassert    condition
627  #define matched_once  prev_is_word  #define matched_once  prev_is_word
628    #define foc           number
629    #define save_mark     data
630    
631  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
632  variables. */  variables. */
# Line 645  defined). However, RMATCH isn't like a f Line 652  defined). However, RMATCH isn't like a f
652  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
653  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
654    
655  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
656  utf8 = md->utf8;       /* Local copy of the flag */  utf = md->utf;       /* Local copy of the flag */
657  #else  #else
658  utf8 = FALSE;  utf = FALSE;
659  #endif  #endif
660    
661  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
# Line 689  for (;;) Line 696  for (;;)
696      case OP_MARK:      case OP_MARK:
697      md->nomatch_mark = ecode + 2;      md->nomatch_mark = ecode + 2;
698      md->mark = NULL;    /* In case previously set by assertion */      md->mark = NULL;    /* In case previously set by assertion */
699      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
700        eptrb, RM55);        eptrb, RM55);
701      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
702           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
# Line 702  for (;;) Line 709  for (;;)
709      unaltered. */      unaltered. */
710    
711      else if (rrc == MATCH_SKIP_ARG &&      else if (rrc == MATCH_SKIP_ARG &&
712          strcmp((char *)(ecode + 2), (char *)(md->start_match_ptr)) == 0)          STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
713        {        {
714        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
715        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
# Line 715  for (;;) Line 722  for (;;)
722      /* COMMIT overrides PRUNE, SKIP, and THEN */      /* COMMIT overrides PRUNE, SKIP, and THEN */
723    
724      case OP_COMMIT:      case OP_COMMIT:
725      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
726        eptrb, RM52);        eptrb, RM52);
727      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
728          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&          rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
# Line 726  for (;;) Line 733  for (;;)
733      /* PRUNE overrides THEN */      /* PRUNE overrides THEN */
734    
735      case OP_PRUNE:      case OP_PRUNE:
736      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
737        eptrb, RM51);        eptrb, RM51);
738      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
739      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
# Line 734  for (;;) Line 741  for (;;)
741      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
742      md->nomatch_mark = ecode + 2;      md->nomatch_mark = ecode + 2;
743      md->mark = NULL;    /* In case previously set by assertion */      md->mark = NULL;    /* In case previously set by assertion */
744      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
745        eptrb, RM56);        eptrb, RM56);
746      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
747           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
# Line 744  for (;;) Line 751  for (;;)
751      /* SKIP overrides PRUNE and THEN */      /* SKIP overrides PRUNE and THEN */
752    
753      case OP_SKIP:      case OP_SKIP:
754      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
755        eptrb, RM53);        eptrb, RM53);
756      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
757        RRETURN(rrc);        RRETURN(rrc);
# Line 752  for (;;) Line 759  for (;;)
759      RRETURN(MATCH_SKIP);      RRETURN(MATCH_SKIP);
760    
761      /* Note that, for Perl compatibility, SKIP with an argument does NOT set      /* Note that, for Perl compatibility, SKIP with an argument does NOT set
762      nomatch_mark. There is a flag that disables this opcode when re-matching a      nomatch_mark. There is a flag that disables this opcode when re-matching a
763      pattern that ended with a SKIP for which there was not a matching MARK. */      pattern that ended with a SKIP for which there was not a matching MARK. */
764    
765      case OP_SKIP_ARG:      case OP_SKIP_ARG:
766      if (md->ignore_skip_arg)      if (md->ignore_skip_arg)
767        {        {
768        ecode += _pcre_OP_lengths[*ecode] + ecode[1];        ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
769        break;        break;
770        }        }
771      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
772        eptrb, RM57);        eptrb, RM57);
773      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
774        RRETURN(rrc);        RRETURN(rrc);
775    
776      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
777      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
778      caught by a matching MARK, or get to the top, where it causes a rematch      caught by a matching MARK, or get to the top, where it causes a rematch
779      with the md->ignore_skip_arg flag set. */      with the md->ignore_skip_arg flag set. */
780    
781      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
# Line 779  for (;;) Line 786  for (;;)
786      match pointer to do this. */      match pointer to do this. */
787    
788      case OP_THEN:      case OP_THEN:
789      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
790        eptrb, RM54);        eptrb, RM54);
791      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
792      md->start_match_ptr = ecode;      md->start_match_ptr = ecode;
# Line 788  for (;;) Line 795  for (;;)
795      case OP_THEN_ARG:      case OP_THEN_ARG:
796      md->nomatch_mark = ecode + 2;      md->nomatch_mark = ecode + 2;
797      md->mark = NULL;    /* In case previously set by assertion */      md->mark = NULL;    /* In case previously set by assertion */
798      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
799        md, eptrb, RM58);        md, eptrb, RM58);
800      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
801           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
# Line 812  for (;;) Line 819  for (;;)
819      case OP_ONCE_NC:      case OP_ONCE_NC:
820      prev = ecode;      prev = ecode;
821      saved_eptr = eptr;      saved_eptr = eptr;
822        save_mark = md->mark;
823      do      do
824        {        {
825        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
# Line 830  for (;;) Line 838  for (;;)
838    
839        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
840        ecode += GET(ecode,1);        ecode += GET(ecode,1);
841          md->mark = save_mark;
842        }        }
843      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
844    
# Line 909  for (;;) Line 918  for (;;)
918        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
919        save_offset3 = md->offset_vector[md->offset_end - number];        save_offset3 = md->offset_vector[md->offset_end - number];
920        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
921          save_mark = md->mark;
922    
923        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
924        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
# Line 917  for (;;) Line 927  for (;;)
927        for (;;)        for (;;)
928          {          {
929          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
930          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
931            eptrb, RM1);            eptrb, RM1);
932          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */          if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
933    
# Line 945  for (;;) Line 955  for (;;)
955          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
956          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
957          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
958            md->mark = save_mark;
959          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
960          }          }
961    
# Line 1004  for (;;) Line 1015  for (;;)
1015    
1016        else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)        else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1017          {          {
1018          ecode += _pcre_OP_lengths[*ecode];          ecode += PRIV(OP_lengths)[*ecode];
1019          goto TAIL_RECURSE;          goto TAIL_RECURSE;
1020          }          }
1021    
1022        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1023    
1024        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,        save_mark = md->mark;
1025          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1026          RM2);          RM2);
1027    
1028        /* See comment in the code for capturing groups above about handling        /* See comment in the code for capturing groups above about handling
1029        THEN. */        THEN. */
1030    
# Line 1028  for (;;) Line 1040  for (;;)
1040          {          {
1041          if (rrc == MATCH_ONCE)          if (rrc == MATCH_ONCE)
1042            {            {
1043            const uschar *scode = ecode;            const pcre_uchar *scode = ecode;
1044            if (*scode != OP_ONCE)           /* If not at start, find it */            if (*scode != OP_ONCE)           /* If not at start, find it */
1045              {              {
1046              while (*scode == OP_ALT) scode += GET(scode, 1);              while (*scode == OP_ALT) scode += GET(scode, 1);
# Line 1039  for (;;) Line 1051  for (;;)
1051          RRETURN(rrc);          RRETURN(rrc);
1052          }          }
1053        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1054          md->mark = save_mark;
1055        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1056        }        }
1057    
# Line 1070  for (;;) Line 1083  for (;;)
1083      if (offset < md->offset_max)      if (offset < md->offset_max)
1084        {        {
1085        matched_once = FALSE;        matched_once = FALSE;
1086        code_offset = ecode - md->start_code;        code_offset = (int)(ecode - md->start_code);
1087    
1088        save_offset1 = md->offset_vector[offset];        save_offset1 = md->offset_vector[offset];
1089        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
# Line 1093  for (;;) Line 1106  for (;;)
1106          md->offset_vector[md->offset_end - number] =          md->offset_vector[md->offset_end - number] =
1107            (int)(eptr - md->start_subject);            (int)(eptr - md->start_subject);
1108          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;          if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1109          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1110            eptrb, RM63);            eptrb, RM63);
1111          if (rrc == MATCH_KETRPOS)          if (rrc == MATCH_KETRPOS)
1112            {            {
# Line 1160  for (;;) Line 1173  for (;;)
1173    
1174      POSSESSIVE_NON_CAPTURE:      POSSESSIVE_NON_CAPTURE:
1175      matched_once = FALSE;      matched_once = FALSE;
1176      code_offset = ecode - md->start_code;      code_offset = (int)(ecode - md->start_code);
1177    
1178      for (;;)      for (;;)
1179        {        {
1180        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1181        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1182          eptrb, RM48);          eptrb, RM48);
1183        if (rrc == MATCH_KETRPOS)        if (rrc == MATCH_KETRPOS)
1184          {          {
# Line 1215  for (;;) Line 1228  for (;;)
1228    
1229      if (ecode[LINK_SIZE+1] == OP_CALLOUT)      if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1230        {        {
1231        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
1232          {          {
1233          pcre_callout_block cb;          PUBL(callout_block) cb;
1234          cb.version          = 2;   /* Version 2 of the callout block */          cb.version          = 2;   /* Version 2 of the callout block */
1235          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1236          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1237    #ifdef COMPILE_PCRE8
1238          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
1239    #else
1240            cb.subject          = (PCRE_SPTR16)md->start_subject;
1241    #endif
1242          cb.subject_length   = (int)(md->end_subject - md->start_subject);          cb.subject_length   = (int)(md->end_subject - md->start_subject);
1243          cb.start_match      = (int)(mstart - md->start_subject);          cb.start_match      = (int)(mstart - md->start_subject);
1244          cb.current_position = (int)(eptr - md->start_subject);          cb.current_position = (int)(eptr - md->start_subject);
# Line 1231  for (;;) Line 1248  for (;;)
1248          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last;
1249          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1250          cb.mark             = md->nomatch_mark;          cb.mark             = md->nomatch_mark;
1251          if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1252          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1253          }          }
1254        ecode += _pcre_OP_lengths[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1255        }        }
1256    
1257      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1260  for (;;) Line 1277  for (;;)
1277    
1278          if (!condition && condcode == OP_NRREF)          if (!condition && condcode == OP_NRREF)
1279            {            {
1280            uschar *slotA = md->name_table;            pcre_uchar *slotA = md->name_table;
1281            for (i = 0; i < md->name_count; i++)            for (i = 0; i < md->name_count; i++)
1282              {              {
1283              if (GET2(slotA, 0) == recno) break;              if (GET2(slotA, 0) == recno) break;
# Line 1273  for (;;) Line 1290  for (;;)
1290    
1291            if (i < md->name_count)            if (i < md->name_count)
1292              {              {
1293              uschar *slotB = slotA;              pcre_uchar *slotB = slotA;
1294              while (slotB > md->name_table)              while (slotB > md->name_table)
1295                {                {
1296                slotB -= md->name_entry_size;                slotB -= md->name_entry_size;
1297                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1298                  {                  {
1299                  condition = GET2(slotB, 0) == md->recursive->group_num;                  condition = GET2(slotB, 0) == md->recursive->group_num;
1300                  if (condition) break;                  if (condition) break;
# Line 1293  for (;;) Line 1310  for (;;)
1310                for (i++; i < md->name_count; i++)                for (i++; i < md->name_count; i++)
1311                  {                  {
1312                  slotB += md->name_entry_size;                  slotB += md->name_entry_size;
1313                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                  if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1314                    {                    {
1315                    condition = GET2(slotB, 0) == md->recursive->group_num;                    condition = GET2(slotB, 0) == md->recursive->group_num;
1316                    if (condition) break;                    if (condition) break;
# Line 1306  for (;;) Line 1323  for (;;)
1323    
1324          /* Choose branch according to the condition */          /* Choose branch according to the condition */
1325    
1326          ecode += condition? 3 : GET(ecode, 1);          ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1327          }          }
1328        }        }
1329    
# Line 1323  for (;;) Line 1340  for (;;)
1340        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
1341          {          {
1342          int refno = offset >> 1;          int refno = offset >> 1;
1343          uschar *slotA = md->name_table;          pcre_uchar *slotA = md->name_table;
1344    
1345          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
1346            {            {
# Line 1337  for (;;) Line 1354  for (;;)
1354    
1355          if (i < md->name_count)          if (i < md->name_count)
1356            {            {
1357            uschar *slotB = slotA;            pcre_uchar *slotB = slotA;
1358            while (slotB > md->name_table)            while (slotB > md->name_table)
1359              {              {
1360              slotB -= md->name_entry_size;              slotB -= md->name_entry_size;
1361              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1362                {                {
1363                offset = GET2(slotB, 0) << 1;                offset = GET2(slotB, 0) << 1;
1364                condition = offset < offset_top &&                condition = offset < offset_top &&
# Line 1359  for (;;) Line 1376  for (;;)
1376              for (i++; i < md->name_count; i++)              for (i++; i < md->name_count; i++)
1377                {                {
1378                slotB += md->name_entry_size;                slotB += md->name_entry_size;
1379                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1380                  {                  {
1381                  offset = GET2(slotB, 0) << 1;                  offset = GET2(slotB, 0) << 1;
1382                  condition = offset < offset_top &&                  condition = offset < offset_top &&
# Line 1374  for (;;) Line 1391  for (;;)
1391    
1392        /* Choose branch according to the condition */        /* Choose branch according to the condition */
1393    
1394        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1395        }        }
1396    
1397      else if (condcode == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
# Line 1466  for (;;) Line 1483  for (;;)
1483        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1484        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1485        }        }
1486      ecode += 3;      ecode += 1 + IMM2_SIZE;
1487      break;      break;
1488    
1489    
# Line 1513  for (;;) Line 1530  for (;;)
1530    
1531      case OP_ASSERT:      case OP_ASSERT:
1532      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1533        save_mark = md->mark;
1534      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1535        {        {
1536        condassert = TRUE;        condassert = TRUE;
# Line 1534  for (;;) Line 1552  for (;;)
1552    
1553        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1554        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1555          md->mark = save_mark;
1556        }        }
1557      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1558    
# Line 1557  for (;;) Line 1576  for (;;)
1576    
1577      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1578      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1579        save_mark = md->mark;
1580      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1581        {        {
1582        condassert = TRUE;        condassert = TRUE;
# Line 1567  for (;;) Line 1587  for (;;)
1587      do      do
1588        {        {
1589        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1590          md->mark = save_mark;
1591        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1592        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1593          {          {
# Line 1593  for (;;) Line 1614  for (;;)
1614      back a number of characters, not bytes. */      back a number of characters, not bytes. */
1615    
1616      case OP_REVERSE:      case OP_REVERSE:
1617  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1618      if (utf8)      if (utf)
1619        {        {
1620        i = GET(ecode, 1);        i = GET(ecode, 1);
1621        while (i-- > 0)        while (i-- > 0)
# Line 1625  for (;;) Line 1646  for (;;)
1646      function is able to force a failure. */      function is able to force a failure. */
1647    
1648      case OP_CALLOUT:      case OP_CALLOUT:
1649      if (pcre_callout != NULL)      if (PUBL(callout) != NULL)
1650        {        {
1651        pcre_callout_block cb;        PUBL(callout_block) cb;
1652        cb.version          = 2;   /* Version 2 of the callout block */        cb.version          = 2;   /* Version 2 of the callout block */
1653        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1654        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1655    #ifdef COMPILE_PCRE8
1656        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1657    #else
1658          cb.subject          = (PCRE_SPTR16)md->start_subject;
1659    #endif
1660        cb.subject_length   = (int)(md->end_subject - md->start_subject);        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1661        cb.start_match      = (int)(mstart - md->start_subject);        cb.start_match      = (int)(mstart - md->start_subject);
1662        cb.current_position = (int)(eptr - md->start_subject);        cb.current_position = (int)(eptr - md->start_subject);
# Line 1641  for (;;) Line 1666  for (;;)
1666        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1667        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1668        cb.mark             = md->nomatch_mark;        cb.mark             = md->nomatch_mark;
1669        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1670        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1671        }        }
1672      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 1700  for (;;) Line 1725  for (;;)
1725        else        else
1726          {          {
1727          new_recursive.offset_save =          new_recursive.offset_save =
1728            (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));            (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1729          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);          if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1730          }          }
1731        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
# Line 1715  for (;;) Line 1740  for (;;)
1740        do        do
1741          {          {
1742          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;          if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1743          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1744            md, eptrb, RM6);            md, eptrb, RM6);
1745          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1746              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
# Line 1724  for (;;) Line 1749  for (;;)
1749            {            {
1750            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1751            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1752              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1753    
1754            /* Set where we got to in the subject, and reset the start in case            /* Set where we got to in the subject, and reset the start in case
1755            it was changed by \K. This *is* propagated back out of a recursion,            it was changed by \K. This *is* propagated back out of a recursion,
# Line 1742  for (;;) Line 1767  for (;;)
1767            {            {
1768            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1769            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1770              (pcre_free)(new_recursive.offset_save);              (PUBL(free))(new_recursive.offset_save);
1771            RRETURN(rrc);            RRETURN(rrc);
1772            }            }
1773    
# Line 1754  for (;;) Line 1779  for (;;)
1779        DPRINTF(("Recursion didn't match\n"));        DPRINTF(("Recursion didn't match\n"));
1780        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1781        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1782          (pcre_free)(new_recursive.offset_save);          (PUBL(free))(new_recursive.offset_save);
1783        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1784        }        }
1785    
# Line 2066  for (;;) Line 2091  for (;;)
2091        be "non-word" characters. Remember the earliest consulted character for        be "non-word" characters. Remember the earliest consulted character for
2092        partial matching. */        partial matching. */
2093    
2094  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2095        if (utf8)        if (utf)
2096          {          {
2097          /* Get status of previous character */          /* Get status of previous character */
2098    
2099          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
2100            {            {
2101            USPTR lastptr = eptr - 1;            PCRE_PUCHAR lastptr = eptr - 1;
2102            while((*lastptr & 0xc0) == 0x80) lastptr--;            BACKCHAR(lastptr);
2103            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2104            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
2105  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2139  for (;;) Line 2164  for (;;)
2164              }              }
2165            else            else
2166  #endif  #endif
2167            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = MAX_255(eptr[-1])
2168                && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2169            }            }
2170    
2171          /* Get status of next character */          /* Get status of next character */
# Line 2162  for (;;) Line 2188  for (;;)
2188            }            }
2189          else          else
2190  #endif  #endif
2191          cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          cur_is_word = MAX_255(*eptr)
2192              && ((md->ctypes[*eptr] & ctype_word) != 0);
2193          }          }
2194    
2195        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
# Line 2186  for (;;) Line 2213  for (;;)
2213        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2214        }        }
2215      eptr++;      eptr++;
2216      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  #ifdef SUPPORT_UTF
2217        if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2218    #endif
2219      ecode++;      ecode++;
2220      break;      break;
2221    
# Line 2211  for (;;) Line 2240  for (;;)
2240        }        }
2241      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2242      if (      if (
2243  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2244         c < 256 &&         c < 256 &&
2245  #endif  #endif
2246         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
# Line 2228  for (;;) Line 2257  for (;;)
2257        }        }
2258      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2259      if (      if (
2260  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2261         c >= 256 ||         c > 255 ||
2262  #endif  #endif
2263         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
2264         )         )
# Line 2245  for (;;) Line 2274  for (;;)
2274        }        }
2275      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2276      if (      if (
2277  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2278         c < 256 &&         c < 256 &&
2279  #endif  #endif
2280         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
# Line 2262  for (;;) Line 2291  for (;;)
2291        }        }
2292      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2293      if (      if (
2294  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2295         c >= 256 ||         c > 255 ||
2296  #endif  #endif
2297         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
2298         )         )
# Line 2279  for (;;) Line 2308  for (;;)
2308        }        }
2309      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2310      if (      if (
2311  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2312         c < 256 &&         c < 256 &&
2313  #endif  #endif
2314         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
# Line 2296  for (;;) Line 2325  for (;;)
2325        }        }
2326      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2327      if (      if (
2328  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2329         c >= 256 ||         c > 255 ||
2330  #endif  #endif
2331         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
2332         )         )
# Line 2475  for (;;) Line 2504  for (;;)
2504          break;          break;
2505    
2506          case PT_GC:          case PT_GC:
2507          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2508            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2509          break;          break;
2510    
# Line 2492  for (;;) Line 2521  for (;;)
2521          /* These are specials */          /* These are specials */
2522    
2523          case PT_ALNUM:          case PT_ALNUM:
2524          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2525               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))               PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2526            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2527          break;          break;
2528    
2529          case PT_SPACE:    /* Perl space */          case PT_SPACE:    /* Perl space */
2530          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2531               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2532                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
2533            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2534          break;          break;
2535    
2536          case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
2537          if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2538               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2539               c == CHAR_FF || c == CHAR_CR)               c == CHAR_FF || c == CHAR_CR)
2540                 == (op == OP_NOTPROP))                 == (op == OP_NOTPROP))
# Line 2513  for (;;) Line 2542  for (;;)
2542          break;          break;
2543    
2544          case PT_WORD:          case PT_WORD:
2545          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2546               _pcre_ucp_gentype[prop->chartype] == ucp_N ||               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2547               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2548            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2549          break;          break;
# Line 2543  for (;;) Line 2572  for (;;)
2572      while (eptr < md->end_subject)      while (eptr < md->end_subject)
2573        {        {
2574        int len = 1;        int len = 1;
2575        if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }        if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2576        if (UCD_CATEGORY(c) != ucp_M) break;        if (UCD_CATEGORY(c) != ucp_M) break;
2577        eptr += len;        eptr += len;
2578        }        }
# Line 2564  for (;;) Line 2593  for (;;)
2593      case OP_REFI:      case OP_REFI:
2594      caseless = op == OP_REFI;      caseless = op == OP_REFI;
2595      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2596      ecode += 3;      ecode += 1 + IMM2_SIZE;
2597    
2598      /* If the reference is unset, there are two possibilities:      /* If the reference is unset, there are two possibilities:
2599    
# Line 2604  for (;;) Line 2633  for (;;)
2633        case OP_CRMINRANGE:        case OP_CRMINRANGE:
2634        minimize = (*ecode == OP_CRMINRANGE);        minimize = (*ecode == OP_CRMINRANGE);
2635        min = GET2(ecode, 1);        min = GET2(ecode, 1);
2636        max = GET2(ecode, 3);        max = GET2(ecode, 1 + IMM2_SIZE);
2637        if (max == 0) max = INT_MAX;        if (max == 0) max = INT_MAX;
2638        ecode += 5;        ecode += 1 + 2 * IMM2_SIZE;
2639        break;        break;
2640    
2641        default:               /* No repeat follows */        default:               /* No repeat follows */
# Line 2620  for (;;) Line 2649  for (;;)
2649        }        }
2650    
2651      /* Handle repeated back references. If the length of the reference is      /* Handle repeated back references. If the length of the reference is
2652      zero, just continue with the main loop. */      zero, just continue with the main loop. If the length is negative, it
2653        means the reference is unset in non-Java-compatible mode. If the minimum is
2654        zero, we can continue at the same level without recursion. For any other
2655        minimum, carrying on will result in NOMATCH. */
2656    
2657      if (length == 0) continue;      if (length == 0) continue;
2658        if (length < 0 && min == 0) continue;
2659    
2660      /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2661      the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
# Line 2703  for (;;) Line 2736  for (;;)
2736      case OP_NCLASS:      case OP_NCLASS:
2737      case OP_CLASS:      case OP_CLASS:
2738        {        {
2739          /* The data variable is saved across frames, so the byte map needs to
2740          be stored there. */
2741    #define BYTE_MAP ((pcre_uint8 *)data)
2742        data = ecode + 1;                /* Save for matching */        data = ecode + 1;                /* Save for matching */
2743        ecode += 33;                     /* Advance past the item */        ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2744    
2745        switch (*ecode)        switch (*ecode)
2746          {          {
# Line 2725  for (;;) Line 2761  for (;;)
2761          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2762          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2763          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2764          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2765          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2766          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2767          break;          break;
2768    
2769          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2737  for (;;) Line 2773  for (;;)
2773    
2774        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
2775    
2776  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2777        /* UTF-8 mode */        if (utf)
       if (utf8)  
2778          {          {
2779          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2780            {            {
# Line 2754  for (;;) Line 2789  for (;;)
2789              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2790              }              }
2791            else            else
2792              {              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  
             }  
2793            }            }
2794          }          }
2795        else        else
2796  #endif  #endif
2797        /* Not UTF-8 mode */        /* Not UTF mode */
2798          {          {
2799          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2800            {            {
# Line 2771  for (;;) Line 2804  for (;;)
2804              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2805              }              }
2806            c = *eptr++;            c = *eptr++;
2807            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2808              if (c > 255)
2809                {
2810                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2811                }
2812              else
2813    #endif
2814                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2815            }            }
2816          }          }
2817    
# Line 2785  for (;;) Line 2825  for (;;)
2825    
2826        if (minimize)        if (minimize)
2827          {          {
2828  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2829          /* UTF-8 mode */          if (utf)
         if (utf8)  
2830            {            {
2831            for (fi = min;; fi++)            for (fi = min;; fi++)
2832              {              {
# Line 2805  for (;;) Line 2844  for (;;)
2844                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2845                }                }
2846              else              else
2847                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  
               }  
2848              }              }
2849            }            }
2850          else          else
2851  #endif  #endif
2852          /* Not UTF-8 mode */          /* Not UTF mode */
2853            {            {
2854            for (fi = min;; fi++)            for (fi = min;; fi++)
2855              {              {
# Line 2825  for (;;) Line 2862  for (;;)
2862                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2863                }                }
2864              c = *eptr++;              c = *eptr++;
2865              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);  #ifndef COMPILE_PCRE8
2866                if (c > 255)
2867                  {
2868                  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2869                  }
2870                else
2871    #endif
2872                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2873              }              }
2874            }            }
2875          /* Control never gets here */          /* Control never gets here */
# Line 2837  for (;;) Line 2881  for (;;)
2881          {          {
2882          pp = eptr;          pp = eptr;
2883    
2884  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2885          /* UTF-8 mode */          if (utf)
         if (utf8)  
2886            {            {
2887            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2888              {              {
# Line 2855  for (;;) Line 2898  for (;;)
2898                if (op == OP_CLASS) break;                if (op == OP_CLASS) break;
2899                }                }
2900              else              else
2901                {                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
               if ((data[c/8] & (1 << (c&7))) == 0) break;  
               }  
2902              eptr += len;              eptr += len;
2903              }              }
2904            for (;;)            for (;;)
# Line 2870  for (;;) Line 2911  for (;;)
2911            }            }
2912          else          else
2913  #endif  #endif
2914            /* Not UTF-8 mode */            /* Not UTF mode */
2915            {            {
2916            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2917              {              {
# Line 2880  for (;;) Line 2921  for (;;)
2921                break;                break;
2922                }                }
2923              c = *eptr;              c = *eptr;
2924              if ((data[c/8] & (1 << (c&7))) == 0) break;  #ifndef COMPILE_PCRE8
2925                if (c > 255)
2926                  {
2927                  if (op == OP_CLASS) break;
2928                  }
2929                else
2930    #endif
2931                  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2932              eptr++;              eptr++;
2933              }              }
2934            while (eptr >= pp)            while (eptr >= pp)
# Line 2893  for (;;) Line 2941  for (;;)
2941    
2942          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2943          }          }
2944    #undef BYTE_MAP
2945        }        }
2946      /* Control never gets here */      /* Control never gets here */
2947    
# Line 2901  for (;;) Line 2950  for (;;)
2950      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2951      mode, because Unicode properties are supported in non-UTF-8 mode. */      mode, because Unicode properties are supported in non-UTF-8 mode. */
2952    
2953  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2954      case OP_XCLASS:      case OP_XCLASS:
2955        {        {
2956        data = ecode + 1 + LINK_SIZE;                /* Save for matching */        data = ecode + 1 + LINK_SIZE;                /* Save for matching */
# Line 2926  for (;;) Line 2975  for (;;)
2975          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2976          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2977          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2978          max = GET2(ecode, 3);          max = GET2(ecode, 1 + IMM2_SIZE);
2979          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
2980          ecode += 5;          ecode += 1 + 2 * IMM2_SIZE;
2981          break;          break;
2982    
2983          default:               /* No repeat follows */          default:               /* No repeat follows */
# Line 2946  for (;;) Line 2995  for (;;)
2995            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2996            }            }
2997          GETCHARINCTEST(c, eptr);          GETCHARINCTEST(c, eptr);
2998          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
2999          }          }
3000    
3001        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 2970  for (;;) Line 3019  for (;;)
3019              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3020              }              }
3021            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3022            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3023            }            }
3024          /* Control never gets here */          /* Control never gets here */
3025          }          }
# Line 2988  for (;;) Line 3037  for (;;)
3037              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3038              break;              break;
3039              }              }
3040    #ifdef SUPPORT_UTF
3041            GETCHARLENTEST(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
3042            if (!_pcre_xclass(c, data)) break;  #else
3043              c = *eptr;
3044    #endif
3045              if (!PRIV(xclass)(c, data, utf)) break;
3046            eptr += len;            eptr += len;
3047            }            }
3048          for(;;)          for(;;)
# Line 2997  for (;;) Line 3050  for (;;)
3050            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3051            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3052            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3053            if (utf8) BACKCHAR(eptr);  #ifdef SUPPORT_UTF
3054              if (utf) BACKCHAR(eptr);
3055    #endif
3056            }            }
3057          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3058          }          }
# Line 3009  for (;;) Line 3064  for (;;)
3064      /* Match a single character, casefully */      /* Match a single character, casefully */
3065    
3066      case OP_CHAR:      case OP_CHAR:
3067  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3068      if (utf8)      if (utf)
3069        {        {
3070        length = 1;        length = 1;
3071        ecode++;        ecode++;
# Line 3024  for (;;) Line 3079  for (;;)
3079        }        }
3080      else      else
3081  #endif  #endif
3082        /* Not UTF mode */
     /* Non-UTF-8 mode */  
3083        {        {
3084        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
3085          {          {
# Line 3037  for (;;) Line 3091  for (;;)
3091        }        }
3092      break;      break;
3093    
3094      /* Match a single character, caselessly. If we are at the end of the      /* Match a single character, caselessly. If we are at the end of the
3095      subject, give up immediately. */      subject, give up immediately. */
3096    
3097      case OP_CHARI:      case OP_CHARI:
3098      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
3099        {        {
3100        SCHECK_PARTIAL();        SCHECK_PARTIAL();
3101        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
3102        }        }
3103    
3104  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3105      if (utf8)      if (utf)
3106        {        {
3107        length = 1;        length = 1;
3108        ecode++;        ecode++;
3109        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
3110    
3111        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
3112        we know that its other case must also be one byte long, so we can use the        we know that its other case must also be one byte long, so we can use the
3113        fast lookup table. We know that there is at least one byte left in the        fast lookup table. We know that there is at least one byte left in the
3114        subject. */        subject. */
3115    
3116        if (fc < 128)        if (fc < 128)
3117          {          {
3118          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[fc]
3119                != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3120            ecode++;
3121            eptr++;
3122          }          }
3123    
3124        /* Otherwise we must pick up the subject character. Note that we cannot        /* Otherwise we must pick up the subject character. Note that we cannot
# Line 3087  for (;;) Line 3144  for (;;)
3144          }          }
3145        }        }
3146      else      else
3147  #endif   /* SUPPORT_UTF8 */  #endif   /* SUPPORT_UTF */
3148    
3149      /* Non-UTF-8 mode */      /* Not UTF mode */
3150        {        {
3151        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);        if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3152              != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3153          eptr++;
3154        ecode += 2;        ecode += 2;
3155        }        }
3156      break;      break;
# Line 3101  for (;;) Line 3160  for (;;)
3160      case OP_EXACT:      case OP_EXACT:
3161      case OP_EXACTI:      case OP_EXACTI:
3162      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3163      ecode += 3;      ecode += 1 + IMM2_SIZE;
3164      goto REPEATCHAR;      goto REPEATCHAR;
3165    
3166      case OP_POSUPTO:      case OP_POSUPTO:
# Line 3116  for (;;) Line 3175  for (;;)
3175      min = 0;      min = 0;
3176      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3177      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3178      ecode += 3;      ecode += 1 + IMM2_SIZE;
3179      goto REPEATCHAR;      goto REPEATCHAR;
3180    
3181      case OP_POSSTAR:      case OP_POSSTAR:
# Line 3164  for (;;) Line 3223  for (;;)
3223      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. */
3224    
3225      REPEATCHAR:      REPEATCHAR:
3226  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3227      if (utf8)      if (utf)
3228        {        {
3229        length = 1;        length = 1;
3230        charptr = ecode;        charptr = ecode;
# Line 3181  for (;;) Line 3240  for (;;)
3240          unsigned int othercase;          unsigned int othercase;
3241          if (op >= OP_STARI &&     /* Caseless */          if (op >= OP_STARI &&     /* Caseless */
3242              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
3243            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = PRIV(ord2utf)(othercase, occhars);
3244          else oclength = 0;          else oclength = 0;
3245  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3246    
3247          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3248            {            {
3249            if (eptr <= md->end_subject - length &&            if (eptr <= md->end_subject - length &&
3250              memcmp(eptr, charptr, length) == 0) eptr += length;              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3251  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3252            else if (oclength > 0 &&            else if (oclength > 0 &&
3253                     eptr <= md->end_subject - oclength &&                     eptr <= md->end_subject - oclength &&
3254                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3255  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3256            else            else
3257              {              {
# Line 3211  for (;;) Line 3270  for (;;)
3270              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3271              if (fi >= max) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3272              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3273                memcmp(eptr, charptr, length) == 0) eptr += length;                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3274  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3275              else if (oclength > 0 &&              else if (oclength > 0 &&
3276                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3277                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3278  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3279              else              else
3280                {                {
# Line 3232  for (;;) Line 3291  for (;;)
3291            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3292              {              {
3293              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3294                  memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3295  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3296              else if (oclength > 0 &&              else if (oclength > 0 &&
3297                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
3298                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;                       memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3299  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
3300              else              else
3301                {                {
# Line 3268  for (;;) Line 3327  for (;;)
3327        value of fc will always be < 128. */        value of fc will always be < 128. */
3328        }        }
3329      else      else
3330  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3331          /* When not in UTF-8 mode, load a single-byte character. */
3332      /* When not in UTF-8 mode, load a single-byte character. */        fc = *ecode++;
3333    
3334      fc = *ecode++;      /* The value of fc at this point is always one character, though we may
3335        or may not be in UTF mode. The code is duplicated for the caseless and
     /* The value of fc at this point is always less than 256, though we may or  
     may not be in UTF-8 mode. The code is duplicated for the caseless and  
3336      caseful cases, for speed, since matching characters is likely to be quite      caseful cases, for speed, since matching characters is likely to be quite
3337      common. First, ensure the minimum number of matches are present. If min =      common. First, ensure the minimum number of matches are present. If min =
3338      max, continue at the same level without recursing. Otherwise, if      max, continue at the same level without recursing. Otherwise, if
# Line 3288  for (;;) Line 3345  for (;;)
3345    
3346      if (op >= OP_STARI)  /* Caseless */      if (op >= OP_STARI)  /* Caseless */
3347        {        {
3348        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3349          /* fc must be < 128 if UTF is enabled. */
3350          foc = md->fcc[fc];
3351    #else
3352    #ifdef SUPPORT_UTF
3353    #ifdef SUPPORT_UCP
3354          if (utf && fc > 127)
3355            foc = UCD_OTHERCASE(fc);
3356    #else
3357          if (utf && fc > 127)
3358            foc = fc;
3359    #endif /* SUPPORT_UCP */
3360          else
3361    #endif /* SUPPORT_UTF */
3362            foc = TABLE_GET(fc, md->fcc, fc);
3363    #endif /* COMPILE_PCRE8 */
3364    
3365        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3366          {          {
3367          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
# Line 3296  for (;;) Line 3369  for (;;)
3369            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3370            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3371            }            }
3372          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3373            eptr++;
3374          }          }
3375        if (min == max) continue;        if (min == max) continue;
3376        if (minimize)        if (minimize)
# Line 3311  for (;;) Line 3385  for (;;)
3385              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3386              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3387              }              }
3388            if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3389              eptr++;
3390            }            }
3391          /* Control never gets here */          /* Control never gets here */
3392          }          }
# Line 3325  for (;;) Line 3400  for (;;)
3400              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3401              break;              break;
3402              }              }
3403            if (fc != md->lcc[*eptr]) break;            if (fc != *eptr && foc != *eptr) break;
3404            eptr++;            eptr++;
3405            }            }
3406    
# Line 3414  for (;;) Line 3489  for (;;)
3489      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3490      if (op == OP_NOTI)         /* The caseless case */      if (op == OP_NOTI)         /* The caseless case */
3491        {        {
3492  #ifdef SUPPORT_UTF8        register int ch, och;
3493        if (c < 256)        ch = *ecode++;
3494  #endif  #ifdef COMPILE_PCRE8
3495        c = md->lcc[c];        /* ch must be < 128 if UTF is enabled. */
3496        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        och = md->fcc[ch];
3497    #else
3498    #ifdef SUPPORT_UTF
3499    #ifdef SUPPORT_UCP
3500          if (utf && ch > 127)
3501            och = UCD_OTHERCASE(ch);
3502    #else
3503          if (utf && ch > 127)
3504            och = ch;
3505    #endif /* SUPPORT_UCP */
3506          else
3507    #endif /* SUPPORT_UTF */
3508            och = TABLE_GET(ch, md->fcc, ch);
3509    #endif /* COMPILE_PCRE8 */
3510          if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3511        }        }
3512      else    /* Caseful */      else    /* Caseful */
3513        {        {
# Line 3436  for (;;) Line 3525  for (;;)
3525      case OP_NOTEXACT:      case OP_NOTEXACT:
3526      case OP_NOTEXACTI:      case OP_NOTEXACTI:
3527      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3528      ecode += 3;      ecode += 1 + IMM2_SIZE;
3529      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3530    
3531      case OP_NOTUPTO:      case OP_NOTUPTO:
# Line 3446  for (;;) Line 3535  for (;;)
3535      min = 0;      min = 0;
3536      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3537      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3538      ecode += 3;      ecode += 1 + IMM2_SIZE;
3539      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3540    
3541      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
# Line 3478  for (;;) Line 3567  for (;;)
3567      possessive = TRUE;      possessive = TRUE;
3568      min = 0;      min = 0;
3569      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3570      ecode += 3;      ecode += 1 + IMM2_SIZE;
3571      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3572    
3573      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 3517  for (;;) Line 3606  for (;;)
3606    
3607      if (op >= OP_NOTSTARI)     /* Caseless */      if (op >= OP_NOTSTARI)     /* Caseless */
3608        {        {
3609        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3610          /* fc must be < 128 if UTF is enabled. */
3611          foc = md->fcc[fc];
3612    #else
3613    #ifdef SUPPORT_UTF
3614    #ifdef SUPPORT_UCP
3615          if (utf && fc > 127)
3616            foc = UCD_OTHERCASE(fc);
3617    #else
3618          if (utf && fc > 127)
3619            foc = fc;
3620    #endif /* SUPPORT_UCP */
3621          else
3622    #endif /* SUPPORT_UTF */
3623            foc = TABLE_GET(fc, md->fcc, fc);
3624    #endif /* COMPILE_PCRE8 */
3625    
3626  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3627        /* UTF-8 mode */        if (utf)
       if (utf8)  
3628          {          {
3629          register unsigned int d;          register unsigned int d;
3630          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3532  for (;;) Line 3635  for (;;)
3635              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3636              }              }
3637            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3638            if (d < 256) d = md->lcc[d];            if (fc == d || foc == d) RRETURN(MATCH_NOMATCH);
           if (fc == d) RRETURN(MATCH_NOMATCH);  
3639            }            }
3640          }          }
3641        else        else
3642  #endif  #endif
3643          /* Not UTF mode */
       /* Not UTF-8 mode */  
3644          {          {
3645          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3646            {            {
# Line 3548  for (;;) Line 3649  for (;;)
3649              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3650              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3651              }              }
3652            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3653              eptr++;
3654            }            }
3655          }          }
3656    
# Line 3556  for (;;) Line 3658  for (;;)
3658    
3659        if (minimize)        if (minimize)
3660          {          {
3661  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3662          /* UTF-8 mode */          if (utf)
         if (utf8)  
3663            {            {
3664            register unsigned int d;            register unsigned int d;
3665            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3572  for (;;) Line 3673  for (;;)
3673                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3674                }                }
3675              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3676              if (d < 256) d = md->lcc[d];              if (fc == d || foc == d) RRETURN(MATCH_NOMATCH);
             if (fc == d) RRETURN(MATCH_NOMATCH);  
3677              }              }
3678            }            }
3679          else          else
3680  #endif  #endif
3681          /* Not UTF-8 mode */          /* Not UTF mode */
3682            {            {
3683            for (fi = min;; fi++)            for (fi = min;; fi++)
3684              {              {
# Line 3590  for (;;) Line 3690  for (;;)
3690                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3691                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3692                }                }
3693              if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);              if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3694                eptr++;
3695              }              }
3696            }            }
3697          /* Control never gets here */          /* Control never gets here */
# Line 3602  for (;;) Line 3703  for (;;)
3703          {          {
3704          pp = eptr;          pp = eptr;
3705    
3706  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3707          /* UTF-8 mode */          if (utf)
         if (utf8)  
3708            {            {
3709            register unsigned int d;            register unsigned int d;
3710            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3616  for (;;) Line 3716  for (;;)
3716                break;                break;
3717                }                }
3718              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3719              if (d < 256) d = md->lcc[d];              if (fc == d || foc == d) break;
             if (fc == d) break;  
3720              eptr += len;              eptr += len;
3721              }              }
3722          if (possessive) continue;          if (possessive) continue;
# Line 3631  for (;;) Line 3730  for (;;)
3730            }            }
3731          else          else
3732  #endif  #endif
3733          /* Not UTF-8 mode */          /* Not UTF mode */
3734            {            {
3735            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3736              {              {
# Line 3640  for (;;) Line 3739  for (;;)
3739                SCHECK_PARTIAL();                SCHECK_PARTIAL();
3740                break;                break;
3741                }                }
3742              if (fc == md->lcc[*eptr]) break;              if (fc == *eptr || foc == *eptr) break;
3743              eptr++;              eptr++;
3744              }              }
3745            if (possessive) continue;            if (possessive) continue;
# Line 3661  for (;;) Line 3760  for (;;)
3760    
3761      else      else
3762        {        {
3763  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3764        /* UTF-8 mode */        if (utf)
       if (utf8)  
3765          {          {
3766          register unsigned int d;          register unsigned int d;
3767          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3679  for (;;) Line 3777  for (;;)
3777          }          }
3778        else        else
3779  #endif  #endif
3780        /* Not UTF-8 mode */        /* Not UTF mode */
3781          {          {
3782          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3783            {            {
# Line 3696  for (;;) Line 3794  for (;;)
3794    
3795        if (minimize)        if (minimize)
3796          {          {
3797  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3798          /* UTF-8 mode */          if (utf)
         if (utf8)  
3799            {            {
3800            register unsigned int d;            register unsigned int d;
3801            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3717  for (;;) Line 3814  for (;;)
3814            }            }
3815          else          else
3816  #endif  #endif
3817          /* Not UTF-8 mode */          /* Not UTF mode */
3818            {            {
3819            for (fi = min;; fi++)            for (fi = min;; fi++)
3820              {              {
# Line 3741  for (;;) Line 3838  for (;;)
3838          {          {
3839          pp = eptr;          pp = eptr;
3840    
3841  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3842          /* UTF-8 mode */          if (utf)
         if (utf8)  
3843            {            {
3844            register unsigned int d;            register unsigned int d;
3845            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3769  for (;;) Line 3865  for (;;)
3865            }            }
3866          else          else
3867  #endif  #endif
3868          /* Not UTF-8 mode */          /* Not UTF mode */
3869            {            {
3870            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3871              {              {
# Line 3802  for (;;) Line 3898  for (;;)
3898      case OP_TYPEEXACT:      case OP_TYPEEXACT:
3899      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3900      minimize = TRUE;      minimize = TRUE;
3901      ecode += 3;      ecode += 1 + IMM2_SIZE;
3902      goto REPEATTYPE;      goto REPEATTYPE;
3903    
3904      case OP_TYPEUPTO:      case OP_TYPEUPTO:
# Line 3810  for (;;) Line 3906  for (;;)
3906      min = 0;      min = 0;
3907      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3908      minimize = *ecode == OP_TYPEMINUPTO;      minimize = *ecode == OP_TYPEMINUPTO;
3909      ecode += 3;      ecode += 1 + IMM2_SIZE;
3910      goto REPEATTYPE;      goto REPEATTYPE;
3911    
3912      case OP_TYPEPOSSTAR:      case OP_TYPEPOSSTAR:
# Line 3838  for (;;) Line 3934  for (;;)
3934      possessive = TRUE;      possessive = TRUE;
3935      min = 0;      min = 0;
3936      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3937      ecode += 3;      ecode += 1 + IMM2_SIZE;
3938      goto REPEATTYPE;      goto REPEATTYPE;
3939    
3940      case OP_TYPESTAR:      case OP_TYPESTAR:
# Line 4045  for (;;) Line 4141  for (;;)
4141            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4142              {              {
4143              int len = 1;              int len = 1;
4144              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4145              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
4146              eptr += len;              eptr += len;
4147              }              }
# Line 4057  for (;;) Line 4153  for (;;)
4153    
4154  /* Handle all other cases when the coding is UTF-8 */  /* Handle all other cases when the coding is UTF-8 */
4155    
4156  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4157        if (utf8) switch(ctype)        if (utf) switch(ctype)
4158          {          {
4159          case OP_ANY:          case OP_ANY:
4160          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 4070  for (;;) Line 4166  for (;;)
4166              }              }
4167            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4168            eptr++;            eptr++;
4169            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4170            }            }
4171          break;          break;
4172    
# Line 4083  for (;;) Line 4179  for (;;)
4179              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4180              }              }
4181            eptr++;            eptr++;
4182            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4183            }            }
4184          break;          break;
4185    
# Line 4265  for (;;) Line 4361  for (;;)
4361              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4362              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4363              }              }
4364            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
4365              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4366              eptr++;
4367            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4368            }            }
4369          break;          break;
# Line 4281  for (;;) Line 4378  for (;;)
4378              }              }
4379            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
4380              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4381            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            eptr++;
4382              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4383            }            }
4384          break;          break;
4385    
# Line 4293  for (;;) Line 4391  for (;;)
4391              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4392              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4393              }              }
4394            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
4395              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4396              eptr++;
4397            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4398            }            }
4399          break;          break;
# Line 4309  for (;;) Line 4408  for (;;)
4408              }              }
4409            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
4410              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4411            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            eptr++;
4412              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4413            }            }
4414          break;          break;
4415    
# Line 4321  for (;;) Line 4421  for (;;)
4421              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4422              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4423              }              }
4424            if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
4425              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4426              eptr++;
4427            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
4428            }            }
4429          break;          break;
# Line 4332  for (;;) Line 4433  for (;;)
4433          }  /* End switch(ctype) */          }  /* End switch(ctype) */
4434    
4435        else        else
4436  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF */
4437    
4438        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
4439        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. */
# Line 4392  for (;;) Line 4493  for (;;)
4493              case 0x000b:              case 0x000b:
4494              case 0x000c:              case 0x000c:
4495              case 0x0085:              case 0x0085:
4496    #ifdef COMPILE_PCRE16
4497                case 0x2028:
4498                case 0x2029:
4499    #endif
4500              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4501              break;              break;
4502              }              }
# Line 4412  for (;;) Line 4517  for (;;)
4517              case 0x09:      /* HT */              case 0x09:      /* HT */
4518              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4519              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
4520    #ifdef COMPILE_PCRE16
4521                case 0x1680:    /* OGHAM SPACE MARK */
4522                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4523                case 0x2000:    /* EN QUAD */
4524                case 0x2001:    /* EM QUAD */
4525                case 0x2002:    /* EN SPACE */
4526                case 0x2003:    /* EM SPACE */
4527                case 0x2004:    /* THREE-PER-EM SPACE */
4528                case 0x2005:    /* FOUR-PER-EM SPACE */
4529                case 0x2006:    /* SIX-PER-EM SPACE */
4530                case 0x2007:    /* FIGURE SPACE */
4531                case 0x2008:    /* PUNCTUATION SPACE */
4532                case 0x2009:    /* THIN SPACE */
4533                case 0x200A:    /* HAIR SPACE */
4534                case 0x202f:    /* NARROW NO-BREAK SPACE */
4535                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4536                case 0x3000:    /* IDEOGRAPHIC SPACE */
4537    #endif
4538              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4539              }              }
4540            }            }
# Line 4431  for (;;) Line 4554  for (;;)
4554              case 0x09:      /* HT */              case 0x09:      /* HT */
4555              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4556              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
4557    #ifdef COMPILE_PCRE16
4558                case 0x1680:    /* OGHAM SPACE MARK */
4559                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4560                case 0x2000:    /* EN QUAD */
4561                case 0x2001:    /* EM QUAD */
4562                case 0x2002:    /* EN SPACE */
4563                case 0x2003:    /* EM SPACE */
4564                case 0x2004:    /* THREE-PER-EM SPACE */
4565                case 0x2005:    /* FOUR-PER-EM SPACE */
4566                case 0x2006:    /* SIX-PER-EM SPACE */
4567                case 0x2007:    /* FIGURE SPACE */
4568                case 0x2008:    /* PUNCTUATION SPACE */
4569                case 0x2009:    /* THIN SPACE */
4570                case 0x200A:    /* HAIR SPACE */
4571                case 0x202f:    /* NARROW NO-BREAK SPACE */
4572                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4573                case 0x3000:    /* IDEOGRAPHIC SPACE */
4574    #endif
4575              break;              break;
4576              }              }
4577            }            }
# Line 4452  for (;;) Line 4593  for (;;)
4593              case 0x0c:      /* FF */              case 0x0c:      /* FF */
4594              case 0x0d:      /* CR */              case 0x0d:      /* CR */
4595              case 0x85:      /* NEL */              case 0x85:      /* NEL */
4596    #ifdef COMPILE_PCRE16
4597                case 0x2028:    /* LINE SEPARATOR */
4598                case 0x2029:    /* PARAGRAPH SEPARATOR */
4599    #endif
4600              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4601              }              }
4602            }            }
# Line 4473  for (;;) Line 4618  for (;;)
4618              case 0x0c:      /* FF */              case 0x0c:      /* FF */
4619              case 0x0d:      /* CR */              case 0x0d:      /* CR */
4620              case 0x85:      /* NEL */              case 0x85:      /* NEL */
4621    #ifdef COMPILE_PCRE16
4622                case 0x2028:    /* LINE SEPARATOR */
4623                case 0x2029:    /* PARAGRAPH SEPARATOR */
4624    #endif
4625              break;              break;
4626              }              }
4627            }            }
# Line 4486  for (;;) Line 4635  for (;;)
4635              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4636              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4637              }              }
4638            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4639                RRETURN(MATCH_NOMATCH);
4640              eptr++;
4641            }            }
4642          break;          break;
4643    
# Line 4498  for (;;) Line 4649  for (;;)
4649              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4650              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4651              }              }
4652            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4653                RRETURN(MATCH_NOMATCH);
4654              eptr++;
4655            }            }
4656          break;          break;
4657    
# Line 4510  for (;;) Line 4663  for (;;)
4663              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4664              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4665              }              }
4666            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4667                RRETURN(MATCH_NOMATCH);
4668              eptr++;
4669            }            }
4670          break;          break;
4671    
# Line 4522  for (;;) Line 4677  for (;;)
4677              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4678              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4679              }              }
4680            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4681                RRETURN(MATCH_NOMATCH);
4682              eptr++;
4683            }            }
4684          break;          break;
4685    
# Line 4534  for (;;) Line 4691  for (;;)
4691              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4692              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4693              }              }
4694            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4695              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4696              eptr++;
4697            }            }
4698          break;          break;
4699    
# Line 4547  for (;;) Line 4705  for (;;)
4705              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4706              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4707              }              }
4708            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4709              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4710              eptr++;
4711            }            }
4712          break;          break;
4713    
# Line 4766  for (;;) Line 4925  for (;;)
4925            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4926              {              {
4927              int len = 1;              int len = 1;
4928              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4929              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
4930              eptr += len;              eptr += len;
4931              }              }
# Line 4775  for (;;) Line 4934  for (;;)
4934        else        else
4935  #endif     /* SUPPORT_UCP */  #endif     /* SUPPORT_UCP */
4936    
4937  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4938        /* UTF-8 mode */        if (utf)
       if (utf8)  
4939          {          {
4940          for (fi = min;; fi++)          for (fi = min;; fi++)
4941            {            {
# Line 4919  for (;;) Line 5077  for (;;)
5077              break;              break;
5078    
5079              case OP_WHITESPACE:              case OP_WHITESPACE:
5080              if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)              if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5081                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5082              break;              break;
5083    
# Line 4940  for (;;) Line 5098  for (;;)
5098          }          }
5099        else        else
5100  #endif  #endif
5101        /* Not UTF-8 mode */        /* Not UTF mode */
5102          {          {
5103          for (fi = min;; fi++)          for (fi = min;; fi++)
5104            {            {
# Line 4976  for (;;) Line 5134  for (;;)
5134                case 0x000b:                case 0x000b:
5135                case 0x000c:                case 0x000c:
5136                case 0x0085:                case 0x0085:
5137    #ifdef COMPILE_PCRE16
5138                  case 0x2028:
5139                  case 0x2029:
5140    #endif
5141                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5142                break;                break;
5143                }                }
# Line 4988  for (;;) Line 5150  for (;;)
5150                case 0x09:      /* HT */                case 0x09:      /* HT */
5151                case 0x20:      /* SPACE */                case 0x20:      /* SPACE */
5152                case 0xa0:      /* NBSP */                case 0xa0:      /* NBSP */
5153    #ifdef COMPILE_PCRE16
5154                  case 0x1680:    /* OGHAM SPACE MARK */
5155                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5156                  case 0x2000:    /* EN QUAD */
5157                  case 0x2001:    /* EM QUAD */
5158                  case 0x2002:    /* EN SPACE */
5159                  case 0x2003:    /* EM SPACE */
5160                  case 0x2004:    /* THREE-PER-EM SPACE */
5161                  case 0x2005:    /* FOUR-PER-EM SPACE */
5162                  case 0x2006:    /* SIX-PER-EM SPACE */
5163                  case 0x2007:    /* FIGURE SPACE */
5164                  case 0x2008:    /* PUNCTUATION SPACE */
5165                  case 0x2009:    /* THIN SPACE */
5166                  case 0x200A:    /* HAIR SPACE */
5167                  case 0x202f:    /* NARROW NO-BREAK SPACE */
5168                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5169                  case 0x3000:    /* IDEOGRAPHIC SPACE */
5170    #endif
5171                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5172                }                }
5173              break;              break;
# Line 4999  for (;;) Line 5179  for (;;)
5179                case 0x09:      /* HT */                case 0x09:      /* HT */
5180                case 0x20:      /* SPACE */                case 0x20:      /* SPACE */
5181                case 0xa0:      /* NBSP */                case 0xa0:      /* NBSP */
5182    #ifdef COMPILE_PCRE16
5183                  case 0x1680:    /* OGHAM SPACE MARK */
5184                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5185                  case 0x2000:    /* EN QUAD */
5186                  case 0x2001:    /* EM QUAD */
5187                  case 0x2002:    /* EN SPACE */
5188                  case 0x2003:    /* EM SPACE */
5189                  case 0x2004:    /* THREE-PER-EM SPACE */
5190                  case 0x2005:    /* FOUR-PER-EM SPACE */
5191                  case 0x2006:    /* SIX-PER-EM SPACE */
5192                  case 0x2007:    /* FIGURE SPACE */
5193                  case 0x2008:    /* PUNCTUATION SPACE */
5194                  case 0x2009:    /* THIN SPACE */
5195                  case 0x200A:    /* HAIR SPACE */
5196                  case 0x202f:    /* NARROW NO-BREAK SPACE */
5197                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5198                  case 0x3000:    /* IDEOGRAPHIC SPACE */
5199    #endif
5200                break;                break;
5201                }                }
5202              break;              break;
# Line 5012  for (;;) Line 5210  for (;;)
5210                case 0x0c:      /* FF */                case 0x0c:      /* FF */
5211                case 0x0d:      /* CR */                case 0x0d:      /* CR */
5212                case 0x85:      /* NEL */                case 0x85:      /* NEL */
5213    #ifdef COMPILE_PCRE16
5214                  case 0x2028:    /* LINE SEPARATOR */
5215                  case 0x2029:    /* PARAGRAPH SEPARATOR */
5216    #endif
5217                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5218                }                }
5219              break;              break;
# Line 5025  for (;;) Line 5227  for (;;)
5227                case 0x0c:      /* FF */                case 0x0c:      /* FF */
5228                case 0x0d:      /* CR */                case 0x0d:      /* CR */
5229                case 0x85:      /* NEL */                case 0x85:      /* NEL */
5230    #ifdef COMPILE_PCRE16
5231                  case 0x2028:    /* LINE SEPARATOR */
5232                  case 0x2029:    /* PARAGRAPH SEPARATOR */
5233    #endif
5234                break;                break;
5235                }                }
5236              break;              break;
5237    
5238              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
5239              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5240              break;              break;
5241    
5242              case OP_DIGIT:              case OP_DIGIT:
5243              if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);              if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5244              break;              break;
5245    
5246              case OP_NOT_WHITESPACE:              case OP_NOT_WHITESPACE:
5247              if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);              if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5248              break;              break;
5249    
5250              case OP_WHITESPACE:              case OP_WHITESPACE:
5251              if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);              if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5252              break;              break;
5253    
5254              case OP_NOT_WORDCHAR:              case OP_NOT_WORDCHAR:
5255              if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);              if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5256              break;              break;
5257    
5258              case OP_WORDCHAR:              case OP_WORDCHAR:
5259              if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);              if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5260              break;              break;
5261    
5262              default:              default:
# Line 5239  for (;;) Line 5445  for (;;)
5445            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5446            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5447            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5448            if (utf8) BACKCHAR(eptr);            if (utf) BACKCHAR(eptr);
5449            }            }
5450          }          }
5451    
# Line 5256  for (;;) Line 5462  for (;;)
5462              SCHECK_PARTIAL();              SCHECK_PARTIAL();
5463              break;              break;
5464              }              }
5465            if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5466            if (UCD_CATEGORY(c) == ucp_M) break;            if (UCD_CATEGORY(c) == ucp_M) break;
5467            eptr += len;            eptr += len;
5468            while (eptr < md->end_subject)            while (eptr < md->end_subject)
5469              {              {
5470              len = 1;              len = 1;
5471              if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5472              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
5473              eptr += len;              eptr += len;
5474              }              }
# Line 5279  for (;;) Line 5485  for (;;)
5485            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5486            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
5487              {              {
5488              if (!utf8) c = *eptr; else              if (!utf) c = *eptr; else
5489                {                {
5490                BACKCHAR(eptr);                BACKCHAR(eptr);
5491                GETCHAR(c, eptr);                GETCHAR(c, eptr);
# Line 5293  for (;;) Line 5499  for (;;)
5499        else        else
5500  #endif   /* SUPPORT_UCP */  #endif   /* SUPPORT_UCP */
5501    
5502  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
5503        /* UTF-8 mode */        if (utf)
   
       if (utf8)  
5504          {          {
5505          switch(ctype)          switch(ctype)
5506            {            {
# Line 5312  for (;;) Line 5516  for (;;)
5516                  }                  }
5517                if (IS_NEWLINE(eptr)) break;                if (IS_NEWLINE(eptr)) break;
5518                eptr++;                eptr++;
5519                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5520                }                }
5521              }              }
5522    
# Line 5329  for (;;) Line 5533  for (;;)
5533                  }                  }
5534                if (IS_NEWLINE(eptr)) break;                if (IS_NEWLINE(eptr)) break;
5535                eptr++;                eptr++;
5536                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5537                }                }
5538              }              }
5539            break;            break;
# Line 5345  for (;;) Line 5549  for (;;)
5549                  break;                  break;
5550                  }                  }
5551                eptr++;                eptr++;
5552                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5553                }                }
5554              }              }
5555            else            else
# Line 5578  for (;;) Line 5782  for (;;)
5782            }            }
5783          }          }
5784        else        else
5785  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
5786          /* Not UTF mode */
       /* Not UTF-8 mode */  
5787          {          {
5788          switch(ctype)          switch(ctype)
5789            {            {
# Line 5624  for (;;) Line 5827  for (;;)
5827                }                }
5828              else              else
5829                {                {
5830                if (c != 0x000a &&                if (c != 0x000a && (md->bsr_anycrlf ||
5831                    (md->bsr_anycrlf ||                  (c != 0x000b && c != 0x000c && c != 0x0085
5832                      (c != 0x000b && c != 0x000c && c != 0x0085)))  #ifdef COMPILE_PCRE16
5833                  break;                  && c != 0x2028 && c != 0x2029
5834    #endif
5835                    ))) break;
5836                eptr++;                eptr++;
5837                }                }
5838              }              }
# Line 5642  for (;;) Line 5847  for (;;)
5847                break;                break;
5848                }                }
5849              c = *eptr;              c = *eptr;
5850              if (c == 0x09 || c == 0x20 || c == 0xa0) break;              if (c == 0x09 || c == 0x20 || c == 0xa0
5851    #ifdef COMPILE_PCRE16
5852                  || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)
5853                  || c == 0x202f || c == 0x205f || c == 0x3000
5854    #endif
5855                  ) break;
5856              eptr++;              eptr++;
5857              }              }
5858            break;            break;
# Line 5656  for (;;) Line 5866  for (;;)
5866                break;                break;
5867                }                }
5868              c = *eptr;              c = *eptr;
5869              if (c != 0x09 && c != 0x20 && c != 0xa0) break;              if (c != 0x09 && c != 0x20 && c != 0xa0
5870    #ifdef COMPILE_PCRE16
5871                  && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)
5872                  && c != 0x202f && c != 0x205f && c != 0x3000
5873    #endif
5874                  ) break;
5875              eptr++;              eptr++;
5876              }              }
5877            break;            break;
# Line 5670  for (;;) Line 5885  for (;;)
5885                break;                break;
5886                }                }
5887              c = *eptr;              c = *eptr;
5888              if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)              if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85
5889                break;  #ifdef COMPILE_PCRE16
5890                  || c == 0x2028 || c == 0x2029
5891    #endif
5892                  ) break;
5893              eptr++;              eptr++;
5894              }              }
5895            break;            break;
# Line 5685  for (;;) Line 5903  for (;;)
5903                break;                break;
5904                }                }
5905              c = *eptr;              c = *eptr;
5906              if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)              if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85
5907                break;  #ifdef COMPILE_PCRE16
5908                  && c != 0x2028 && c != 0x2029
5909    #endif
5910                  ) break;
5911              eptr++;              eptr++;
5912              }              }
5913            break;            break;
# Line 5699  for (;;) Line 5920  for (;;)
5920                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5921                break;                break;
5922                }                }
5923              if ((md->ctypes[*eptr] & ctype_digit) != 0) break;              if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
5924              eptr++;              eptr++;
5925              }              }
5926            break;            break;
# Line 5712  for (;;) Line 5933  for (;;)
5933                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5934                break;                break;
5935                }                }
5936              if ((md->ctypes[*eptr] & ctype_digit) == 0) break;              if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
5937              eptr++;              eptr++;
5938              }              }
5939            break;            break;
# Line 5725  for (;;) Line 5946  for (;;)
5946                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5947                break;                break;
5948                }                }
5949              if ((md->ctypes[*eptr] & ctype_space) != 0) break;              if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
5950              eptr++;              eptr++;
5951              }              }
5952            break;            break;
# Line 5738  for (;;) Line 5959  for (;;)
5959                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5960                break;                break;
5961                }                }
5962              if ((md->ctypes[*eptr] & ctype_space) == 0) break;              if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
5963              eptr++;              eptr++;
5964              }              }
5965            break;            break;
# Line 5751  for (;;) Line 5972  for (;;)
5972                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5973                break;                break;
5974                }                }
5975              if ((md->ctypes[*eptr] & ctype_word) != 0) break;              if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
5976              eptr++;              eptr++;
5977              }              }
5978            break;            break;
# Line 5764  for (;;) Line 5985  for (;;)
5985                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5986                break;                break;
5987                }                }
5988              if ((md->ctypes[*eptr] & ctype_word) == 0) break;              if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
5989              eptr++;              eptr++;
5990              }              }
5991            break;            break;
# Line 5827  switch (frame->Xwhere) Line 6048  switch (frame->Xwhere)
6048    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6049    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6050    LBL(65) LBL(66)    LBL(65) LBL(66)
6051  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6052    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(21)
6053    #endif
6054    #ifdef SUPPORT_UTF
6055      LBL(16) LBL(18) LBL(20)
6056      LBL(22) LBL(23) LBL(28) LBL(30)
6057    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
6058  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6059    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6060    LBL(59) LBL(60) LBL(61) LBL(62)    LBL(59) LBL(60) LBL(61) LBL(62)
6061  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
6062  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
6063    default:    default:
6064    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6065    
6066    printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
6067    
6068    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
6069    }    }
6070  #undef LBL  #undef LBL
# Line 5923  Returns:          > 0 => success; value Line 6151  Returns:          > 0 => success; value
6151                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
6152  */  */
6153    
6154    #ifdef COMPILE_PCRE8
6155  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6156  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6157    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6158    int offsetcount)    int offsetcount)
6159    #else
6160    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6161    pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6162      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6163      int offsetcount)
6164    #endif
6165  {  {
6166  int rc, ocount, arg_offset_max;  int rc, ocount, arg_offset_max;
 int first_byte = -1;  
 int req_byte = -1;  
 int req_byte2 = -1;  
6167  int newline;  int newline;
6168  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
6169  BOOL anchored;  BOOL anchored;
6170  BOOL startline;  BOOL startline;
6171  BOOL firstline;  BOOL firstline;
6172  BOOL first_byte_caseless = FALSE;  BOOL utf;
6173  BOOL req_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
6174  BOOL utf8;  BOOL has_req_char = FALSE;
6175    pcre_uchar first_char = 0;
6176    pcre_uchar first_char2 = 0;
6177    pcre_uchar req_char = 0;
6178    pcre_uchar req_char2 = 0;
6179  match_data match_block;  match_data match_block;
6180  match_data *md = &match_block;  match_data *md = &match_block;
6181  const uschar *tables;  const pcre_uint8 *tables;
6182  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
6183  USPTR start_match = (USPTR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6184  USPTR end_subject;  PCRE_PUCHAR end_subject;
6185  USPTR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
6186  USPTR req_byte_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6187    
 pcre_study_data internal_study;  
6188  const pcre_study_data *study;  const pcre_study_data *study;
6189    const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
 real_pcre internal_re;  
 const real_pcre *external_re = (const real_pcre *)argument_re;  
 const real_pcre *re = external_re;  
6190    
6191  /* Plausibility checks */  /* Plausibility checks */
6192    
# Line 5969  follows immediately afterwards. Other va Line 6201  follows immediately afterwards. Other va
6201  during "normal" pcre_exec() processing, not when the JIT support is in use,  during "normal" pcre_exec() processing, not when the JIT support is in use,
6202  so they are set up later. */  so they are set up later. */
6203    
6204  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6205    utf = md->utf = (re->options & PCRE_UTF8) != 0;
6206  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6207                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6208    
6209  /* Check a UTF-8 string if required. Pass back the character offset and error  /* Check a UTF-8 string if required. Pass back the character offset and error
6210  code for an invalid string if a results vector is available. */  code for an invalid string if a results vector is available. */
6211    
6212  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6213  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6214    {    {
6215    int erroroffset;    int erroroffset;
6216    int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);    int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6217    if (errorcode != 0)    if (errorcode != 0)
6218      {      {
6219      if (offsetcount >= 2)      if (offsetcount >= 2)
# Line 5988  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 6221  if (utf8 && (options & PCRE_NO_UTF8_CHEC
6221        offsets[0] = erroroffset;        offsets[0] = erroroffset;
6222        offsets[1] = errorcode;        offsets[1] = errorcode;
6223        }        }
6224    #ifdef COMPILE_PCRE16
6225        return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6226          PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6227    #else
6228      return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?      return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6229        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6230    #endif
6231      }      }
6232    
6233    /* Check that a start_offset points to the start of a UTF-8 character. */    /* Check that a start_offset points to the start of a UTF character. */
6234    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
6235        (((USPTR)subject)[start_offset] & 0xc0) == 0x80)        NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6236      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
6237    }    }
6238  #endif  #endif
# Line 6012  if (extra_data != NULL Line 6250  if (extra_data != NULL
6250      && (extra_data->flags & PCRE_EXTRA_TABLES) == 0      && (extra_data->flags & PCRE_EXTRA_TABLES) == 0
6251      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6252                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
6253    return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,    return PRIV(jit_exec)(re, extra_data->executable_jit,
6254      start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)      (const pcre_uchar *)subject, length, start_offset, options,
6255        ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
6256      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
6257  #endif  #endif
6258    
6259  /* Carry on with non-JIT matching. This information is for finding all the  /* Carry on with non-JIT matching. This information is for finding all the
6260  numbers associated with a given name, for condition testing. */  numbers associated with a given name, for condition testing. */
6261    
6262  md->name_table = (uschar *)re + re->name_table_offset;  md->name_table = (pcre_uchar *)re + re->name_table_offset;
6263  md->name_count = re->name_count;  md->name_count = re->name_count;
6264  md->name_entry_size = re->name_entry_size;  md->name_entry_size = re->name_entry_size;
6265    
# Line 6034  md->callout_data = NULL; Line 6273  md->callout_data = NULL;
6273    
6274  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
6275    
6276  tables = external_re->tables;  tables = re->tables;
6277    
6278  if (extra_data != NULL)  if (extra_data != NULL)
6279    {    {
# Line 6054  if (extra_data != NULL) Line 6293  if (extra_data != NULL)
6293  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
6294  in other programs later. */  in other programs later. */
6295    
6296  if (tables == NULL) tables = _pcre_default_tables;  if (tables == NULL) tables = PRIV(default_tables);
6297    
6298  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
6299  test for a regex that was compiled on a host of opposite endianness. If this is  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6300  the case, flipped values are put in internal_re and internal_study if there was  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6301  study data too. */  means that the pattern is likely compiled with different endianness. */
6302    
6303  if (re->magic_number != MAGIC_NUMBER)  if (re->magic_number != MAGIC_NUMBER)
6304    {    return re->magic_number == REVERSED_MAGIC_NUMBER?
6305    re = _pcre_try_flipped(re, &internal_re, study, &internal_study);      PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6306    if (re == NULL) return PCRE_ERROR_BADMAGIC;  if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
   if (study != NULL) study = &internal_study;  
   }  
6307    
6308  /* Set up other data */  /* Set up other data */
6309    
# Line 6076  firstline = (re->options & PCRE_FIRSTLIN Line 6313  firstline = (re->options & PCRE_FIRSTLIN
6313    
6314  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
6315    
6316  md->start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6317    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
6318    
6319  md->start_subject = (USPTR)subject;  md->start_subject = (PCRE_PUCHAR)subject;
6320  md->start_offset = start_offset;  md->start_offset = start_offset;
6321  md->end_subject = md->start_subject + length;  md->end_subject = md->start_subject + length;
6322  end_subject = md->end_subject;  end_subject = md->end_subject;
# Line 6104  md->recursive = NULL; Line 6341  md->recursive = NULL;
6341  md->hasthen = (re->flags & PCRE_HASTHEN) != 0;  md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6342    
6343  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
6344    md->fcc = tables + fcc_offset;
6345  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
6346    
6347  /* Handle different \R options. */  /* Handle different \R options. */
# Line 6190  arg_offset_max = (2*ocount)/3; Line 6428  arg_offset_max = (2*ocount)/3;
6428  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
6429    {    {
6430    ocount = re->top_backref * 3 + 3;    ocount = re->top_backref * 3 + 3;
6431    md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6432    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6433    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
6434    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
# Line 6217  if (md->offset_vector != NULL) Line 6455  if (md->offset_vector != NULL)
6455    md->offset_vector[0] = md->offset_vector[1] = -1;    md->offset_vector[0] = md->offset_vector[1] = -1;
6456    }    }
6457    
6458  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_char value is
6459  never set for an anchored regular expression, but the anchoring may be forced  never set for an anchored regular expression, but the anchoring may be forced
6460  at run time, so we have to test for anchoring. The first char may be unset for  at run time, so we have to test for anchoring. The first char may be unset for
6461  an unanchored pattern, of course. If there's no first char and the pattern was  an unanchored pattern, of course. If there's no first char and the pattern was
# Line 6227  if (!anchored) Line 6465  if (!anchored)
6465    {    {
6466    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
6467      {      {
6468      first_byte = re->first_byte & 255;      has_first_char = TRUE;
6469      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = re->first_char;
6470        first_byte = md->lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
6471          {
6472          first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6473    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6474          if (utf && first_char > 127)
6475            first_char2 = UCD_OTHERCASE(first_char);
6476    #endif
6477          }
6478      }      }
6479    else    else
6480      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 6242  character" set. */ Line 6487  character" set. */
6487    
6488  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
6489    {    {
6490    req_byte = re->req_byte & 255;    has_req_char = TRUE;
6491    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = re->req_char;
6492    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
6493        {
6494        req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6495    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6496        if (utf && req_char > 127)
6497          req_char2 = UCD_OTHERCASE(req_char);
6498    #endif
6499        }
6500    }    }
6501    
6502    
   
   
6503  /* ==========================================================================*/  /* ==========================================================================*/
6504    
6505  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
# Line 6257  the loop runs just once. */ Line 6507  the loop runs just once. */
6507    
6508  for(;;)  for(;;)
6509    {    {
6510    USPTR save_end_subject = end_subject;    PCRE_PUCHAR save_end_subject = end_subject;
6511    USPTR new_start_match;    PCRE_PUCHAR new_start_match;
6512    
6513    /* If firstline is TRUE, the start of the match is constrained to the first    /* If firstline is TRUE, the start of the match is constrained to the first
6514    line of a multiline string. That is, the match must be before or at the first    line of a multiline string. That is, the match must be before or at the first
# Line 6268  for(;;) Line 6518  for(;;)
6518    
6519    if (firstline)    if (firstline)
6520      {      {
6521      USPTR t = start_match;      PCRE_PUCHAR t = start_match;
6522  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6523      if (utf8)      if (utf)
6524        {        {
6525        while (t < md->end_subject && !IS_NEWLINE(t))        while (t < md->end_subject && !IS_NEWLINE(t))
6526          {          {
6527          t++;          t++;
6528          while (t < end_subject && (*t & 0xc0) == 0x80) t++;          ACROSSCHAR(t < end_subject, *t, t++);
6529          }          }
6530        }        }
6531      else      else
# Line 6292  for(;;) Line 6542  for(;;)
6542    
6543    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6544      {      {
6545      /* Advance to a unique first byte if there is one. */      /* Advance to a unique first char if there is one. */
6546    
6547      if (first_byte >= 0)      if (has_first_char)
6548        {        {
6549        if (first_byte_caseless)        if (first_char != first_char2)
6550          while (start_match < end_subject && md->lcc[*start_match] != first_byte)          while (start_match < end_subject &&
6551                *start_match != first_char && *start_match != first_char2)
6552            start_match++;            start_match++;
6553        else        else
6554          while (start_match < end_subject && *start_match != first_byte)          while (start_match < end_subject && *start_match != first_char)
6555            start_match++;            start_match++;
6556        }        }
6557    
# Line 6310  for(;;) Line 6561  for(;;)
6561        {        {
6562        if (start_match > md->start_subject + start_offset)        if (start_match > md->start_subject + start_offset)
6563          {          {
6564  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6565          if (utf8)          if (utf)
6566            {            {
6567            while (start_match < end_subject && !WAS_NEWLINE(start_match))            while (start_match < end_subject && !WAS_NEWLINE(start_match))
6568              {              {
6569              start_match++;              start_match++;
6570              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)              ACROSSCHAR(start_match < end_subject, *start_match,
6571                start_match++;                start_match++);
6572              }              }
6573            }            }
6574          else          else
# Line 6344  for(;;) Line 6595  for(;;)
6595        while (start_match < end_subject)        while (start_match < end_subject)
6596          {          {
6597          register unsigned int c = *start_match;          register unsigned int c = *start_match;
6598    #ifndef COMPILE_PCRE8
6599            if (c > 255) c = 255;
6600    #endif
6601          if ((start_bits[c/8] & (1 << (c&7))) == 0)          if ((start_bits[c/8] & (1 << (c&7))) == 0)
6602            {            {
6603            start_match++;            start_match++;
6604  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6605            if (utf8)            /* In non 8-bit mode, the iteration will stop for
6606              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)            characters > 255 at the beginning or not stop at all. */
6607                start_match++;            if (utf)
6608                ACROSSCHAR(start_match < end_subject, *start_match,
6609                  start_match++);
6610  #endif  #endif
6611            }            }
6612          else break;          else break;
# Line 6379  for(;;) Line 6635  for(;;)
6635        break;        break;
6636        }        }
6637    
6638      /* If req_byte is set, we know that that character must appear in the      /* If req_char is set, we know that that character must appear in the
6639      subject for the match to succeed. If the first character is set, req_byte      subject for the match to succeed. If the first character is set, req_char
6640      must be later in the subject; otherwise the test starts at the match point.      must be later in the subject; otherwise the test starts at the match point.
6641      This optimization can save a huge amount of backtracking in patterns with      This optimization can save a huge amount of backtracking in patterns with
6642      nested unlimited repeats that aren't going to match. Writing separate code      nested unlimited repeats that aren't going to match. Writing separate code
# Line 6393  for(;;) Line 6649  for(;;)
6649      32-megabyte string... so we don't do this when the string is sufficiently      32-megabyte string... so we don't do this when the string is sufficiently
6650      long. */      long. */
6651    
6652      if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)      if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6653        {        {
6654        register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);        register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6655    
6656        /* We don't need to repeat the search if we haven't yet reached the        /* We don't need to repeat the search if we haven't yet reached the
6657        place we found it at last time. */        place we found it at last time. */
6658    
6659        if (p > req_byte_ptr)        if (p > req_char_ptr)
6660          {          {
6661          if (req_byte_caseless)          if (req_char != req_char2)
6662            {            {
6663            while (p < end_subject)            while (p < end_subject)
6664              {              {
6665              register int pp = *p++;              register int pp = *p++;
6666              if (pp == req_byte || pp == req_byte2) { p--; break; }              if (pp == req_char || pp == req_char2) { p--; break; }
6667              }              }
6668            }            }
6669          else          else
6670            {            {
6671            while (p < end_subject)            while (p < end_subject)
6672              {              {
6673              if (*p++ == req_byte) { p--; break; }              if (*p++ == req_char) { p--; break; }
6674              }              }
6675            }            }
6676    
# Line 6431  for(;;) Line 6687  for(;;)
6687          found it, so that we don't search again next time round the loop if          found it, so that we don't search again next time round the loop if
6688          the start hasn't passed this character yet. */          the start hasn't passed this character yet. */
6689    
6690          req_byte_ptr = p;          req_char_ptr = p;
6691          }          }
6692        }        }
6693      }      }
# Line 6456  for(;;) Line 6712  for(;;)
6712    switch(rc)    switch(rc)
6713      {      {
6714      /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched      /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6715      the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP      the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6716      entirely. The only way we can do that is to re-do the match at the same      entirely. The only way we can do that is to re-do the match at the same
6717      point, with a flag to force SKIP with an argument to be ignored. Just      point, with a flag to force SKIP with an argument to be ignored. Just
6718      treating this case as NOMATCH does not work because it does not check other      treating this case as NOMATCH does not work because it does not check other
6719      alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */      alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6720    
6721      case MATCH_SKIP_ARG:      case MATCH_SKIP_ARG:
6722      new_start_match = start_match;      new_start_match = start_match;
6723      md->ignore_skip_arg = TRUE;      md->ignore_skip_arg = TRUE;
6724      break;      break;
6725    
6726      /* SKIP passes back the next starting point explicitly, but if it is the      /* SKIP passes back the next starting point explicitly, but if it is the
6727      same as the match we have just done, treat it as NOMATCH. */      same as the match we have just done, treat it as NOMATCH. */
# Line 6486  for(;;) Line 6742  for(;;)
6742      case MATCH_THEN:      case MATCH_THEN:
6743      md->ignore_skip_arg = FALSE;      md->ignore_skip_arg = FALSE;
6744      new_start_match = start_match + 1;      new_start_match = start_match + 1;
6745  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6746      if (utf8)      if (utf)
6747        while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)        ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6748          new_start_match++;          new_start_match++);
6749  #endif  #endif
6750      break;      break;
6751    
# Line 6527  for(;;) Line 6783  for(;;)
6783    
6784    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does
6785    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
6786    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. In
6787      normal matching start_match will always be greater than the first position at
6788      this stage, but a failed *SKIP can cause a return at the same point, which is
6789      why the first test exists. */
6790    
6791    if (start_match[-1] == CHAR_CR &&    if (start_match > (PCRE_PUCHAR)subject + start_offset &&
6792          start_match[-1] == CHAR_CR &&
6793        start_match < end_subject &&        start_match < end_subject &&
6794        *start_match == CHAR_NL &&        *start_match == CHAR_NL &&
6795        (re->flags & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
# Line 6575  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6835  if (rc == MATCH_MATCH || rc == MATCH_ACC
6835        }        }
6836      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
6837      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
6838      (pcre_free)(md->offset_vector);      (PUBL(free))(md->offset_vector);
6839      }      }
6840    
6841    /* Set the return code to the number of captured strings, or 0 if there were    /* Set the return code to the number of captured strings, or 0 if there were
# Line 6614  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6874  if (rc == MATCH_MATCH || rc == MATCH_ACC
6874      }      }
6875    
6876    /* Return MARK data if requested */    /* Return MARK data if requested */
6877    
6878    if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)    if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6879      *(extra_data->mark) = (unsigned char *)(md->mark);      *(extra_data->mark) = (pcre_uchar *)md->mark;
6880    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
6881    return rc;    return rc;
6882    }    }
# Line 6627  attempt has failed at all permitted star Line 6887  attempt has failed at all permitted star
6887  if (using_temporary_offsets)  if (using_temporary_offsets)
6888    {    {
6889    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
6890    (pcre_free)(md->offset_vector);    (PUBL(free))(md->offset_vector);
6891    }    }
6892    
6893  /* For anything other than nomatch or partial match, just return the code. */  /* For anything other than nomatch or partial match, just return the code. */
# Line 6646  if (start_partial != NULL) Line 6906  if (start_partial != NULL)
6906    md->mark = NULL;    md->mark = NULL;
6907    if (offsetcount > 1)    if (offsetcount > 1)
6908      {      {
6909      offsets[0] = (int)(start_partial - (USPTR)subject);      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
6910      offsets[1] = (int)(end_subject - (USPTR)subject);      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
6911      }      }
6912    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
6913    }    }
# Line 6663  else Line 6923  else
6923  /* Return the MARK data if it has been requested. */  /* Return the MARK data if it has been requested. */
6924    
6925  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6926    *(extra_data->mark) = (unsigned char *)(md->nomatch_mark);    *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
6927  return rc;  return rc;
6928  }  }
6929    

Legend:
Removed from v.778  
changed lines
  Added in v.888

  ViewVC Help
Powered by ViewVC 1.1.5