/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 976 by ph10, Sat Jun 16 17:53:17 2012 UTC revision 1298 by ph10, Fri Mar 22 16:13:13 2013 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 56  possible. There are also some static sup Line 56  possible. There are also some static sup
56  #undef min  #undef min
57  #undef max  #undef max
58    
59    /* The md->capture_last field uses the lower 16 bits for the last captured
60    substring (which can never be greater than 65535) and a bit in the top half
61    to mean "capture vector overflowed". This odd way of doing things was
62    implemented when it was realized that preserving and restoring the overflow bit
63    whenever the last capture number was saved/restored made for a neater
64    interface, and doing it this way saved on (a) another variable, which would
65    have increased the stack frame size (a big NO-NO in PCRE) and (b) another
66    separate set of save/restore instructions. The following defines are used in
67    implementing this. */
68    
69    #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
70    #define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
71    #define OVFLBIT     0x00010000    /* The bit that is set for overflow */
72    
73  /* Values for setting in md->match_function_type to indicate two special types  /* Values for setting in md->match_function_type to indicate two special types
74  of call to match(). We do it this way to save on using another stack variable,  of call to match(). We do it this way to save on using another stack variable,
75  as stack usage is to be discouraged. */  as stack usage is to be discouraged. */
# Line 73  defined PCRE_ERROR_xxx codes, which are Line 87  defined PCRE_ERROR_xxx codes, which are
87  negative to avoid the external error codes. */  negative to avoid the external error codes. */
88    
89  #define MATCH_ACCEPT       (-999)  #define MATCH_ACCEPT       (-999)
90  #define MATCH_COMMIT       (-998)  #define MATCH_KETRPOS      (-998)
91  #define MATCH_KETRPOS      (-997)  #define MATCH_ONCE         (-997)
92  #define MATCH_ONCE         (-996)  /* The next 5 must be kept together and in sequence so that a test that checks
93    for any one of them can use a range. */
94    #define MATCH_COMMIT       (-996)
95  #define MATCH_PRUNE        (-995)  #define MATCH_PRUNE        (-995)
96  #define MATCH_SKIP         (-994)  #define MATCH_SKIP         (-994)
97  #define MATCH_SKIP_ARG     (-993)  #define MATCH_SKIP_ARG     (-993)
98  #define MATCH_THEN         (-992)  #define MATCH_THEN         (-992)
99    #define MATCH_BACKTRACK_MAX MATCH_THEN
100    #define MATCH_BACKTRACK_MIN MATCH_COMMIT
101    
102  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
103  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
# Line 92  because the offset vector is always a mu Line 110  because the offset vector is always a mu
110  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
111  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
112    
   
   
113  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
114  /*************************************************  /*************************************************
115  *        Debugging function to print chars       *  *        Debugging function to print chars       *
# Line 114  Returns:     nothing Line 130  Returns:     nothing
130  static void  static void
131  pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)  pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
132  {  {
133  unsigned int c;  pcre_uint32 c;
134    BOOL utf = md->utf;
135  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
136  while (length-- > 0)  while (length-- > 0)
137    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = RAWUCHARINCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
138  }  }
139  #endif  #endif
140    
# Line 150  match_ref(int offset, register PCRE_PUCH Line 167  match_ref(int offset, register PCRE_PUCH
167  {  {
168  PCRE_PUCHAR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
169  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
170    #ifdef SUPPORT_UTF
171    BOOL utf = md->utf;
172    #endif
173    
174  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
175  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 177  if (caseless) Line 197  if (caseless)
197    {    {
198  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
199  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
200    if (md->utf)    if (utf)
201      {      {
202      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
203      bytes matched may differ, because there are some characters whose upper and      data units matched may differ, because in UTF-8 there are some characters
204      lower case versions code as different numbers of bytes. For example, U+023A      whose upper and lower case versions have different numbers of bytes.
205      (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);      For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
206      a sequence of 3 of the former uses 6 bytes, as does a sequence of two of      (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
207      the latter. It is important, therefore, to check the length along the      sequence of two of the latter. It is important, therefore, to check the
208      reference, not along the subject (earlier code did this wrong). */      length along the reference, not along the subject (earlier code did this
209        wrong). */
210    
211      PCRE_PUCHAR endptr = p + length;      PCRE_PUCHAR endptr = p + length;
212      while (p < endptr)      while (p < endptr)
213        {        {
214        int c, d;        pcre_uint32 c, d;
215          const ucd_record *ur;
216        if (eptr >= md->end_subject) return -2;   /* Partial match */        if (eptr >= md->end_subject) return -2;   /* Partial match */
217        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
218        GETCHARINC(d, p);        GETCHARINC(d, p);
219        if (c != d && c != UCD_OTHERCASE(d)) return -1;        ur = GET_UCD(d);
220          if (c != d && c != d + ur->other_case)
221            {
222            const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
223            for (;;)
224              {
225              if (c < *pp) return -1;
226              if (c == *pp++) break;
227              }
228            }
229        }        }
230      }      }
231    else    else
# Line 206  if (caseless) Line 237  if (caseless)
237      {      {
238      while (length-- > 0)      while (length-- > 0)
239        {        {
240          pcre_uint32 cc, cp;
241        if (eptr >= md->end_subject) return -2;   /* Partial match */        if (eptr >= md->end_subject) return -2;   /* Partial match */
242        if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;        cc = RAWUCHARTEST(eptr);
243          cp = RAWUCHARTEST(p);
244          if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
245        p++;        p++;
246        eptr++;        eptr++;
247        }        }
# Line 222  else Line 256  else
256    while (length-- > 0)    while (length-- > 0)
257      {      {
258      if (eptr >= md->end_subject) return -2;   /* Partial match */      if (eptr >= md->end_subject) return -2;   /* Partial match */
259      if (*p++ != *eptr++) return -1;      if (RAWUCHARINCTEST(p) != RAWUCHARINCTEST(eptr)) return -1;
260      }      }
261    }    }
262    
# Line 278  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 312  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
312         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
313         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
314         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
315         RM61,  RM62, RM63, RM64, RM65, RM66 };         RM61,  RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
316    
317  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
318  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 296  actually used in this definition. */ Line 330  actually used in this definition. */
330    }    }
331  #define RRETURN(ra) \  #define RRETURN(ra) \
332    { \    { \
333    printf("match() returned %d from line %d ", ra, __LINE__); \    printf("match() returned %d from line %d\n", ra, __LINE__); \
334    return ra; \    return ra; \
335    }    }
336  #else  #else
# Line 387  typedef struct heapframe { Line 421  typedef struct heapframe {
421    
422  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
423    int Xprop_type;    int Xprop_type;
424    int Xprop_value;    unsigned int Xprop_value;
425    int Xprop_fail_result;    int Xprop_fail_result;
426    int Xoclength;    int Xoclength;
427    pcre_uchar Xocchars[6];    pcre_uchar Xocchars[6];
# Line 400  typedef struct heapframe { Line 434  typedef struct heapframe {
434    int Xlength;    int Xlength;
435    int Xmax;    int Xmax;
436    int Xmin;    int Xmin;
437    int Xnumber;    unsigned int Xnumber;
438    int Xoffset;    int Xoffset;
439    int Xop;    unsigned int Xop;
440    int Xsave_capture_last;    pcre_int32 Xsave_capture_last;
441    int Xsave_offset1, Xsave_offset2, Xsave_offset3;    int Xsave_offset1, Xsave_offset2, Xsave_offset3;
442    int Xstacksave[REC_STACK_SAVE_MAX];    int Xstacksave[REC_STACK_SAVE_MAX];
443    
# Line 488  so they can be ordinary variables in all Line 522  so they can be ordinary variables in all
522    
523  register int  rrc;         /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
524  register int  i;           /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
525  register unsigned int c;   /* Character values not kept over RMATCH() calls */  register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
526  register BOOL utf;         /* Local copy of UTF flag for speed */  register BOOL utf;         /* Local copy of UTF flag for speed */
527    
528  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
# Line 605  BOOL prev_is_word; Line 639  BOOL prev_is_word;
639    
640  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
641  int prop_type;  int prop_type;
642  int prop_value;  unsigned int prop_value;
643  int prop_fail_result;  int prop_fail_result;
644  int oclength;  int oclength;
645  pcre_uchar occhars[6];  pcre_uchar occhars[6];
# Line 616  int ctype; Line 650  int ctype;
650  int length;  int length;
651  int max;  int max;
652  int min;  int min;
653  int number;  unsigned int number;
654  int offset;  int offset;
655  int op;  unsigned int op;
656  int save_capture_last;  pcre_int32 save_capture_last;
657  int save_offset1, save_offset2, save_offset3;  int save_offset1, save_offset2, save_offset3;
658  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
659    
# Line 737  for (;;) Line 771  for (;;)
771      unaltered. */      unaltered. */
772    
773      else if (rrc == MATCH_SKIP_ARG &&      else if (rrc == MATCH_SKIP_ARG &&
774          STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)          STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
775        {        {
776        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
777        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
# Line 747  for (;;) Line 781  for (;;)
781      case OP_FAIL:      case OP_FAIL:
782      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
783    
     /* COMMIT overrides PRUNE, SKIP, and THEN */  
   
784      case OP_COMMIT:      case OP_COMMIT:
785      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
786        eptrb, RM52);        eptrb, RM52);
787      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&  
         rrc != MATCH_THEN)  
       RRETURN(rrc);  
788      RRETURN(MATCH_COMMIT);      RRETURN(MATCH_COMMIT);
789    
     /* PRUNE overrides THEN */  
   
790      case OP_PRUNE:      case OP_PRUNE:
791      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
792        eptrb, RM51);        eptrb, RM51);
793      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
794      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
795    
796      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
# Line 773  for (;;) Line 800  for (;;)
800        eptrb, RM56);        eptrb, RM56);
801      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
802           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
803      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
804      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
805    
     /* SKIP overrides PRUNE and THEN */  
   
806      case OP_SKIP:      case OP_SKIP:
807      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
808        eptrb, RM53);        eptrb, RM53);
809      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       RRETURN(rrc);  
810      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
811      RRETURN(MATCH_SKIP);      RRETURN(MATCH_SKIP);
812    
813      /* Note that, for Perl compatibility, SKIP with an argument does NOT set      /* Note that, for Perl compatibility, SKIP with an argument does NOT set
814      nomatch_mark. There is a flag that disables this opcode when re-matching a      nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
815      pattern that ended with a SKIP for which there was not a matching MARK. */      not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
816        that failed and any that precede it (either they also failed, or were not
817        triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
818        SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
819        set to the count of the one that failed. */
820    
821      case OP_SKIP_ARG:      case OP_SKIP_ARG:
822      if (md->ignore_skip_arg)      md->skip_arg_count++;
823        if (md->skip_arg_count <= md->ignore_skip_arg)
824        {        {
825        ecode += PRIV(OP_lengths)[*ecode] + ecode[1];        ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
826        break;        break;
827        }        }
828      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
829        eptrb, RM57);        eptrb, RM57);
830      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
831        RRETURN(rrc);  
   
832      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
833      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
834      caught by a matching MARK, or get to the top, where it causes a rematch      caught by a matching MARK, or get to the top, where it causes a rematch
835      with the md->ignore_skip_arg flag set. */      with md->ignore_skip_arg set to the value of md->skip_arg_count. */
836    
837      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
838      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
# Line 1050  for (;;) Line 1078  for (;;)
1078        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1079    
1080        save_mark = md->mark;        save_mark = md->mark;
1081          save_capture_last = md->capture_last;
1082        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1083          RM2);          RM2);
1084    
# Line 1081  for (;;) Line 1110  for (;;)
1110        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1111        md->mark = save_mark;        md->mark = save_mark;
1112        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1113          md->capture_last = save_capture_last;
1114        }        }
1115    
1116      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
# Line 1202  for (;;) Line 1232  for (;;)
1232      POSSESSIVE_NON_CAPTURE:      POSSESSIVE_NON_CAPTURE:
1233      matched_once = FALSE;      matched_once = FALSE;
1234      code_offset = (int)(ecode - md->start_code);      code_offset = (int)(ecode - md->start_code);
1235        save_capture_last = md->capture_last;
1236    
1237      for (;;)      for (;;)
1238        {        {
# Line 1231  for (;;) Line 1262  for (;;)
1262        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1263        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1264        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1265          md->capture_last = save_capture_last;
1266        }        }
1267    
1268      if (matched_once || allow_zero)      if (matched_once || allow_zero)
# Line 1262  for (;;) Line 1294  for (;;)
1294          cb.version          = 2;   /* Version 1 of the callout block */          cb.version          = 2;   /* Version 1 of the callout block */
1295          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1296          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1297  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
1298          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
1299  #else  #elif defined COMPILE_PCRE16
1300          cb.subject          = (PCRE_SPTR16)md->start_subject;          cb.subject          = (PCRE_SPTR16)md->start_subject;
1301    #elif defined COMPILE_PCRE32
1302            cb.subject          = (PCRE_SPTR32)md->start_subject;
1303  #endif  #endif
1304          cb.subject_length   = (int)(md->end_subject - md->start_subject);          cb.subject_length   = (int)(md->end_subject - md->start_subject);
1305          cb.start_match      = (int)(mstart - md->start_subject);          cb.start_match      = (int)(mstart - md->start_subject);
# Line 1273  for (;;) Line 1307  for (;;)
1307          cb.pattern_position = GET(ecode, LINK_SIZE + 3);          cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1308          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1309          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1310          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last & CAPLMASK;
1311            /* Internal change requires this for API compatibility. */
1312            if (cb.capture_last == 0) cb.capture_last = -1;
1313          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1314          cb.mark             = md->nomatch_mark;          cb.mark             = md->nomatch_mark;
1315          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1316          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1317          }          }
1318        ecode += PRIV(OP_lengths)[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1319          codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1320        }        }
1321    
1322      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1295  for (;;) Line 1332  for (;;)
1332          }          }
1333        else        else
1334          {          {
1335          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/          unsigned int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1336          condition = (recno == RREF_ANY || recno == md->recursive->group_num);          condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1337    
1338          /* If the test is for recursion into a specific subpattern, and it is          /* If the test is for recursion into a specific subpattern, and it is
# Line 1367  for (;;) Line 1404  for (;;)
1404    
1405        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
1406          {          {
1407          int refno = offset >> 1;          unsigned int refno = offset >> 1;
1408          pcre_uchar *slotA = md->name_table;          pcre_uchar *slotA = md->name_table;
1409    
1410          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
# Line 1495  for (;;) Line 1532  for (;;)
1532      to close any currently open capturing brackets. */      to close any currently open capturing brackets. */
1533    
1534      case OP_CLOSE:      case OP_CLOSE:
1535      number = GET2(ecode, 1);      number = GET2(ecode, 1);   /* Must be less than 65536 */
1536      offset = number << 1;      offset = number << 1;
1537    
1538  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 1503  for (;;) Line 1540  for (;;)
1540        printf("\n");        printf("\n");
1541  #endif  #endif
1542    
1543      md->capture_last = number;      md->capture_last = (md->capture_last & OVFLMASK) | number;
1544      if (offset >= md->offset_max) md->offset_overflow = TRUE; else      if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1545        {        {
1546        md->offset_vector[offset] =        md->offset_vector[offset] =
1547          md->offset_vector[md->offset_end - number];          md->offset_vector[md->offset_end - number];
# Line 1566  for (;;) Line 1603  for (;;)
1603        }        }
1604      else condassert = FALSE;      else condassert = FALSE;
1605    
1606        /* Loop for each branch */
1607    
1608      do      do
1609        {        {
1610        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
# Line 1576  for (;;) Line 1615  for (;;)
1615          }          }
1616        md->mark = save_mark;        md->mark = save_mark;
1617    
1618        /* A COMMIT failure must fail the entire assertion, without trying any        /* See comment in the code for capturing groups above about handling
1619        subsequent branches. */        THEN. */
   
       if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);  
1620    
1621        /* PCRE does not allow THEN to escape beyond an assertion; it        if (rrc == MATCH_THEN)
1622        is treated as NOMATCH. */          {
1623            next = ecode + GET(ecode,1);
1624            if (md->start_match_ptr < next &&
1625                (*ecode == OP_ALT || *next == OP_ALT))
1626              rrc = MATCH_NOMATCH;
1627            }
1628    
1629          /* Anything other than NOMATCH causes the assertion to fail. This
1630          includes COMMIT, SKIP, and PRUNE. However, this consistent approach does
1631          not always have exactly the same effect as in Perl. */
1632    
1633        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1634        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1635        }        }
1636      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1637    
1638        /* If we have tried all the alternative branches, the assertion has
1639        failed. */
1640    
1641      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1642    
# Line 1595  for (;;) Line 1644  for (;;)
1644    
1645      if (condassert) RRETURN(MATCH_MATCH);      if (condassert) RRETURN(MATCH_MATCH);
1646    
1647      /* Continue from after the assertion, updating the offsets high water      /* Continue from after a successful assertion, updating the offsets high
1648      mark, since extracts may have been taken during the assertion. */      water mark, since extracts may have been taken during the assertion. */
1649    
1650      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1651      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1652      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1653      continue;      continue;
1654    
1655      /* Negative assertion: all branches must fail to match. Encountering SKIP,      /* Negative assertion: all branches must fail to match for the assertion to
1656      PRUNE, or COMMIT means we must assume failure without checking subsequent      succeed. */
     branches. */  
1657    
1658      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1659      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
# Line 1617  for (;;) Line 1665  for (;;)
1665        }        }
1666      else condassert = FALSE;      else condassert = FALSE;
1667    
1668        /* Loop for each alternative branch. */
1669    
1670      do      do
1671        {        {
1672        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1673        md->mark = save_mark;        md->mark = save_mark;
1674    
1675          /* A successful match means the assertion has failed. */
1676    
1677        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1678        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)  
1679          /* See comment in the code for capturing groups above about handling
1680          THEN. */
1681    
1682          if (rrc == MATCH_THEN)
1683          {          {
1684          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          next = ecode + GET(ecode,1);
1685          break;          if (md->start_match_ptr < next &&
1686                (*ecode == OP_ALT || *next == OP_ALT))
1687              rrc = MATCH_NOMATCH;
1688          }          }
1689    
1690          /* No match on a branch means we must carry on and try the next branch.
1691          Anything else, in particular, SKIP, PRUNE, etc. causes a failure in the
1692          enclosing branch. This is a consistent approach, but does not always have
1693          the same effect as in Perl. */
1694    
1695        /* PCRE does not allow THEN to escape beyond an assertion; it is treated        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       as NOMATCH. */  
   
       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);  
1696        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1697        }        }
1698      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1699    
1700        /* All branches in the assertion failed to match. */
1701    
1702      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1703        ecode += 1 + LINK_SIZE;                /* Continue with current branch */
     ecode += 1 + LINK_SIZE;  
1704      continue;      continue;
1705    
1706      /* Move the subject pointer back. This occurs only at the start of      /* Move the subject pointer back. This occurs only at the start of
# Line 1685  for (;;) Line 1747  for (;;)
1747        cb.version          = 2;   /* Version 1 of the callout block */        cb.version          = 2;   /* Version 1 of the callout block */
1748        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1749        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1750  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
1751        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1752  #else  #elif defined COMPILE_PCRE16
1753        cb.subject          = (PCRE_SPTR16)md->start_subject;        cb.subject          = (PCRE_SPTR16)md->start_subject;
1754    #elif defined COMPILE_PCRE32
1755          cb.subject          = (PCRE_SPTR32)md->start_subject;
1756  #endif  #endif
1757        cb.subject_length   = (int)(md->end_subject - md->start_subject);        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1758        cb.start_match      = (int)(mstart - md->start_subject);        cb.start_match      = (int)(mstart - md->start_subject);
# Line 1696  for (;;) Line 1760  for (;;)
1760        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1761        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1762        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1763        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last & CAPLMASK;
1764          /* Internal change requires this for API compatibility. */
1765          if (cb.capture_last == 0) cb.capture_last = -1;
1766        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1767        cb.mark             = md->nomatch_mark;        cb.mark             = md->nomatch_mark;
1768        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
# Line 1725  for (;;) Line 1791  for (;;)
1791      case OP_RECURSE:      case OP_RECURSE:
1792        {        {
1793        recursion_info *ri;        recursion_info *ri;
1794        int recno;        unsigned int recno;
1795    
1796        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1797        recno = (callpat == md->start_code)? 0 :        recno = (callpat == md->start_code)? 0 :
# Line 1742  for (;;) Line 1808  for (;;)
1808        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1809    
1810        new_recursive.group_num = recno;        new_recursive.group_num = recno;
1811          new_recursive.saved_capture_last = md->capture_last;
1812        new_recursive.subject_position = eptr;        new_recursive.subject_position = eptr;
1813        new_recursive.prevrec = md->recursive;        new_recursive.prevrec = md->recursive;
1814        md->recursive = &new_recursive;        md->recursive = &new_recursive;
# Line 1765  for (;;) Line 1832  for (;;)
1832              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1833    
1834        /* OK, now we can do the recursion. After processing each alternative,        /* OK, now we can do the recursion. After processing each alternative,
1835        restore the offset data. If there were nested recursions, md->recursive        restore the offset data and the last captured value. If there were nested
1836        might be changed, so reset it before looping. */        recursions, md->recursive might be changed, so reset it before looping.
1837          */
1838    
1839        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1840        cbegroup = (*callpat >= OP_SBRA);        cbegroup = (*callpat >= OP_SBRA);
# Line 1777  for (;;) Line 1845  for (;;)
1845            md, eptrb, RM6);            md, eptrb, RM6);
1846          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1847              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1848            md->capture_last = new_recursive.saved_capture_last;
1849          md->recursive = new_recursive.prevrec;          md->recursive = new_recursive.prevrec;
1850          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1851            {            {
# Line 1793  for (;;) Line 1862  for (;;)
1862            goto RECURSION_MATCHED;        /* Exit loop; end processing */            goto RECURSION_MATCHED;        /* Exit loop; end processing */
1863            }            }
1864    
1865          /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it          /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
1866          is treated as NOMATCH. */          recursion; they cause a NOMATCH for the entire recursion. These codes
1867            are defined in a range that can be tested for. */
1868    
1869            if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
1870              RRETURN(MATCH_NOMATCH);
1871    
1872            /* Any return code other than NOMATCH is an error. */
1873    
1874          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&          if (rrc != MATCH_NOMATCH)
                  rrc != MATCH_COMMIT)  
1875            {            {
1876            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1877            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1927  for (;;) Line 2001  for (;;)
2001    
2002        /* Deal with capturing */        /* Deal with capturing */
2003    
2004        md->capture_last = number;        md->capture_last = (md->capture_last & OVFLMASK) | number;
2005        if (offset >= md->offset_max) md->offset_overflow = TRUE; else        if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
2006          {          {
2007          /* If offset is greater than offset_top, it means that we are          /* If offset is greater than offset_top, it means that we are
2008          "skipping" a capturing group, and that group's offsets must be marked          "skipping" a capturing group, and that group's offsets must be marked
# Line 2079  for (;;) Line 2153  for (;;)
2153              eptr + 1 >= md->end_subject &&              eptr + 1 >= md->end_subject &&
2154              NLBLOCK->nltype == NLTYPE_FIXED &&              NLBLOCK->nltype == NLTYPE_FIXED &&
2155              NLBLOCK->nllen == 2 &&              NLBLOCK->nllen == 2 &&
2156              *eptr == NLBLOCK->nl[0])              RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
2157            {            {
2158            md->hitend = TRUE;            md->hitend = TRUE;
2159            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
# Line 2123  for (;;) Line 2197  for (;;)
2197            eptr + 1 >= md->end_subject &&            eptr + 1 >= md->end_subject &&
2198            NLBLOCK->nltype == NLTYPE_FIXED &&            NLBLOCK->nltype == NLTYPE_FIXED &&
2199            NLBLOCK->nllen == 2 &&            NLBLOCK->nllen == 2 &&
2200            *eptr == NLBLOCK->nl[0])            RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
2201          {          {
2202          md->hitend = TRUE;          md->hitend = TRUE;
2203          if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);          if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
# Line 2266  for (;;) Line 2340  for (;;)
2340          eptr + 1 >= md->end_subject &&          eptr + 1 >= md->end_subject &&
2341          NLBLOCK->nltype == NLTYPE_FIXED &&          NLBLOCK->nltype == NLTYPE_FIXED &&
2342          NLBLOCK->nllen == 2 &&          NLBLOCK->nllen == 2 &&
2343          *eptr == NLBLOCK->nl[0])          RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
2344        {        {
2345        md->hitend = TRUE;        md->hitend = TRUE;
2346        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
# Line 2415  for (;;) Line 2489  for (;;)
2489        {        {
2490        default: RRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2491    
2492        case 0x000d:        case CHAR_CR:
2493        if (eptr >= md->end_subject)        if (eptr >= md->end_subject)
2494          {          {
2495          SCHECK_PARTIAL();          SCHECK_PARTIAL();
2496          }          }
2497        else if (*eptr == 0x0a) eptr++;        else if (RAWUCHARTEST(eptr) == CHAR_LF) eptr++;
2498        break;        break;
2499    
2500        case 0x000a:        case CHAR_LF:
2501        break;        break;
2502    
2503        case 0x000b:        case CHAR_VT:
2504        case 0x000c:        case CHAR_FF:
2505        case 0x0085:        case CHAR_NEL:
2506    #ifndef EBCDIC
2507        case 0x2028:        case 0x2028:
2508        case 0x2029:        case 0x2029:
2509    #endif  /* Not EBCDIC */
2510        if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);        if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2511        break;        break;
2512        }        }
# Line 2446  for (;;) Line 2522  for (;;)
2522      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2523      switch(c)      switch(c)
2524        {        {
2525          HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2526        default: break;        default: break;
       case 0x09:      /* HT */  
       case 0x20:      /* SPACE */  
       case 0xa0:      /* NBSP */  
       case 0x1680:    /* OGHAM SPACE MARK */  
       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
       case 0x2000:    /* EN QUAD */  
       case 0x2001:    /* EM QUAD */  
       case 0x2002:    /* EN SPACE */  
       case 0x2003:    /* EM SPACE */  
       case 0x2004:    /* THREE-PER-EM SPACE */  
       case 0x2005:    /* FOUR-PER-EM SPACE */  
       case 0x2006:    /* SIX-PER-EM SPACE */  
       case 0x2007:    /* FIGURE SPACE */  
       case 0x2008:    /* PUNCTUATION SPACE */  
       case 0x2009:    /* THIN SPACE */  
       case 0x200A:    /* HAIR SPACE */  
       case 0x202f:    /* NARROW NO-BREAK SPACE */  
       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
       case 0x3000:    /* IDEOGRAPHIC SPACE */  
       RRETURN(MATCH_NOMATCH);  
2527        }        }
2528      ecode++;      ecode++;
2529      break;      break;
# Line 2480  for (;;) Line 2537  for (;;)
2537      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2538      switch(c)      switch(c)
2539        {        {
2540          HSPACE_CASES: break;  /* Byte and multibyte cases */
2541        default: RRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
       case 0x09:      /* HT */  
       case 0x20:      /* SPACE */  
       case 0xa0:      /* NBSP */  
       case 0x1680:    /* OGHAM SPACE MARK */  
       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
       case 0x2000:    /* EN QUAD */  
       case 0x2001:    /* EM QUAD */  
       case 0x2002:    /* EN SPACE */  
       case 0x2003:    /* EM SPACE */  
       case 0x2004:    /* THREE-PER-EM SPACE */  
       case 0x2005:    /* FOUR-PER-EM SPACE */  
       case 0x2006:    /* SIX-PER-EM SPACE */  
       case 0x2007:    /* FIGURE SPACE */  
       case 0x2008:    /* PUNCTUATION SPACE */  
       case 0x2009:    /* THIN SPACE */  
       case 0x200A:    /* HAIR SPACE */  
       case 0x202f:    /* NARROW NO-BREAK SPACE */  
       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
       case 0x3000:    /* IDEOGRAPHIC SPACE */  
       break;  
2542        }        }
2543      ecode++;      ecode++;
2544      break;      break;
# Line 2514  for (;;) Line 2552  for (;;)
2552      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2553      switch(c)      switch(c)
2554        {        {
2555          VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2556        default: break;        default: break;
       case 0x0a:      /* LF */  
       case 0x0b:      /* VT */  
       case 0x0c:      /* FF */  
       case 0x0d:      /* CR */  
       case 0x85:      /* NEL */  
       case 0x2028:    /* LINE SEPARATOR */  
       case 0x2029:    /* PARAGRAPH SEPARATOR */  
       RRETURN(MATCH_NOMATCH);  
2557        }        }
2558      ecode++;      ecode++;
2559      break;      break;
# Line 2536  for (;;) Line 2567  for (;;)
2567      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2568      switch(c)      switch(c)
2569        {        {
2570          VSPACE_CASES: break;
2571        default: RRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
       case 0x0a:      /* LF */  
       case 0x0b:      /* VT */  
       case 0x0c:      /* FF */  
       case 0x0d:      /* CR */  
       case 0x85:      /* NEL */  
       case 0x2028:    /* LINE SEPARATOR */  
       case 0x2029:    /* PARAGRAPH SEPARATOR */  
       break;  
2572        }        }
2573      ecode++;      ecode++;
2574      break;      break;
# Line 2562  for (;;) Line 2586  for (;;)
2586        }        }
2587      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2588        {        {
2589          const pcre_uint32 *cp;
2590        const ucd_record *prop = GET_UCD(c);        const ucd_record *prop = GET_UCD(c);
2591    
2592        switch(ecode[1])        switch(ecode[1])
# Line 2622  for (;;) Line 2647  for (;;)
2647            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2648          break;          break;
2649    
2650            case PT_CLIST:
2651            cp = PRIV(ucd_caseless_sets) + ecode[2];
2652            for (;;)
2653              {
2654              if (c < *cp)
2655                { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2656              if (c == *cp++)
2657                { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2658              }
2659            break;
2660    
2661            case PT_UCNC:
2662            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2663                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2664                 c >= 0xe000) == (op == OP_NOTPROP))
2665              RRETURN(MATCH_NOMATCH);
2666            break;
2667    
2668          /* This should never occur */          /* This should never occur */
2669    
2670          default:          default:
# Line 2641  for (;;) Line 2684  for (;;)
2684        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2685        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2686        }        }
2687      GETCHARINCTEST(c, eptr);      else
     if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);  
     while (eptr < md->end_subject)  
2688        {        {
2689        int len = 1;        int lgb, rgb;
2690        if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }        GETCHARINCTEST(c, eptr);
2691        if (UCD_CATEGORY(c) != ucp_M) break;        lgb = UCD_GRAPHBREAK(c);
2692        eptr += len;        while (eptr < md->end_subject)
2693            {
2694            int len = 1;
2695            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2696            rgb = UCD_GRAPHBREAK(c);
2697            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2698            lgb = rgb;
2699            eptr += len;
2700            }
2701        }        }
2702      CHECK_PARTIAL();      CHECK_PARTIAL();
2703      ecode++;      ecode++;
2704      break;      break;
2705  #endif  #endif  /* SUPPORT_UCP */
2706    
2707    
2708      /* Match a back reference, possibly repeatedly. Look past the end of the      /* Match a back reference, possibly repeatedly. Look past the end of the
# Line 3162  for (;;) Line 3211  for (;;)
3211          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3212          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
3213          }          }
3214        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);        while (length-- > 0) if (*ecode++ != RAWUCHARINC(eptr)) RRETURN(MATCH_NOMATCH);
3215        }        }
3216      else      else
3217  #endif  #endif
# Line 3202  for (;;) Line 3251  for (;;)
3251    
3252        if (fc < 128)        if (fc < 128)
3253          {          {
3254          if (md->lcc[fc]          pcre_uint32 cc = RAWUCHAR(eptr);
3255              != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);          if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3256          ecode++;          ecode++;
3257          eptr++;          eptr++;
3258          }          }
# Line 3214  for (;;) Line 3263  for (;;)
3263    
3264        else        else
3265          {          {
3266          unsigned int dc;          pcre_uint32 dc;
3267          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
3268          ecode += length;          ecode += length;
3269    
# Line 3324  for (;;) Line 3373  for (;;)
3373        if (length > 1)        if (length > 1)
3374          {          {
3375  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3376          unsigned int othercase;          pcre_uint32 othercase;
3377          if (op >= OP_STARI &&     /* Caseless */          if (op >= OP_STARI &&     /* Caseless */
3378              (othercase = UCD_OTHERCASE(fc)) != fc)              (othercase = UCD_OTHERCASE(fc)) != fc)
3379            oclength = PRIV(ord2utf)(othercase, occhars);            oclength = PRIV(ord2utf)(othercase, occhars);
# Line 3451  for (;;) Line 3500  for (;;)
3500    
3501        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3502          {          {
3503            pcre_uint32 cc;                 /* Faster than pcre_uchar */
3504          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3505            {            {
3506            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3507            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3508            }            }
3509          if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);          cc = RAWUCHARTEST(eptr);
3510            if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3511          eptr++;          eptr++;
3512          }          }
3513        if (min == max) continue;        if (min == max) continue;
# Line 3464  for (;;) Line 3515  for (;;)
3515          {          {
3516          for (fi = min;; fi++)          for (fi = min;; fi++)
3517            {            {
3518              pcre_uint32 cc;               /* Faster than pcre_uchar */
3519            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3520            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3521            if (fi >= max) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
# Line 3472  for (;;) Line 3524  for (;;)
3524              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3525              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3526              }              }
3527            if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);            cc = RAWUCHARTEST(eptr);
3528              if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3529            eptr++;            eptr++;
3530            }            }
3531          /* Control never gets here */          /* Control never gets here */
# Line 3482  for (;;) Line 3535  for (;;)
3535          pp = eptr;          pp = eptr;
3536          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3537            {            {
3538              pcre_uint32 cc;               /* Faster than pcre_uchar */
3539            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3540              {              {
3541              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3542              break;              break;
3543              }              }
3544            if (fc != *eptr && foc != *eptr) break;            cc = RAWUCHARTEST(eptr);
3545              if (fc != cc && foc != cc) break;
3546            eptr++;            eptr++;
3547            }            }
3548    
# Line 3515  for (;;) Line 3570  for (;;)
3570            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3571            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3572            }            }
3573          if (fc != *eptr++) RRETURN(MATCH_NOMATCH);          if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3574          }          }
3575    
3576        if (min == max) continue;        if (min == max) continue;
# Line 3532  for (;;) Line 3587  for (;;)
3587              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3588              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3589              }              }
3590            if (fc != *eptr++) RRETURN(MATCH_NOMATCH);            if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3591            }            }
3592          /* Control never gets here */          /* Control never gets here */
3593          }          }
# Line 3546  for (;;) Line 3601  for (;;)
3601              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3602              break;              break;
3603              }              }
3604            if (fc != *eptr) break;            if (fc != RAWUCHARTEST(eptr)) break;
3605            eptr++;            eptr++;
3606            }            }
3607          if (possessive) continue;          if (possessive) continue;
# Line 3575  for (;;) Line 3630  for (;;)
3630  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3631      if (utf)      if (utf)
3632        {        {
3633        register unsigned int ch, och;        register pcre_uint32 ch, och;
3634    
3635        ecode++;        ecode++;
3636        GETCHARINC(ch, ecode);        GETCHARINC(ch, ecode);
# Line 3602  for (;;) Line 3657  for (;;)
3657      else      else
3658  #endif  #endif
3659        {        {
3660        register unsigned int ch = ecode[1];        register pcre_uint32 ch = ecode[1];
3661        c = *eptr++;        c = *eptr++;
3662        if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))        if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3663          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
# Line 3716  for (;;) Line 3771  for (;;)
3771  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3772        if (utf)        if (utf)
3773          {          {
3774          register unsigned int d;          register pcre_uint32 d;
3775          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3776            {            {
3777            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 3751  for (;;) Line 3806  for (;;)
3806  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3807          if (utf)          if (utf)
3808            {            {
3809            register unsigned int d;            register pcre_uint32 d;
3810            for (fi = min;; fi++)            for (fi = min;; fi++)
3811              {              {
3812              RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
# Line 3796  for (;;) Line 3851  for (;;)
3851  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3852          if (utf)          if (utf)
3853            {            {
3854            register unsigned int d;            register pcre_uint32 d;
3855            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3856              {              {
3857              int len = 1;              int len = 1;
# Line 3853  for (;;) Line 3908  for (;;)
3908  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3909        if (utf)        if (utf)
3910          {          {
3911          register unsigned int d;          register pcre_uint32 d;
3912          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3913            {            {
3914            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
# Line 3887  for (;;) Line 3942  for (;;)
3942  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3943          if (utf)          if (utf)
3944            {            {
3945            register unsigned int d;            register pcre_uint32 d;
3946            for (fi = min;; fi++)            for (fi = min;; fi++)
3947              {              {
3948              RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
# Line 3931  for (;;) Line 3986  for (;;)
3986  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3987          if (utf)          if (utf)
3988            {            {
3989            register unsigned int d;            register pcre_uint32 d;
3990            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3991              {              {
3992              int len = 1;              int len = 1;
# Line 4207  for (;;) Line 4262  for (;;)
4262              }              }
4263            break;            break;
4264    
4265              case PT_CLIST:
4266              for (i = 1; i <= min; i++)
4267                {
4268                const pcre_uint32 *cp;
4269                if (eptr >= md->end_subject)
4270                  {
4271                  SCHECK_PARTIAL();
4272                  RRETURN(MATCH_NOMATCH);
4273                  }
4274                GETCHARINCTEST(c, eptr);
4275                cp = PRIV(ucd_caseless_sets) + prop_value;
4276                for (;;)
4277                  {
4278                  if (c < *cp)
4279                    { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4280                  if (c == *cp++)
4281                    { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4282                  }
4283                }
4284              break;
4285    
4286              case PT_UCNC:
4287              for (i = 1; i <= min; i++)
4288                {
4289                if (eptr >= md->end_subject)
4290                  {
4291                  SCHECK_PARTIAL();
4292                  RRETURN(MATCH_NOMATCH);
4293                  }
4294                GETCHARINCTEST(c, eptr);
4295                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4296                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4297                     c >= 0xe000) == prop_fail_result)
4298                  RRETURN(MATCH_NOMATCH);
4299                }
4300              break;
4301    
4302            /* This should not occur */            /* This should not occur */
4303    
4304            default:            default:
# Line 4226  for (;;) Line 4318  for (;;)
4318              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4319              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4320              }              }
4321            GETCHARINCTEST(c, eptr);            else
           if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);  
           while (eptr < md->end_subject)  
4322              {              {
4323              int len = 1;              int lgb, rgb;
4324              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }              GETCHARINCTEST(c, eptr);
4325              if (UCD_CATEGORY(c) != ucp_M) break;              lgb = UCD_GRAPHBREAK(c);
4326              eptr += len;             while (eptr < md->end_subject)
4327                  {
4328                  int len = 1;
4329                  if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4330                  rgb = UCD_GRAPHBREAK(c);
4331                  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
4332                  lgb = rgb;
4333                  eptr += len;
4334                  }
4335              }              }
4336            CHECK_PARTIAL();            CHECK_PARTIAL();
4337            }            }
# Line 4260  for (;;) Line 4358  for (;;)
4358                eptr + 1 >= md->end_subject &&                eptr + 1 >= md->end_subject &&
4359                NLBLOCK->nltype == NLTYPE_FIXED &&                NLBLOCK->nltype == NLTYPE_FIXED &&
4360                NLBLOCK->nllen == 2 &&                NLBLOCK->nllen == 2 &&
4361                *eptr == NLBLOCK->nl[0])                RAWUCHAR(eptr) == NLBLOCK->nl[0])
4362              {              {
4363              md->hitend = TRUE;              md->hitend = TRUE;
4364              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
# Line 4301  for (;;) Line 4399  for (;;)
4399              {              {
4400              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4401    
4402              case 0x000d:              case CHAR_CR:
4403              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
4404              break;              break;
4405    
4406              case 0x000a:              case CHAR_LF:
4407              break;              break;
4408    
4409              case 0x000b:              case CHAR_VT:
4410              case 0x000c:              case CHAR_FF:
4411              case 0x0085:              case CHAR_NEL:
4412    #ifndef EBCDIC
4413              case 0x2028:              case 0x2028:
4414              case 0x2029:              case 0x2029:
4415    #endif  /* Not EBCDIC */
4416              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4417              break;              break;
4418              }              }
# Line 4330  for (;;) Line 4430  for (;;)
4430            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4431            switch(c)            switch(c)
4432              {              {
4433                HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
4434              default: break;              default: break;
             case 0x09:      /* HT */  
             case 0x20:      /* SPACE */  
             case 0xa0:      /* NBSP */  
             case 0x1680:    /* OGHAM SPACE MARK */  
             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
             case 0x2000:    /* EN QUAD */  
             case 0x2001:    /* EM QUAD */  
             case 0x2002:    /* EN SPACE */  
             case 0x2003:    /* EM SPACE */  
             case 0x2004:    /* THREE-PER-EM SPACE */  
             case 0x2005:    /* FOUR-PER-EM SPACE */  
             case 0x2006:    /* SIX-PER-EM SPACE */  
             case 0x2007:    /* FIGURE SPACE */  
             case 0x2008:    /* PUNCTUATION SPACE */  
             case 0x2009:    /* THIN SPACE */  
             case 0x200A:    /* HAIR SPACE */  
             case 0x202f:    /* NARROW NO-BREAK SPACE */  
             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
             case 0x3000:    /* IDEOGRAPHIC SPACE */  
             RRETURN(MATCH_NOMATCH);  
4435              }              }
4436            }            }
4437          break;          break;
# Line 4366  for (;;) Line 4447  for (;;)
4447            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4448            switch(c)            switch(c)
4449              {              {
4450                HSPACE_CASES: break;  /* Byte and multibyte cases */
4451              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
             case 0x09:      /* HT */  
             case 0x20:      /* SPACE */  
             case 0xa0:      /* NBSP */  
             case 0x1680:    /* OGHAM SPACE MARK */  
             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
             case 0x2000:    /* EN QUAD */  
             case 0x2001:    /* EM QUAD */  
             case 0x2002:    /* EN SPACE */  
             case 0x2003:    /* EM SPACE */  
             case 0x2004:    /* THREE-PER-EM SPACE */  
             case 0x2005:    /* FOUR-PER-EM SPACE */  
             case 0x2006:    /* SIX-PER-EM SPACE */  
             case 0x2007:    /* FIGURE SPACE */  
             case 0x2008:    /* PUNCTUATION SPACE */  
             case 0x2009:    /* THIN SPACE */  
             case 0x200A:    /* HAIR SPACE */  
             case 0x202f:    /* NARROW NO-BREAK SPACE */  
             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
             case 0x3000:    /* IDEOGRAPHIC SPACE */  
             break;  
4452              }              }
4453            }            }
4454          break;          break;
# Line 4402  for (;;) Line 4464  for (;;)
4464            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4465            switch(c)            switch(c)
4466              {              {
4467                VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4468              default: break;              default: break;
             case 0x0a:      /* LF */  
             case 0x0b:      /* VT */  
             case 0x0c:      /* FF */  
             case 0x0d:      /* CR */  
             case 0x85:      /* NEL */  
             case 0x2028:    /* LINE SEPARATOR */  
             case 0x2029:    /* PARAGRAPH SEPARATOR */  
             RRETURN(MATCH_NOMATCH);  
4469              }              }
4470            }            }
4471          break;          break;
# Line 4426  for (;;) Line 4481  for (;;)
4481            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4482            switch(c)            switch(c)
4483              {              {
4484                VSPACE_CASES: break;
4485              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
             case 0x0a:      /* LF */  
             case 0x0b:      /* VT */  
             case 0x0c:      /* FF */  
             case 0x0d:      /* CR */  
             case 0x85:      /* NEL */  
             case 0x2028:    /* LINE SEPARATOR */  
             case 0x2029:    /* PARAGRAPH SEPARATOR */  
             break;  
4486              }              }
4487            }            }
4488          break;          break;
# Line 4456  for (;;) Line 4504  for (;;)
4504          case OP_DIGIT:          case OP_DIGIT:
4505          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4506            {            {
4507              pcre_uint32 cc;
4508            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4509              {              {
4510              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4511              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4512              }              }
4513            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)            cc = RAWUCHAR(eptr);
4514              if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
4515              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4516            eptr++;            eptr++;
4517            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
# Line 4471  for (;;) Line 4521  for (;;)
4521          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4522          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4523            {            {
4524              pcre_uint32 cc;
4525            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4526              {              {
4527              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4528              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4529              }              }
4530            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)            cc = RAWUCHAR(eptr);
4531              if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
4532              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4533            eptr++;            eptr++;
4534            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
# Line 4486  for (;;) Line 4538  for (;;)
4538          case OP_WHITESPACE:          case OP_WHITESPACE:
4539          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4540            {            {
4541              pcre_uint32 cc;
4542            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4543              {              {
4544              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4545              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4546              }              }
4547            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)            cc = RAWUCHAR(eptr);
4548              if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
4549              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4550            eptr++;            eptr++;
4551            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
# Line 4501  for (;;) Line 4555  for (;;)
4555          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4556          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4557            {            {
4558              pcre_uint32 cc;
4559            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4560              {              {
4561              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4562              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4563              }              }
4564            if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)            cc = RAWUCHAR(eptr);
4565              if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
4566              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4567            eptr++;            eptr++;
4568            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
# Line 4516  for (;;) Line 4572  for (;;)
4572          case OP_WORDCHAR:          case OP_WORDCHAR:
4573          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4574            {            {
4575              pcre_uint32 cc;
4576            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4577              {              {
4578              SCHECK_PARTIAL();              SCHECK_PARTIAL();
4579              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4580              }              }
4581            if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)            cc = RAWUCHAR(eptr);
4582              if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
4583              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4584            eptr++;            eptr++;
4585            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
# Line 4592  for (;;) Line 4650  for (;;)
4650              {              {
4651              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4652    
4653              case 0x000d:              case CHAR_CR:
4654              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
4655              break;              break;
4656    
4657              case 0x000a:              case CHAR_LF:
4658              break;              break;
4659    
4660              case 0x000b:              case CHAR_VT:
4661              case 0x000c:              case CHAR_FF:
4662              case 0x0085:              case CHAR_NEL:
4663  #ifdef COMPILE_PCRE16  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4664              case 0x2028:              case 0x2028:
4665              case 0x2029:              case 0x2029:
4666  #endif  #endif
# Line 4623  for (;;) Line 4681  for (;;)
4681            switch(*eptr++)            switch(*eptr++)
4682              {              {
4683              default: break;              default: break;
4684              case 0x09:      /* HT */              HSPACE_BYTE_CASES:
4685              case 0x20:      /* SPACE */  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4686              case 0xa0:      /* NBSP */              HSPACE_MULTIBYTE_CASES:
 #ifdef COMPILE_PCRE16  
             case 0x1680:    /* OGHAM SPACE MARK */  
             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
             case 0x2000:    /* EN QUAD */  
             case 0x2001:    /* EM QUAD */  
             case 0x2002:    /* EN SPACE */  
             case 0x2003:    /* EM SPACE */  
             case 0x2004:    /* THREE-PER-EM SPACE */  
             case 0x2005:    /* FOUR-PER-EM SPACE */  
             case 0x2006:    /* SIX-PER-EM SPACE */  
             case 0x2007:    /* FIGURE SPACE */  
             case 0x2008:    /* PUNCTUATION SPACE */  
             case 0x2009:    /* THIN SPACE */  
             case 0x200A:    /* HAIR SPACE */  
             case 0x202f:    /* NARROW NO-BREAK SPACE */  
             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
             case 0x3000:    /* IDEOGRAPHIC SPACE */  
4687  #endif  #endif
4688              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4689              }              }
# Line 4660  for (;;) Line 4701  for (;;)
4701            switch(*eptr++)            switch(*eptr++)
4702              {              {
4703              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4704              case 0x09:      /* HT */              HSPACE_BYTE_CASES:
4705              case 0x20:      /* SPACE */  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4706              case 0xa0:      /* NBSP */              HSPACE_MULTIBYTE_CASES:
 #ifdef COMPILE_PCRE16  
             case 0x1680:    /* OGHAM SPACE MARK */  
             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
             case 0x2000:    /* EN QUAD */  
             case 0x2001:    /* EM QUAD */  
             case 0x2002:    /* EN SPACE */  
             case 0x2003:    /* EM SPACE */  
             case 0x2004:    /* THREE-PER-EM SPACE */  
             case 0x2005:    /* FOUR-PER-EM SPACE */  
             case 0x2006:    /* SIX-PER-EM SPACE */  
             case 0x2007:    /* FIGURE SPACE */  
             case 0x2008:    /* PUNCTUATION SPACE */  
             case 0x2009:    /* THIN SPACE */  
             case 0x200A:    /* HAIR SPACE */  
             case 0x202f:    /* NARROW NO-BREAK SPACE */  
             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
             case 0x3000:    /* IDEOGRAPHIC SPACE */  
4707  #endif  #endif
4708              break;              break;
4709              }              }
# Line 4696  for (;;) Line 4720  for (;;)
4720              }              }
4721            switch(*eptr++)            switch(*eptr++)
4722              {              {
4723              default: break;              VSPACE_BYTE_CASES:
4724              case 0x0a:      /* LF */  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4725              case 0x0b:      /* VT */              VSPACE_MULTIBYTE_CASES:
             case 0x0c:      /* FF */  
             case 0x0d:      /* CR */  
             case 0x85:      /* NEL */  
 #ifdef COMPILE_PCRE16  
             case 0x2028:    /* LINE SEPARATOR */  
             case 0x2029:    /* PARAGRAPH SEPARATOR */  
4726  #endif  #endif
4727              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4728                default: break;
4729              }              }
4730            }            }
4731          break;          break;
# Line 4722  for (;;) Line 4741  for (;;)
4741            switch(*eptr++)            switch(*eptr++)
4742              {              {
4743              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
4744              case 0x0a:      /* LF */              VSPACE_BYTE_CASES:
4745              case 0x0b:      /* VT */  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4746              case 0x0c:      /* FF */              VSPACE_MULTIBYTE_CASES:
             case 0x0d:      /* CR */  
             case 0x85:      /* NEL */  
 #ifdef COMPILE_PCRE16  
             case 0x2028:    /* LINE SEPARATOR */  
             case 0x2029:    /* PARAGRAPH SEPARATOR */  
4747  #endif  #endif
4748              break;              break;
4749              }              }
# Line 5007  for (;;) Line 5021  for (;;)
5021              }              }
5022            /* Control never gets here */            /* Control never gets here */
5023    
5024            /* This should never occur */            case PT_CLIST:
5025              for (fi = min;; fi++)
5026                {
5027                const pcre_uint32 *cp;
5028                RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
5029                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5030                if (fi >= max) RRETURN(MATCH_NOMATCH);
5031                if (eptr >= md->end_subject)
5032                  {
5033                  SCHECK_PARTIAL();
5034                  RRETURN(MATCH_NOMATCH);
5035                  }
5036                GETCHARINCTEST(c, eptr);
5037                cp = PRIV(ucd_caseless_sets) + prop_value;
5038                for (;;)
5039                  {
5040                  if (c < *cp)
5041                    { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
5042                  if (c == *cp++)
5043                    { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
5044                  }
5045                }
5046              /* Control never gets here */
5047    
5048              case PT_UCNC:
5049              for (fi = min;; fi++)
5050                {
5051                RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);
5052                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5053                if (fi >= max) RRETURN(MATCH_NOMATCH);
5054                if (eptr >= md->end_subject)
5055                  {
5056                  SCHECK_PARTIAL();
5057                  RRETURN(MATCH_NOMATCH);
5058                  }
5059                GETCHARINCTEST(c, eptr);
5060                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5061                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5062                     c >= 0xe000) == prop_fail_result)
5063                  RRETURN(MATCH_NOMATCH);
5064                }
5065              /* Control never gets here */
5066    
5067              /* This should never occur */
5068            default:            default:
5069            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5070            }            }
# Line 5029  for (;;) Line 5085  for (;;)
5085              SCHECK_PARTIAL();              SCHECK_PARTIAL();
5086              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
5087              }              }
5088            GETCHARINCTEST(c, eptr);            else
           if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);  
           while (eptr < md->end_subject)  
5089              {              {
5090              int len = 1;              int lgb, rgb;
5091              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }              GETCHARINCTEST(c, eptr);
5092              if (UCD_CATEGORY(c) != ucp_M) break;              lgb = UCD_GRAPHBREAK(c);
5093              eptr += len;              while (eptr < md->end_subject)
5094                  {
5095                  int len = 1;
5096                  if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5097                  rgb = UCD_GRAPHBREAK(c);
5098                  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5099                  lgb = rgb;
5100                  eptr += len;
5101                  }
5102              }              }
5103            CHECK_PARTIAL();            CHECK_PARTIAL();
5104            }            }
# Line 5082  for (;;) Line 5144  for (;;)
5144              switch(c)              switch(c)
5145                {                {
5146                default: RRETURN(MATCH_NOMATCH);                default: RRETURN(MATCH_NOMATCH);
5147                case 0x000d:                case CHAR_CR:
5148                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
5149                break;                break;
5150                case 0x000a:  
5151                  case CHAR_LF:
5152                break;                break;
5153    
5154                case 0x000b:                case CHAR_VT:
5155                case 0x000c:                case CHAR_FF:
5156                case 0x0085:                case CHAR_NEL:
5157    #ifndef EBCDIC
5158                case 0x2028:                case 0x2028:
5159                case 0x2029:                case 0x2029:
5160    #endif  /* Not EBCDIC */
5161                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5162                break;                break;
5163                }                }
# Line 5101  for (;;) Line 5166  for (;;)
5166              case OP_NOT_HSPACE:              case OP_NOT_HSPACE:
5167              switch(c)              switch(c)
5168                {                {
5169                  HSPACE_CASES: RRETURN(MATCH_NOMATCH);
5170                default: break;                default: break;
               case 0x09:      /* HT */  
               case 0x20:      /* SPACE */  
               case 0xa0:      /* NBSP */  
               case 0x1680:    /* OGHAM SPACE MARK */  
               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
               case 0x2000:    /* EN QUAD */  
               case 0x2001:    /* EM QUAD */  
               case 0x2002:    /* EN SPACE */  
               case 0x2003:    /* EM SPACE */  
               case 0x2004:    /* THREE-PER-EM SPACE */  
               case 0x2005:    /* FOUR-PER-EM SPACE */  
               case 0x2006:    /* SIX-PER-EM SPACE */  
               case 0x2007:    /* FIGURE SPACE */  
               case 0x2008:    /* PUNCTUATION SPACE */  
               case 0x2009:    /* THIN SPACE */  
               case 0x200A:    /* HAIR SPACE */  
               case 0x202f:    /* NARROW NO-BREAK SPACE */  
               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
               case 0x3000:    /* IDEOGRAPHIC SPACE */  
               RRETURN(MATCH_NOMATCH);  
5171                }                }
5172              break;              break;
5173    
5174              case OP_HSPACE:              case OP_HSPACE:
5175              switch(c)              switch(c)
5176                {                {
5177                  HSPACE_CASES: break;
5178                default: RRETURN(MATCH_NOMATCH);                default: RRETURN(MATCH_NOMATCH);
               case 0x09:      /* HT */  
               case 0x20:      /* SPACE */  
               case 0xa0:      /* NBSP */  
               case 0x1680:    /* OGHAM SPACE MARK */  
               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
               case 0x2000:    /* EN QUAD */  
               case 0x2001:    /* EM QUAD */  
               case 0x2002:    /* EN SPACE */  
               case 0x2003:    /* EM SPACE */  
               case 0x2004:    /* THREE-PER-EM SPACE */  
               case 0x2005:    /* FOUR-PER-EM SPACE */  
               case 0x2006:    /* SIX-PER-EM SPACE */  
               case 0x2007:    /* FIGURE SPACE */  
               case 0x2008:    /* PUNCTUATION SPACE */  
               case 0x2009:    /* THIN SPACE */  
               case 0x200A:    /* HAIR SPACE */  
               case 0x202f:    /* NARROW NO-BREAK SPACE */  
               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
               case 0x3000:    /* IDEOGRAPHIC SPACE */  
               break;  
5179                }                }
5180              break;              break;
5181    
5182              case OP_NOT_VSPACE:              case OP_NOT_VSPACE:
5183              switch(c)              switch(c)
5184                {                {
5185                  VSPACE_CASES: RRETURN(MATCH_NOMATCH);
5186                default: break;                default: break;
               case 0x0a:      /* LF */  
               case 0x0b:      /* VT */  
               case 0x0c:      /* FF */  
               case 0x0d:      /* CR */  
               case 0x85:      /* NEL */  
               case 0x2028:    /* LINE SEPARATOR */  
               case 0x2029:    /* PARAGRAPH SEPARATOR */  
               RRETURN(MATCH_NOMATCH);  
5187                }                }
5188              break;              break;
5189    
5190              case OP_VSPACE:              case OP_VSPACE:
5191              switch(c)              switch(c)
5192                {                {
5193                  VSPACE_CASES: break;
5194                default: RRETURN(MATCH_NOMATCH);                default: RRETURN(MATCH_NOMATCH);
               case 0x0a:      /* LF */  
               case 0x0b:      /* VT */  
               case 0x0c:      /* FF */  
               case 0x0d:      /* CR */  
               case 0x85:      /* NEL */  
               case 0x2028:    /* LINE SEPARATOR */  
               case 0x2029:    /* PARAGRAPH SEPARATOR */  
               break;  
5195                }                }
5196              break;              break;
5197    
# Line 5256  for (;;) Line 5269  for (;;)
5269              switch(c)              switch(c)
5270                {                {
5271                default: RRETURN(MATCH_NOMATCH);                default: RRETURN(MATCH_NOMATCH);
5272                case 0x000d:                case CHAR_CR:
5273                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
5274                break;                break;
5275    
5276                case 0x000a:                case CHAR_LF:
5277                break;                break;
5278    
5279                case 0x000b:                case CHAR_VT:
5280                case 0x000c:                case CHAR_FF:
5281                case 0x0085:                case CHAR_NEL:
5282  #ifdef COMPILE_PCRE16  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5283                case 0x2028:                case 0x2028:
5284                case 0x2029:                case 0x2029:
5285  #endif  #endif
# Line 5279  for (;;) Line 5292  for (;;)
5292              switch(c)              switch(c)
5293                {                {
5294                default: break;                default: break;
5295                case 0x09:      /* HT */                HSPACE_BYTE_CASES:
5296                case 0x20:      /* SPACE */  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5297                case 0xa0:      /* NBSP */                HSPACE_MULTIBYTE_CASES:
 #ifdef COMPILE_PCRE16  
               case 0x1680:    /* OGHAM SPACE MARK */  
               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
               case 0x2000:    /* EN QUAD */  
               case 0x2001:    /* EM QUAD */  
               case 0x2002:    /* EN SPACE */  
               case 0x2003:    /* EM SPACE */  
               case 0x2004:    /* THREE-PER-EM SPACE */  
               case 0x2005:    /* FOUR-PER-EM SPACE */  
               case 0x2006:    /* SIX-PER-EM SPACE */  
               case 0x2007:    /* FIGURE SPACE */  
               case 0x2008:    /* PUNCTUATION SPACE */  
               case 0x2009:    /* THIN SPACE */  
               case 0x200A:    /* HAIR SPACE */  
               case 0x202f:    /* NARROW NO-BREAK SPACE */  
               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
               case 0x3000:    /* IDEOGRAPHIC SPACE */  
5298  #endif  #endif
5299                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5300                }                }
# Line 5308  for (;;) Line 5304  for (;;)
5304              switch(c)              switch(c)
5305                {                {
5306                default: RRETURN(MATCH_NOMATCH);                default: RRETURN(MATCH_NOMATCH);
5307                case 0x09:      /* HT */                HSPACE_BYTE_CASES:
5308                case 0x20:      /* SPACE */  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5309                case 0xa0:      /* NBSP */                HSPACE_MULTIBYTE_CASES:
 #ifdef COMPILE_PCRE16  
               case 0x1680:    /* OGHAM SPACE MARK */  
               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
               case 0x2000:    /* EN QUAD */  
               case 0x2001:    /* EM QUAD */  
               case 0x2002:    /* EN SPACE */  
               case 0x2003:    /* EM SPACE */  
               case 0x2004:    /* THREE-PER-EM SPACE */  
               case 0x2005:    /* FOUR-PER-EM SPACE */  
               case 0x2006:    /* SIX-PER-EM SPACE */  
               case 0x2007:    /* FIGURE SPACE */  
               case 0x2008:    /* PUNCTUATION SPACE */  
               case 0x2009:    /* THIN SPACE */  
               case 0x200A:    /* HAIR SPACE */  
               case 0x202f:    /* NARROW NO-BREAK SPACE */  
               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
               case 0x3000:    /* IDEOGRAPHIC SPACE */  
5310  #endif  #endif
5311                break;                break;
5312                }                }
# Line 5337  for (;;) Line 5316  for (;;)
5316              switch(c)              switch(c)
5317                {                {
5318                default: break;                default: break;
5319                case 0x0a:      /* LF */                VSPACE_BYTE_CASES:
5320                case 0x0b:      /* VT */  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5321                case 0x0c:      /* FF */                VSPACE_MULTIBYTE_CASES:
               case 0x0d:      /* CR */  
               case 0x85:      /* NEL */  
 #ifdef COMPILE_PCRE16  
               case 0x2028:    /* LINE SEPARATOR */  
               case 0x2029:    /* PARAGRAPH SEPARATOR */  
5322  #endif  #endif
5323                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5324                }                }
# Line 5354  for (;;) Line 5328  for (;;)
5328              switch(c)              switch(c)
5329                {                {
5330                default: RRETURN(MATCH_NOMATCH);                default: RRETURN(MATCH_NOMATCH);
5331                case 0x0a:      /* LF */                VSPACE_BYTE_CASES:
5332                case 0x0b:      /* VT */  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5333                case 0x0c:      /* FF */                VSPACE_MULTIBYTE_CASES:
               case 0x0d:      /* CR */  
               case 0x85:      /* NEL */  
 #ifdef COMPILE_PCRE16  
               case 0x2028:    /* LINE SEPARATOR */  
               case 0x2029:    /* PARAGRAPH SEPARATOR */  
5334  #endif  #endif
5335                break;                break;
5336                }                }
# Line 5565  for (;;) Line 5534  for (;;)
5534              }              }
5535            break;            break;
5536    
5537              case PT_CLIST:
5538              for (i = min; i < max; i++)
5539                {
5540                const pcre_uint32 *cp;
5541                int len = 1;
5542                if (eptr >= md->end_subject)
5543                  {
5544                  SCHECK_PARTIAL();
5545                  break;
5546                  }
5547                GETCHARLENTEST(c, eptr, len);
5548                cp = PRIV(ucd_caseless_sets) + prop_value;
5549                for (;;)
5550                  {
5551                  if (c < *cp)
5552                    { if (prop_fail_result) break; else goto GOT_MAX; }
5553                  if (c == *cp++)
5554                    { if (prop_fail_result) goto GOT_MAX; else break; }
5555                  }
5556                eptr += len;
5557                }
5558              GOT_MAX:
5559              break;
5560    
5561              case PT_UCNC:
5562              for (i = min; i < max; i++)
5563                {
5564                int len = 1;
5565                if (eptr >= md->end_subject)
5566                  {
5567                  SCHECK_PARTIAL();
5568                  break;
5569                  }
5570                GETCHARLENTEST(c, eptr, len);
5571                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5572                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5573                     c >= 0xe000) == prop_fail_result)
5574                  break;
5575                eptr += len;
5576                }
5577              break;
5578    
5579            default:            default:
5580            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5581            }            }
# Line 5588  for (;;) Line 5599  for (;;)
5599          {          {
5600          for (i = min; i < max; i++)          for (i = min; i < max; i++)
5601            {            {
           int len = 1;  
5602            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
5603              {              {
5604              SCHECK_PARTIAL();              SCHECK_PARTIAL();
5605              break;              break;
5606              }              }
5607            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }            else
           if (UCD_CATEGORY(c) == ucp_M) break;  
           eptr += len;  
           while (eptr < md->end_subject)  
5608              {              {
5609              len = 1;              int lgb, rgb;
5610              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }              GETCHARINCTEST(c, eptr);
5611              if (UCD_CATEGORY(c) != ucp_M) break;              lgb = UCD_GRAPHBREAK(c);
5612              eptr += len;              while (eptr < md->end_subject)
5613                  {
5614                  int len = 1;
5615                  if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5616                  rgb = UCD_GRAPHBREAK(c);
5617                  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5618                  lgb = rgb;
5619                  eptr += len;
5620                  }
5621              }              }
5622            CHECK_PARTIAL();            CHECK_PARTIAL();
5623            }            }
# Line 5652  for (;;) Line 5667  for (;;)
5667                    eptr + 1 >= md->end_subject &&                    eptr + 1 >= md->end_subject &&
5668                    NLBLOCK->nltype == NLTYPE_FIXED &&                    NLBLOCK->nltype == NLTYPE_FIXED &&
5669                    NLBLOCK->nllen == 2 &&                    NLBLOCK->nllen == 2 &&
5670                    *eptr == NLBLOCK->nl[0])                    RAWUCHAR(eptr) == NLBLOCK->nl[0])
5671                  {                  {
5672                  md->hitend = TRUE;                  md->hitend = TRUE;
5673                  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);                  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
# Line 5678  for (;;) Line 5693  for (;;)
5693                    eptr + 1 >= md->end_subject &&                    eptr + 1 >= md->end_subject &&
5694                    NLBLOCK->nltype == NLTYPE_FIXED &&                    NLBLOCK->nltype == NLTYPE_FIXED &&
5695                    NLBLOCK->nllen == 2 &&                    NLBLOCK->nllen == 2 &&
5696                    *eptr == NLBLOCK->nl[0])                    RAWUCHAR(eptr) == NLBLOCK->nl[0])
5697                  {                  {
5698                  md->hitend = TRUE;                  md->hitend = TRUE;
5699                  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);                  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
# Line 5732  for (;;) Line 5747  for (;;)
5747                break;                break;
5748                }                }
5749              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
5750              if (c == 0x000d)              if (c == CHAR_CR)
5751                {                {
5752                if (++eptr >= md->end_subject) break;                if (++eptr >= md->end_subject) break;
5753                if (*eptr == 0x000a) eptr++;                if (RAWUCHAR(eptr) == CHAR_LF) eptr++;
5754                }                }
5755              else              else
5756                {                {
5757                if (c != 0x000a &&                if (c != CHAR_LF &&
5758                    (md->bsr_anycrlf ||                    (md->bsr_anycrlf ||
5759                     (c != 0x000b && c != 0x000c &&                     (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5760                      c != 0x0085 && c != 0x2028 && c != 0x2029)))  #ifndef EBCDIC
5761                        && c != 0x2028 && c != 0x2029
5762    #endif  /* Not EBCDIC */
5763                        )))
5764                  break;                  break;
5765                eptr += len;                eptr += len;
5766                }                }
# Line 5763  for (;;) Line 5781  for (;;)
5781              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
5782              switch(c)              switch(c)
5783                {                {
5784                  HSPACE_CASES: gotspace = TRUE; break;
5785                default: gotspace = FALSE; break;                default: gotspace = FALSE; break;
               case 0x09:      /* HT */  
               case 0x20:      /* SPACE */  
               case 0xa0:      /* NBSP */  
               case 0x1680:    /* OGHAM SPACE MARK */  
               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
               case 0x2000:    /* EN QUAD */  
               case 0x2001:    /* EM QUAD */  
               case 0x2002:    /* EN SPACE */  
               case 0x2003:    /* EM SPACE */  
               case 0x2004:    /* THREE-PER-EM SPACE */  
               case 0x2005:    /* FOUR-PER-EM SPACE */  
               case 0x2006:    /* SIX-PER-EM SPACE */  
               case 0x2007:    /* FIGURE SPACE */  
               case 0x2008:    /* PUNCTUATION SPACE */  
               case 0x2009:    /* THIN SPACE */  
               case 0x200A:    /* HAIR SPACE */  
               case 0x202f:    /* NARROW NO-BREAK SPACE */  
               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
               case 0x3000:    /* IDEOGRAPHIC SPACE */  
               gotspace = TRUE;  
               break;  
5786                }                }
5787              if (gotspace == (ctype == OP_NOT_HSPACE)) break;              if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5788              eptr += len;              eptr += len;
# Line 5805  for (;;) Line 5803  for (;;)
5803              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
5804              switch(c)              switch(c)
5805                {                {
5806                  VSPACE_CASES: gotspace = TRUE; break;
5807                default: gotspace = FALSE; break;                default: gotspace = FALSE; break;
               case 0x0a:      /* LF */  
               case 0x0b:      /* VT */  
               case 0x0c:      /* FF */  
               case 0x0d:      /* CR */  
               case 0x85:      /* NEL */  
               case 0x2028:    /* LINE SEPARATOR */  
               case 0x2029:    /* PARAGRAPH SEPARATOR */  
               gotspace = TRUE;  
               break;  
5808                }                }
5809              if (gotspace == (ctype == OP_NOT_VSPACE)) break;              if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5810              eptr += len;              eptr += len;
# Line 5928  for (;;) Line 5918  for (;;)
5918            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5919            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
5920            BACKCHAR(eptr);            BACKCHAR(eptr);
5921            if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&            if (ctype == OP_ANYNL && eptr > pp  && RAWUCHAR(eptr) == CHAR_NL &&
5922                eptr[-1] == '\r') eptr--;                RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
5923            }            }
5924          }          }
5925        else        else
# Line 5980  for (;;) Line 5970  for (;;)
5970                break;                break;
5971                }                }
5972              c = *eptr;              c = *eptr;
5973              if (c == 0x000d)              if (c == CHAR_CR)
5974                {                {
5975                if (++eptr >= md->end_subject) break;                if (++eptr >= md->end_subject) break;
5976                if (*eptr == 0x000a) eptr++;                if (*eptr == CHAR_LF) eptr++;
5977                }                }
5978              else              else
5979                {                {
5980                if (c != 0x000a && (md->bsr_anycrlf ||                if (c != CHAR_LF && (md->bsr_anycrlf ||
5981                  (c != 0x000b && c != 0x000c && c != 0x0085                   (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5982  #ifdef COMPILE_PCRE16  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5983                  && c != 0x2028 && c != 0x2029                   && c != 0x2028 && c != 0x2029
5984  #endif  #endif
5985                  ))) break;                   ))) break;
5986                eptr++;                eptr++;
5987                }                }
5988              }              }
# Line 6006  for (;;) Line 5996  for (;;)
5996                SCHECK_PARTIAL();                SCHECK_PARTIAL();
5997                break;                break;
5998                }                }
5999              c = *eptr;              switch(*eptr)
6000              if (c == 0x09 || c == 0x20 || c == 0xa0                {
6001  #ifdef COMPILE_PCRE16                default: eptr++; break;
6002                || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)                HSPACE_BYTE_CASES:
6003                || c == 0x202f || c == 0x205f || c == 0x3000  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6004                  HSPACE_MULTIBYTE_CASES:
6005  #endif  #endif
6006                ) break;                goto ENDLOOP00;
6007              eptr++;                }
6008              }              }
6009              ENDLOOP00:
6010            break;            break;
6011    
6012            case OP_HSPACE:            case OP_HSPACE:
# Line 6025  for (;;) Line 6017  for (;;)
6017                SCHECK_PARTIAL();                SCHECK_PARTIAL();
6018                break;                break;
6019                }                }
6020              c = *eptr;              switch(*eptr)
6021              if (c != 0x09 && c != 0x20 && c != 0xa0                {
6022  #ifdef COMPILE_PCRE16                default: goto ENDLOOP01;
6023                && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)                HSPACE_BYTE_CASES:
6024                && c != 0x202f && c != 0x205f && c != 0x3000  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6025                  HSPACE_MULTIBYTE_CASES:
6026  #endif  #endif
6027                ) break;                eptr++; break;
6028              eptr++;                }
6029              }              }
6030              ENDLOOP01:
6031            break;            break;
6032    
6033            case OP_NOT_VSPACE:            case OP_NOT_VSPACE:
# Line 6044  for (;;) Line 6038  for (;;)
6038                SCHECK_PARTIAL();                SCHECK_PARTIAL();
6039                break;                break;
6040                }                }
6041              c = *eptr;              switch(*eptr)
6042              if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85                {
6043  #ifdef COMPILE_PCRE16                default: eptr++; break;
6044                || c == 0x2028 || c == 0x2029                VSPACE_BYTE_CASES:
6045    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6046                  VSPACE_MULTIBYTE_CASES:
6047  #endif  #endif
6048                ) break;                goto ENDLOOP02;
6049              eptr++;                }
6050              }              }
6051              ENDLOOP02:
6052            break;            break;
6053    
6054            case OP_VSPACE:            case OP_VSPACE:
# Line 6062  for (;;) Line 6059  for (;;)
6059                SCHECK_PARTIAL();                SCHECK_PARTIAL();
6060                break;                break;
6061                }                }
6062              c = *eptr;              switch(*eptr)
6063              if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85                {
6064  #ifdef COMPILE_PCRE16                default: goto ENDLOOP03;
6065                && c != 0x2028 && c != 0x2029                VSPACE_BYTE_CASES:
6066    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6067                  VSPACE_MULTIBYTE_CASES:
6068  #endif  #endif
6069                ) break;                eptr++; break;
6070              eptr++;                }
6071              }              }
6072              ENDLOOP03:
6073            break;            break;
6074    
6075            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
# Line 6166  for (;;) Line 6166  for (;;)
6166            RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6167            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6168            eptr--;            eptr--;
6169            if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&            if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
6170                eptr[-1] == '\r') eptr--;                eptr[-1] == CHAR_CR) eptr--;
6171            }            }
6172          }          }
6173    
# Line 6217  switch (frame->Xwhere) Line 6217  switch (frame->Xwhere)
6217    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
6218  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6219    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6220    LBL(59) LBL(60) LBL(61) LBL(62)    LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)
6221  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
6222  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
6223    default:    default:
6224    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
   
 printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);  
   
6225    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
6226    }    }
6227  #undef LBL  #undef LBL
# Line 6336  Returns:          > 0 => success; value Line 6333  Returns:          > 0 => success; value
6333                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
6334  */  */
6335    
6336  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
6337  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6338  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6339    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6340    int offsetcount)    int offsetcount)
6341  #else  #elif defined COMPILE_PCRE16
6342  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6343  pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,  pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6344    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6345    int offsetcount)    int offsetcount)
6346    #elif defined COMPILE_PCRE32
6347    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6348    pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
6349      PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
6350      int offsetcount)
6351  #endif  #endif
6352  {  {
6353  int rc, ocount, arg_offset_max;  int rc, ocount, arg_offset_max;
# Line 6368  const pcre_uint8 *start_bits = NULL; Line 6370  const pcre_uint8 *start_bits = NULL;
6370  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6371  PCRE_PUCHAR end_subject;  PCRE_PUCHAR end_subject;
6372  PCRE_PUCHAR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
6373    PCRE_PUCHAR match_partial;
6374  PCRE_PUCHAR req_char_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6375    
6376  const pcre_study_data *study;  const pcre_study_data *study;
# Line 6399  if ((options & ~PUBLIC_EXEC_OPTIONS) != Line 6402  if ((options & ~PUBLIC_EXEC_OPTIONS) !=
6402  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6403    return PCRE_ERROR_NULL;    return PCRE_ERROR_NULL;
6404  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6405    if (length < 0) return PCRE_ERROR_BADLENGTH;
6406  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6407    
6408  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
# Line 6436  if (utf && (options & PCRE_NO_UTF8_CHECK Line 6440  if (utf && (options & PCRE_NO_UTF8_CHECK
6440        offsets[0] = erroroffset;        offsets[0] = erroroffset;
6441        offsets[1] = errorcode;        offsets[1] = errorcode;
6442        }        }
6443  #ifdef COMPILE_PCRE16  #if defined COMPILE_PCRE8
     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?  
       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;  
 #else  
6444      return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?      return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6445        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6446    #elif defined COMPILE_PCRE16
6447        return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6448          PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6449    #elif defined COMPILE_PCRE32
6450        return PCRE_ERROR_BADUTF32;
6451  #endif  #endif
6452      }      }
6453    #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6454    /* Check that a start_offset points to the start of a UTF character. */    /* Check that a start_offset points to the start of a UTF character. */
6455    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
6456        NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))        NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6457      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
6458    #endif
6459    }    }
6460  #endif  #endif
6461    
# Line 6462  if (extra_data != NULL Line 6469  if (extra_data != NULL
6469      && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |      && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6470                               PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT                               PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6471      && extra_data->executable_jit != NULL      && extra_data->executable_jit != NULL
6472      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |      && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
                     PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |  
                     PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)  
6473    {    {
6474    rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,    rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
6475         start_offset, options, offsets, offsetcount);         start_offset, options, offsets, offsetcount);
6476    
6477    /* PCRE_ERROR_NULL means that the selected normal or partial matching    /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial matching
6478    mode is not compiled. In this case we simply fallback to interpreter. */    mode is not compiled. In this case we simply fallback to interpreter. */
6479    
6480    if (rc != PCRE_ERROR_NULL) return rc;    if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
6481    }    }
6482  #endif  #endif
6483    
# Line 6534  end_subject = md->end_subject; Line 6539  end_subject = md->end_subject;
6539  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6540  md->use_ucp = (re->options & PCRE_UCP) != 0;  md->use_ucp = (re->options & PCRE_UCP) != 0;
6541  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6542  md->ignore_skip_arg = FALSE;  md->ignore_skip_arg = 0;
6543    
6544  /* Some options are unpacked into BOOL variables in the hope that testing  /* Some options are unpacked into BOOL variables in the hope that testing
6545  them will be faster than individual option bits. */  them will be faster than individual option bits. */
# Line 6644  if (re->top_backref > 0 && re->top_backr Line 6649  if (re->top_backref > 0 && re->top_backr
6649    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
6650    }    }
6651  else md->offset_vector = offsets;  else md->offset_vector = offsets;
   
6652  md->offset_end = ocount;  md->offset_end = ocount;
6653  md->offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
6654  md->offset_overflow = FALSE;  md->capture_last = 0;
 md->capture_last = -1;  
6655    
6656  /* Reset the working variable associated with each extraction. These should  /* Reset the working variable associated with each extraction. These should
6657  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
# Line 6756  for(;;) Line 6759  for(;;)
6759    
6760      if (has_first_char)      if (has_first_char)
6761        {        {
6762          pcre_uchar smc;
6763    
6764        if (first_char != first_char2)        if (first_char != first_char2)
6765          while (start_match < end_subject &&          while (start_match < end_subject &&
6766              *start_match != first_char && *start_match != first_char2)            (smc = RAWUCHARTEST(start_match)) != first_char && smc != first_char2)
6767            start_match++;            start_match++;
6768        else        else
6769          while (start_match < end_subject && *start_match != first_char)          while (start_match < end_subject && RAWUCHARTEST(start_match) != first_char)
6770            start_match++;            start_match++;
6771        }        }
6772    
# Line 6793  for(;;) Line 6798  for(;;)
6798          if (start_match[-1] == CHAR_CR &&          if (start_match[-1] == CHAR_CR &&
6799               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6800               start_match < end_subject &&               start_match < end_subject &&
6801               *start_match == CHAR_NL)               RAWUCHARTEST(start_match) == CHAR_NL)
6802            start_match++;            start_match++;
6803          }          }
6804        }        }
# Line 6804  for(;;) Line 6809  for(;;)
6809        {        {
6810        while (start_match < end_subject)        while (start_match < end_subject)
6811          {          {
6812          register unsigned int c = *start_match;          register pcre_uint32 c = RAWUCHARTEST(start_match);
6813  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
6814          if (c > 255) c = 255;          if (c > 255) c = 255;
6815  #endif  #endif
# Line 6872  for(;;) Line 6877  for(;;)
6877            {            {
6878            while (p < end_subject)            while (p < end_subject)
6879              {              {
6880              register int pp = *p++;              register pcre_uint32 pp = RAWUCHARINCTEST(p);
6881              if (pp == req_char || pp == req_char2) { p--; break; }              if (pp == req_char || pp == req_char2) { p--; break; }
6882              }              }
6883            }            }
# Line 6880  for(;;) Line 6885  for(;;)
6885            {            {
6886            while (p < end_subject)            while (p < end_subject)
6887              {              {
6888              if (*p++ == req_char) { p--; break; }              if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
6889              }              }
6890            }            }
6891    
# Line 6916  for(;;) Line 6921  for(;;)
6921    md->match_call_count = 0;    md->match_call_count = 0;
6922    md->match_function_type = 0;    md->match_function_type = 0;
6923    md->end_offset_top = 0;    md->end_offset_top = 0;
6924      md->skip_arg_count = 0;
6925    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6926    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL)
6927        {
6928        start_partial = md->start_used_ptr;
6929        match_partial = start_match;
6930        }
6931    
6932    switch(rc)    switch(rc)
6933      {      {
# Line 6930  for(;;) Line 6940  for(;;)
6940    
6941      case MATCH_SKIP_ARG:      case MATCH_SKIP_ARG:
6942      new_start_match = start_match;      new_start_match = start_match;
6943      md->ignore_skip_arg = TRUE;      md->ignore_skip_arg = md->skip_arg_count;
6944      break;      break;
6945    
6946      /* SKIP passes back the next starting point explicitly, but if it is the      /* SKIP passes back the next starting point explicitly, but if it is no
6947      same as the match we have just done, treat it as NOMATCH. */      greater than the match we have just done, treat it as NOMATCH. */
6948    
6949      case MATCH_SKIP:      case MATCH_SKIP:
6950      if (md->start_match_ptr != start_match)      if (md->start_match_ptr > start_match)
6951        {        {
6952        new_start_match = md->start_match_ptr;        new_start_match = md->start_match_ptr;
6953        break;        break;
# Line 6945  for(;;) Line 6955  for(;;)
6955      /* Fall through */      /* Fall through */
6956    
6957      /* NOMATCH and PRUNE advance by one character. THEN at this level acts      /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6958      exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */      exactly like PRUNE. Unset ignore SKIP-with-argument. */
6959    
6960      case MATCH_NOMATCH:      case MATCH_NOMATCH:
6961      case MATCH_PRUNE:      case MATCH_PRUNE:
6962      case MATCH_THEN:      case MATCH_THEN:
6963      md->ignore_skip_arg = FALSE;      md->ignore_skip_arg = 0;
6964      new_start_match = start_match + 1;      new_start_match = start_match + 1;
6965  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
6966      if (utf)      if (utf)
# Line 7043  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 7053  if (rc == MATCH_MATCH || rc == MATCH_ACC
7053          (arg_offset_max - 2) * sizeof(int));          (arg_offset_max - 2) * sizeof(int));
7054        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
7055        }        }
7056      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
7057      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
7058      (PUBL(free))(md->offset_vector);      (PUBL(free))(md->offset_vector);
7059      }      }
# Line 7051  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 7061  if (rc == MATCH_MATCH || rc == MATCH_ACC
7061    /* Set the return code to the number of captured strings, or 0 if there were    /* Set the return code to the number of captured strings, or 0 if there were
7062    too many to fit into the vector. */    too many to fit into the vector. */
7063    
7064    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?    rc = ((md->capture_last & OVFLBIT) != 0 &&
7065             md->end_offset_top >= arg_offset_max)?
7066      0 : md->end_offset_top/2;      0 : md->end_offset_top/2;
7067    
7068    /* If there is space in the offset vector, set any unused pairs at the end of    /* If there is space in the offset vector, set any unused pairs at the end of
# Line 7124  if (start_partial != NULL) Line 7135  if (start_partial != NULL)
7135      {      {
7136      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7137      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7138        if (offsetcount > 2)
7139          offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
7140      }      }
7141    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
7142    }    }

Legend:
Removed from v.976  
changed lines
  Added in v.1298

  ViewVC Help
Powered by ViewVC 1.1.5