/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1102 by chpe, Tue Oct 16 15:56:34 2012 UTC revision 1313 by ph10, Wed Apr 24 12:07:09 2013 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 56  possible. There are also some static sup Line 56  possible. There are also some static sup
56  #undef min  #undef min
57  #undef max  #undef max
58    
59    /* The md->capture_last field uses the lower 16 bits for the last captured
60    substring (which can never be greater than 65535) and a bit in the top half
61    to mean "capture vector overflowed". This odd way of doing things was
62    implemented when it was realized that preserving and restoring the overflow bit
63    whenever the last capture number was saved/restored made for a neater
64    interface, and doing it this way saved on (a) another variable, which would
65    have increased the stack frame size (a big NO-NO in PCRE) and (b) another
66    separate set of save/restore instructions. The following defines are used in
67    implementing this. */
68    
69    #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
70    #define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
71    #define OVFLBIT     0x00010000    /* The bit that is set for overflow */
72    
73  /* Values for setting in md->match_function_type to indicate two special types  /* Values for setting in md->match_function_type to indicate two special types
74  of call to match(). We do it this way to save on using another stack variable,  of call to match(). We do it this way to save on using another stack variable,
75  as stack usage is to be discouraged. */  as stack usage is to be discouraged. */
# Line 73  defined PCRE_ERROR_xxx codes, which are Line 87  defined PCRE_ERROR_xxx codes, which are
87  negative to avoid the external error codes. */  negative to avoid the external error codes. */
88    
89  #define MATCH_ACCEPT       (-999)  #define MATCH_ACCEPT       (-999)
90  #define MATCH_COMMIT       (-998)  #define MATCH_KETRPOS      (-998)
91  #define MATCH_KETRPOS      (-997)  #define MATCH_ONCE         (-997)
92  #define MATCH_ONCE         (-996)  /* The next 5 must be kept together and in sequence so that a test that checks
93    for any one of them can use a range. */
94    #define MATCH_COMMIT       (-996)
95  #define MATCH_PRUNE        (-995)  #define MATCH_PRUNE        (-995)
96  #define MATCH_SKIP         (-994)  #define MATCH_SKIP         (-994)
97  #define MATCH_SKIP_ARG     (-993)  #define MATCH_SKIP_ARG     (-993)
98  #define MATCH_THEN         (-992)  #define MATCH_THEN         (-992)
99    #define MATCH_BACKTRACK_MAX MATCH_THEN
100    #define MATCH_BACKTRACK_MIN MATCH_COMMIT
101    
102  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
103  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
# Line 149  match_ref(int offset, register PCRE_PUCH Line 167  match_ref(int offset, register PCRE_PUCH
167  {  {
168  PCRE_PUCHAR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
169  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
170    #ifdef SUPPORT_UTF
171  BOOL utf = md->utf;  BOOL utf = md->utf;
172    #endif
173    
174  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
175  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 197  if (caseless) Line 217  if (caseless)
217        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
218        GETCHARINC(d, p);        GETCHARINC(d, p);
219        ur = GET_UCD(d);        ur = GET_UCD(d);
220        if (c != d && c != d + ur->other_case)        if (c != d && c != d + ur->other_case)
221          {          {
222          const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;          const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
223          for (;;)          for (;;)
224            {            {
225            if (c < *pp) return -1;            if (c < *pp) return -1;
# Line 217  if (caseless) Line 237  if (caseless)
237      {      {
238      while (length-- > 0)      while (length-- > 0)
239        {        {
240        pcre_uchar cc, cp;        pcre_uint32 cc, cp;
241        if (eptr >= md->end_subject) return -2;   /* Partial match */        if (eptr >= md->end_subject) return -2;   /* Partial match */
242        cc = RAWUCHARTEST(eptr);        cc = RAWUCHARTEST(eptr);
243        cp = RAWUCHARTEST(p);        cp = RAWUCHARTEST(p);
# Line 292  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 312  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
312         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
313         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
314         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
315         RM61,  RM62, RM63, RM64, RM65, RM66 };         RM61,  RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
316    
317  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
318  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 401  typedef struct heapframe { Line 421  typedef struct heapframe {
421    
422  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
423    int Xprop_type;    int Xprop_type;
424    int Xprop_value;    unsigned int Xprop_value;
425    int Xprop_fail_result;    int Xprop_fail_result;
426    int Xoclength;    int Xoclength;
427    pcre_uchar Xocchars[6];    pcre_uchar Xocchars[6];
# Line 414  typedef struct heapframe { Line 434  typedef struct heapframe {
434    int Xlength;    int Xlength;
435    int Xmax;    int Xmax;
436    int Xmin;    int Xmin;
437    int Xnumber;    unsigned int Xnumber;
438    int Xoffset;    int Xoffset;
439    int Xop;    unsigned int Xop;
440    int Xsave_capture_last;    pcre_int32 Xsave_capture_last;
441    int Xsave_offset1, Xsave_offset2, Xsave_offset3;    int Xsave_offset1, Xsave_offset2, Xsave_offset3;
442    int Xstacksave[REC_STACK_SAVE_MAX];    int Xstacksave[REC_STACK_SAVE_MAX];
443    
# Line 619  BOOL prev_is_word; Line 639  BOOL prev_is_word;
639    
640  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
641  int prop_type;  int prop_type;
642  int prop_value;  unsigned int prop_value;
643  int prop_fail_result;  int prop_fail_result;
644  int oclength;  int oclength;
645  pcre_uchar occhars[6];  pcre_uchar occhars[6];
# Line 630  int ctype; Line 650  int ctype;
650  int length;  int length;
651  int max;  int max;
652  int min;  int min;
653  int number;  unsigned int number;
654  int offset;  int offset;
655  pcre_uchar op;  unsigned int op;
656  int save_capture_last;  pcre_int32 save_capture_last;
657  int save_offset1, save_offset2, save_offset3;  int save_offset1, save_offset2, save_offset3;
658  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
659    
# Line 761  for (;;) Line 781  for (;;)
781      case OP_FAIL:      case OP_FAIL:
782      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
783    
     /* COMMIT overrides PRUNE, SKIP, and THEN */  
   
784      case OP_COMMIT:      case OP_COMMIT:
785      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
786        eptrb, RM52);        eptrb, RM52);
787      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&  
         rrc != MATCH_THEN)  
       RRETURN(rrc);  
788      RRETURN(MATCH_COMMIT);      RRETURN(MATCH_COMMIT);
789    
     /* PRUNE overrides THEN */  
   
790      case OP_PRUNE:      case OP_PRUNE:
791      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
792        eptrb, RM51);        eptrb, RM51);
793      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
794      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
795    
796      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
# Line 787  for (;;) Line 800  for (;;)
800        eptrb, RM56);        eptrb, RM56);
801      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&      if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
802           md->mark == NULL) md->mark = ecode + 2;           md->mark == NULL) md->mark = ecode + 2;
803      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
804      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
805    
     /* SKIP overrides PRUNE and THEN */  
   
806      case OP_SKIP:      case OP_SKIP:
807      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
808        eptrb, RM53);        eptrb, RM53);
809      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       RRETURN(rrc);  
810      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
811      RRETURN(MATCH_SKIP);      RRETURN(MATCH_SKIP);
812    
813      /* Note that, for Perl compatibility, SKIP with an argument does NOT set      /* Note that, for Perl compatibility, SKIP with an argument does NOT set
814      nomatch_mark. There is a flag that disables this opcode when re-matching a      nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
815      pattern that ended with a SKIP for which there was not a matching MARK. */      not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
816        that failed and any that precede it (either they also failed, or were not
817        triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
818        SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
819        set to the count of the one that failed. */
820    
821      case OP_SKIP_ARG:      case OP_SKIP_ARG:
822      if (md->ignore_skip_arg)      md->skip_arg_count++;
823        if (md->skip_arg_count <= md->ignore_skip_arg)
824        {        {
825        ecode += PRIV(OP_lengths)[*ecode] + ecode[1];        ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
826        break;        break;
827        }        }
828      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
829        eptrb, RM57);        eptrb, RM57);
830      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
831        RRETURN(rrc);  
   
832      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
833      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
834      caught by a matching MARK, or get to the top, where it causes a rematch      caught by a matching MARK, or get to the top, where it causes a rematch
835      with the md->ignore_skip_arg flag set. */      with md->ignore_skip_arg set to the value of md->skip_arg_count. */
836    
837      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
838      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
# Line 1064  for (;;) Line 1078  for (;;)
1078        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1079    
1080        save_mark = md->mark;        save_mark = md->mark;
1081          save_capture_last = md->capture_last;
1082        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1083          RM2);          RM2);
1084    
# Line 1095  for (;;) Line 1110  for (;;)
1110        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1111        md->mark = save_mark;        md->mark = save_mark;
1112        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1113          md->capture_last = save_capture_last;
1114        }        }
1115    
1116      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
# Line 1216  for (;;) Line 1232  for (;;)
1232      POSSESSIVE_NON_CAPTURE:      POSSESSIVE_NON_CAPTURE:
1233      matched_once = FALSE;      matched_once = FALSE;
1234      code_offset = (int)(ecode - md->start_code);      code_offset = (int)(ecode - md->start_code);
1235        save_capture_last = md->capture_last;
1236    
1237      for (;;)      for (;;)
1238        {        {
# Line 1245  for (;;) Line 1262  for (;;)
1262        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1263        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1264        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1265          md->capture_last = save_capture_last;
1266        }        }
1267    
1268      if (matched_once || allow_zero)      if (matched_once || allow_zero)
# Line 1289  for (;;) Line 1307  for (;;)
1307          cb.pattern_position = GET(ecode, LINK_SIZE + 3);          cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1308          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1309          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1310          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last & CAPLMASK;
1311            /* Internal change requires this for API compatibility. */
1312            if (cb.capture_last == 0) cb.capture_last = -1;
1313          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1314          cb.mark             = md->nomatch_mark;          cb.mark             = md->nomatch_mark;
1315          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1316          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1317          }          }
1318        ecode += PRIV(OP_lengths)[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1319          codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1320        }        }
1321    
1322      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1311  for (;;) Line 1332  for (;;)
1332          }          }
1333        else        else
1334          {          {
1335          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/          unsigned int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1336          condition = (recno == RREF_ANY || recno == md->recursive->group_num);          condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1337    
1338          /* If the test is for recursion into a specific subpattern, and it is          /* If the test is for recursion into a specific subpattern, and it is
# Line 1383  for (;;) Line 1404  for (;;)
1404    
1405        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
1406          {          {
1407          int refno = offset >> 1;          unsigned int refno = offset >> 1;
1408          pcre_uchar *slotA = md->name_table;          pcre_uchar *slotA = md->name_table;
1409    
1410          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
# Line 1511  for (;;) Line 1532  for (;;)
1532      to close any currently open capturing brackets. */      to close any currently open capturing brackets. */
1533    
1534      case OP_CLOSE:      case OP_CLOSE:
1535      number = GET2(ecode, 1);      number = GET2(ecode, 1);   /* Must be less than 65536 */
1536      offset = number << 1;      offset = number << 1;
1537    
1538  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 1519  for (;;) Line 1540  for (;;)
1540        printf("\n");        printf("\n");
1541  #endif  #endif
1542    
1543      md->capture_last = number;      md->capture_last = (md->capture_last & OVFLMASK) | number;
1544      if (offset >= md->offset_max) md->offset_overflow = TRUE; else      if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1545        {        {
1546        md->offset_vector[offset] =        md->offset_vector[offset] =
1547          md->offset_vector[md->offset_end - number];          md->offset_vector[md->offset_end - number];
# Line 1582  for (;;) Line 1603  for (;;)
1603        }        }
1604      else condassert = FALSE;      else condassert = FALSE;
1605    
1606        /* Loop for each branch */
1607    
1608      do      do
1609        {        {
1610        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1611    
1612          /* A match means that the assertion is true; break out of the loop
1613          that matches its alternatives. */
1614    
1615        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1616          {          {
1617          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1618          break;          break;
1619          }          }
1620    
1621          /* If not matched, restore the previous mark setting. */
1622    
1623        md->mark = save_mark;        md->mark = save_mark;
1624    
1625        /* A COMMIT failure must fail the entire assertion, without trying any        /* See comment in the code for capturing groups above about handling
1626        subsequent branches. */        THEN. */
   
       if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);  
1627    
1628        /* PCRE does not allow THEN to escape beyond an assertion; it        if (rrc == MATCH_THEN)
1629        is treated as NOMATCH. */          {
1630            next = ecode + GET(ecode,1);
1631            if (md->start_match_ptr < next &&
1632                (*ecode == OP_ALT || *next == OP_ALT))
1633              rrc = MATCH_NOMATCH;
1634            }
1635    
1636          /* Anything other than NOMATCH causes the entire assertion to fail,
1637          passing back the return code. This includes COMMIT, SKIP, PRUNE and an
1638          uncaptured THEN, which means they take their normal effect. This
1639          consistent approach does not always have exactly the same effect as in
1640          Perl. */
1641    
1642        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1643        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1644        }        }
1645      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);   /* Continue for next alternative */
1646    
1647        /* If we have tried all the alternative branches, the assertion has
1648        failed. If not, we broke out after a match. */
1649    
1650      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1651    
# Line 1611  for (;;) Line 1653  for (;;)
1653    
1654      if (condassert) RRETURN(MATCH_MATCH);      if (condassert) RRETURN(MATCH_MATCH);
1655    
1656      /* Continue from after the assertion, updating the offsets high water      /* Continue from after a successful assertion, updating the offsets high
1657      mark, since extracts may have been taken during the assertion. */      water mark, since extracts may have been taken during the assertion. */
1658    
1659      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1660      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1661      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1662      continue;      continue;
1663    
1664      /* Negative assertion: all branches must fail to match. Encountering SKIP,      /* Negative assertion: all branches must fail to match for the assertion to
1665      PRUNE, or COMMIT means we must assume failure without checking subsequent      succeed. */
     branches. */  
1666    
1667      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1668      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
# Line 1633  for (;;) Line 1674  for (;;)
1674        }        }
1675      else condassert = FALSE;      else condassert = FALSE;
1676    
1677        /* Loop for each alternative branch. */
1678    
1679      do      do
1680        {        {
1681        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1682        md->mark = save_mark;        md->mark = save_mark;   /* Always restore the mark setting */
1683        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);  
1684        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        switch(rrc)
1685          {          {
1686          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          case MATCH_MATCH:            /* A successful match means */
1687          break;          case MATCH_ACCEPT:           /* the assertion has failed. */
1688          }          RRETURN(MATCH_NOMATCH);
1689    
1690            case MATCH_NOMATCH:          /* Carry on with next branch */
1691            break;
1692    
1693        /* PCRE does not allow THEN to escape beyond an assertion; it is treated          /* See comment in the code for capturing groups above about handling
1694        as NOMATCH. */          THEN. */
1695    
1696        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);          case MATCH_THEN:
1697            next = ecode + GET(ecode,1);
1698            if (md->start_match_ptr < next &&
1699                (*ecode == OP_ALT || *next == OP_ALT))
1700              {
1701              rrc = MATCH_NOMATCH;
1702              break;
1703              }
1704            /* Otherwise fall through. */
1705    
1706            /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
1707            assertion to fail to match, without considering any more alternatives.
1708            Failing to match means the assertion is true. This is a consistent
1709            approach, but does not always have the same effect as in Perl. */
1710    
1711            case MATCH_COMMIT:
1712            case MATCH_SKIP:
1713            case MATCH_SKIP_ARG:
1714            case MATCH_PRUNE:
1715            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1716            goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
1717    
1718            /* Anything else is an error */
1719    
1720            default:
1721            RRETURN(rrc);
1722            }
1723    
1724          /* Continue with next branch */
1725    
1726        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1727        }        }
1728      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1729    
1730        /* All branches in the assertion failed to match. */
1731    
1732        NEG_ASSERT_TRUE:
1733      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */      if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1734        ecode += 1 + LINK_SIZE;                /* Continue with current branch */
     ecode += 1 + LINK_SIZE;  
1735      continue;      continue;
1736    
1737      /* Move the subject pointer back. This occurs only at the start of      /* Move the subject pointer back. This occurs only at the start of
# Line 1714  for (;;) Line 1791  for (;;)
1791        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1792        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1793        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1794        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last & CAPLMASK;
1795          /* Internal change requires this for API compatibility. */
1796          if (cb.capture_last == 0) cb.capture_last = -1;
1797        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1798        cb.mark             = md->nomatch_mark;        cb.mark             = md->nomatch_mark;
1799        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
# Line 1743  for (;;) Line 1822  for (;;)
1822      case OP_RECURSE:      case OP_RECURSE:
1823        {        {
1824        recursion_info *ri;        recursion_info *ri;
1825        int recno;        unsigned int recno;
1826    
1827        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1828        recno = (callpat == md->start_code)? 0 :        recno = (callpat == md->start_code)? 0 :
# Line 1760  for (;;) Line 1839  for (;;)
1839        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1840    
1841        new_recursive.group_num = recno;        new_recursive.group_num = recno;
1842          new_recursive.saved_capture_last = md->capture_last;
1843        new_recursive.subject_position = eptr;        new_recursive.subject_position = eptr;
1844        new_recursive.prevrec = md->recursive;        new_recursive.prevrec = md->recursive;
1845        md->recursive = &new_recursive;        md->recursive = &new_recursive;
# Line 1783  for (;;) Line 1863  for (;;)
1863              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1864    
1865        /* OK, now we can do the recursion. After processing each alternative,        /* OK, now we can do the recursion. After processing each alternative,
1866        restore the offset data. If there were nested recursions, md->recursive        restore the offset data and the last captured value. If there were nested
1867        might be changed, so reset it before looping. */        recursions, md->recursive might be changed, so reset it before looping.
1868          */
1869    
1870        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1871        cbegroup = (*callpat >= OP_SBRA);        cbegroup = (*callpat >= OP_SBRA);
# Line 1795  for (;;) Line 1876  for (;;)
1876            md, eptrb, RM6);            md, eptrb, RM6);
1877          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1878              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1879            md->capture_last = new_recursive.saved_capture_last;
1880          md->recursive = new_recursive.prevrec;          md->recursive = new_recursive.prevrec;
1881          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1882            {            {
# Line 1811  for (;;) Line 1893  for (;;)
1893            goto RECURSION_MATCHED;        /* Exit loop; end processing */            goto RECURSION_MATCHED;        /* Exit loop; end processing */
1894            }            }
1895    
1896          /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it          /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
1897          is treated as NOMATCH. */          recursion; they cause a NOMATCH for the entire recursion. These codes
1898            are defined in a range that can be tested for. */
1899    
1900            if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
1901              RRETURN(MATCH_NOMATCH);
1902    
1903            /* Any return code other than NOMATCH is an error. */
1904    
1905          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&          if (rrc != MATCH_NOMATCH)
                  rrc != MATCH_COMMIT)  
1906            {            {
1907            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1908            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1945  for (;;) Line 2032  for (;;)
2032    
2033        /* Deal with capturing */        /* Deal with capturing */
2034    
2035        md->capture_last = number;        md->capture_last = (md->capture_last & OVFLMASK) | number;
2036        if (offset >= md->offset_max) md->offset_overflow = TRUE; else        if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
2037          {          {
2038          /* If offset is greater than offset_top, it means that we are          /* If offset is greater than offset_top, it means that we are
2039          "skipping" a capturing group, and that group's offsets must be marked          "skipping" a capturing group, and that group's offsets must be marked
# Line 2530  for (;;) Line 2617  for (;;)
2617        }        }
2618      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2619        {        {
2620        const pcre_uint32 *cp;        const pcre_uint32 *cp;
2621        const ucd_record *prop = GET_UCD(c);        const ucd_record *prop = GET_UCD(c);
2622    
2623        switch(ecode[1])        switch(ecode[1])
# Line 2592  for (;;) Line 2679  for (;;)
2679          break;          break;
2680    
2681          case PT_CLIST:          case PT_CLIST:
2682          cp = PRIV(ucd_caseless_sets) + prop->caseset;          cp = PRIV(ucd_caseless_sets) + ecode[2];
2683          for (;;)          for (;;)
2684            {            {
2685            if (c < *cp)            if (c < *cp)
# Line 2602  for (;;) Line 2689  for (;;)
2689            }            }
2690          break;          break;
2691    
2692            case PT_UCNC:
2693            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2694                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2695                 c >= 0xe000) == (op == OP_NOTPROP))
2696              RRETURN(MATCH_NOMATCH);
2697            break;
2698    
2699          /* This should never occur */          /* This should never occur */
2700    
2701          default:          default:
# Line 3188  for (;;) Line 3282  for (;;)
3282    
3283        if (fc < 128)        if (fc < 128)
3284          {          {
3285          pcre_uchar cc = RAWUCHAR(eptr);          pcre_uint32 cc = RAWUCHAR(eptr);
3286          if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);          if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3287          ecode++;          ecode++;
3288          eptr++;          eptr++;
# Line 3293  for (;;) Line 3387  for (;;)
3387      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3388      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3389    
3390      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. We first check
3391        for the minimum number of characters. If the minimum equals the maximum, we
3392        are done. Otherwise, if minimizing, check the rest of the pattern for a
3393        match; if there isn't one, advance up to the maximum, one character at a
3394        time.
3395    
3396        If maximizing, advance up to the maximum number of matching characters,
3397        until eptr is past the end of the maximum run. If possessive, we are
3398        then done (no backing up). Otherwise, match at this position; anything
3399        other than no match is immediately returned. For nomatch, back up one
3400        character, unless we are matching \R and the last thing matched was
3401        \r\n, in which case, back up two bytes. When we reach the first optional
3402        character position, we can save stack by doing a tail recurse.
3403    
3404        The various UTF/non-UTF and caseful/caseless cases are handled separately,
3405        for speed. */
3406    
3407      REPEATCHAR:      REPEATCHAR:
3408  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 3377  for (;;) Line 3486  for (;;)
3486                }                }
3487              }              }
3488    
3489            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
   
3490            for(;;)            for(;;)
3491              {              {
3492                if (eptr == pp) goto TAIL_RECURSE;
3493              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3494              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             if (eptr == pp) { RRETURN(MATCH_NOMATCH); }  
3495  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3496              eptr--;              eptr--;
3497              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 3437  for (;;) Line 3545  for (;;)
3545    
3546        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3547          {          {
3548          pcre_uchar cc;          pcre_uint32 cc;                 /* Faster than pcre_uchar */
   
3549          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3550            {            {
3551            SCHECK_PARTIAL();            SCHECK_PARTIAL();
# Line 3453  for (;;) Line 3560  for (;;)
3560          {          {
3561          for (fi = min;; fi++)          for (fi = min;; fi++)
3562            {            {
3563            pcre_uchar cc;            pcre_uint32 cc;               /* Faster than pcre_uchar */
   
3564            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3565            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3566            if (fi >= max) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
# Line 3474  for (;;) Line 3580  for (;;)
3580          pp = eptr;          pp = eptr;
3581          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3582            {            {
3583            pcre_uchar cc;            pcre_uint32 cc;               /* Faster than pcre_uchar */
   
3584            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3585              {              {
3586              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 3486  for (;;) Line 3591  for (;;)
3591            eptr++;            eptr++;
3592            }            }
3593    
3594          if (possessive) continue;          if (possessive) continue;       /* No backtracking */
3595            for (;;)
         while (eptr >= pp)  
3596            {            {
3597              if (eptr == pp) goto TAIL_RECURSE;
3598            RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3599            eptr--;            eptr--;
3600            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 3544  for (;;) Line 3649  for (;;)
3649            if (fc != RAWUCHARTEST(eptr)) break;            if (fc != RAWUCHARTEST(eptr)) break;
3650            eptr++;            eptr++;
3651            }            }
3652          if (possessive) continue;          if (possessive) continue;    /* No backtracking */
3653            for (;;)
         while (eptr >= pp)  
3654            {            {
3655              if (eptr == pp) goto TAIL_RECURSE;
3656            RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3657            eptr--;            eptr--;
3658            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 3724  for (;;) Line 3829  for (;;)
3829            }            }
3830          }          }
3831        else        else
3832  #endif  #endif  /* SUPPORT_UTF */
3833        /* Not UTF mode */        /* Not UTF mode */
3834          {          {
3835          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
# Line 3762  for (;;) Line 3867  for (;;)
3867              }              }
3868            }            }
3869          else          else
3870  #endif  #endif  /* SUPPORT_UTF */
3871          /* Not UTF mode */          /* Not UTF mode */
3872            {            {
3873            for (fi = min;; fi++)            for (fi = min;; fi++)
# Line 3804  for (;;) Line 3909  for (;;)
3909              if (fc == d || (unsigned int)foc == d) break;              if (fc == d || (unsigned int)foc == d) break;
3910              eptr += len;              eptr += len;
3911              }              }
3912            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
3913            for(;;)            for(;;)
3914              {              {
3915                if (eptr == pp) goto TAIL_RECURSE;
3916              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3917              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3918              if (eptr-- == pp) break;        /* Stop if tried at original pos */              eptr--;
3919              BACKCHAR(eptr);              BACKCHAR(eptr);
3920              }              }
3921            }            }
3922          else          else
3923  #endif  #endif  /* SUPPORT_UTF */
3924          /* Not UTF mode */          /* Not UTF mode */
3925            {            {
3926            for (i = min; i < max; i++)            for (i = min; i < max; i++)
# Line 3827  for (;;) Line 3933  for (;;)
3933              if (fc == *eptr || foc == *eptr) break;              if (fc == *eptr || foc == *eptr) break;
3934              eptr++;              eptr++;
3935              }              }
3936            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
3937            while (eptr >= pp)            for (;;)
3938              {              {
3939                if (eptr == pp) goto TAIL_RECURSE;
3940              RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3941              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3942              eptr--;              eptr--;
# Line 3939  for (;;) Line 4046  for (;;)
4046              if (fc == d) break;              if (fc == d) break;
4047              eptr += len;              eptr += len;
4048              }              }
4049            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
4050            for(;;)            for(;;)
4051              {              {
4052                if (eptr == pp) goto TAIL_RECURSE;
4053              RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
4054              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4055              if (eptr-- == pp) break;        /* Stop if tried at original pos */              eptr--;
4056              BACKCHAR(eptr);              BACKCHAR(eptr);
4057              }              }
4058            }            }
# Line 3962  for (;;) Line 4070  for (;;)
4070              if (fc == *eptr) break;              if (fc == *eptr) break;
4071              eptr++;              eptr++;
4072              }              }
4073            if (possessive) continue;            if (possessive) continue;    /* No backtracking */
4074            while (eptr >= pp)            for (;;)
4075              {              {
4076                if (eptr == pp) goto TAIL_RECURSE;
4077              RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
4078              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4079              eptr--;              eptr--;
# Line 4201  for (;;) Line 4310  for (;;)
4310                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4311              }              }
4312            break;            break;
4313    
4314            case PT_CLIST:            case PT_CLIST:
4315            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4316              {              {
4317              const pcre_uint32 *cp;              const pcre_uint32 *cp;
4318              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4319                {                {
# Line 4212  for (;;) Line 4321  for (;;)
4321                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4322                }                }
4323              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4324              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
4325              for (;;)              for (;;)
4326                {                {
4327                if (c < *cp)                if (c < *cp)
4328                  { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }                  { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4329                if (c == *cp++)                if (c == *cp++)
4330                  { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }                  { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4331                }                }
4332              }              }
4333            break;            break;
4334    
4335              case PT_UCNC:
4336              for (i = 1; i <= min; i++)
4337                {
4338                if (eptr >= md->end_subject)
4339                  {
4340                  SCHECK_PARTIAL();
4341                  RRETURN(MATCH_NOMATCH);
4342                  }
4343                GETCHARINCTEST(c, eptr);
4344                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4345                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4346                     c >= 0xe000) == prop_fail_result)
4347                  RRETURN(MATCH_NOMATCH);
4348                }
4349              break;
4350    
4351            /* This should not occur */            /* This should not occur */
4352    
4353            default:            default:
# Line 4428  for (;;) Line 4553  for (;;)
4553          case OP_DIGIT:          case OP_DIGIT:
4554          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4555            {            {
4556            pcre_uchar cc;            pcre_uint32 cc;
   
4557            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4558              {              {
4559              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4446  for (;;) Line 4570  for (;;)
4570          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4571          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4572            {            {
4573            pcre_uchar cc;            pcre_uint32 cc;
   
4574            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4575              {              {
4576              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4464  for (;;) Line 4587  for (;;)
4587          case OP_WHITESPACE:          case OP_WHITESPACE:
4588          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4589            {            {
4590            pcre_uchar cc;            pcre_uint32 cc;
   
4591            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4592              {              {
4593              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4482  for (;;) Line 4604  for (;;)
4604          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4605          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4606            {            {
4607            pcre_uchar cc;            pcre_uint32 cc;
   
4608            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4609              {              {
4610              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4500  for (;;) Line 4621  for (;;)
4621          case OP_WORDCHAR:          case OP_WORDCHAR:
4622          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4623            {            {
4624            pcre_uchar cc;            pcre_uint32 cc;
   
4625            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4626              {              {
4627              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4952  for (;;) Line 5072  for (;;)
5072    
5073            case PT_CLIST:            case PT_CLIST:
5074            for (fi = min;; fi++)            for (fi = min;; fi++)
5075              {              {
5076              const pcre_uint32 *cp;              const pcre_uint32 *cp;
5077              RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
5078              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5079              if (fi >= max) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
5080              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4963  for (;;) Line 5083  for (;;)
5083                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5084                }                }
5085              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
5086              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
5087              for (;;)              for (;;)
5088                {                {
5089                if (c < *cp)                if (c < *cp)
# Line 4974  for (;;) Line 5094  for (;;)
5094              }              }
5095            /* Control never gets here */            /* Control never gets here */
5096    
5097              case PT_UCNC:
5098              for (fi = min;; fi++)
5099                {
5100                RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);
5101                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5102                if (fi >= max) RRETURN(MATCH_NOMATCH);
5103                if (eptr >= md->end_subject)
5104                  {
5105                  SCHECK_PARTIAL();
5106                  RRETURN(MATCH_NOMATCH);
5107                  }
5108                GETCHARINCTEST(c, eptr);
5109                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5110                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5111                     c >= 0xe000) == prop_fail_result)
5112                  RRETURN(MATCH_NOMATCH);
5113                }
5114              /* Control never gets here */
5115    
5116            /* This should never occur */            /* This should never occur */
5117            default:            default:
5118            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
# Line 5443  for (;;) Line 5582  for (;;)
5582              eptr+= len;              eptr+= len;
5583              }              }
5584            break;            break;
5585    
5586            case PT_CLIST:            case PT_CLIST:
5587            for (i = min; i < max; i++)            for (i = min; i < max; i++)
5588              {              {
# Line 5455  for (;;) Line 5594  for (;;)
5594                break;                break;
5595                }                }
5596              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5597              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
5598              for (;;)              for (;;)
5599                {                {
5600                if (c < *cp)                if (c < *cp)
5601                  { if (prop_fail_result) break; else goto GOT_MAX; }                  { if (prop_fail_result) break; else goto GOT_MAX; }
5602                if (c == *cp++)                if (c == *cp++)
5603                  { if (prop_fail_result) goto GOT_MAX; else break; }                  { if (prop_fail_result) goto GOT_MAX; else break; }
5604                }                }
5605              eptr += len;              eptr += len;
5606                }
5607              GOT_MAX:
5608              break;
5609    
5610              case PT_UCNC:
5611              for (i = min; i < max; i++)
5612                {
5613                int len = 1;
5614                if (eptr >= md->end_subject)
5615                  {
5616                  SCHECK_PARTIAL();
5617                  break;
5618                  }
5619                GETCHARLENTEST(c, eptr, len);
5620                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5621                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5622                     c >= 0xe000) == prop_fail_result)
5623                  break;
5624                eptr += len;
5625              }              }
           GOT_MAX:  
5626            break;            break;
5627    
5628            default:            default:
# Line 5474  for (;;) Line 5631  for (;;)
5631    
5632          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
5633    
5634          if (possessive) continue;          if (possessive) continue;    /* No backtracking */
5635          for(;;)          for(;;)
5636            {            {
5637              if (eptr == pp) goto TAIL_RECURSE;
5638            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5639            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5640            if (eptr-- == pp) break;        /* Stop if tried at original pos */            eptr--;
5641            if (utf) BACKCHAR(eptr);            if (utf) BACKCHAR(eptr);
5642            }            }
5643          }          }
# Line 5516  for (;;) Line 5674  for (;;)
5674    
5675          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
5676    
5677          if (possessive) continue;          if (possessive) continue;    /* No backtracking */
   
5678          for(;;)          for(;;)
5679            {            {
5680              if (eptr == pp) goto TAIL_RECURSE;
5681            RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5682            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5683            if (eptr-- == pp) break;        /* Stop if tried at original pos */            eptr--;
5684            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
5685              {              {
5686              if (!utf) c = *eptr; else              if (!utf) c = *eptr; else
# Line 5797  for (;;) Line 5955  for (;;)
5955            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
5956            }            }
5957    
5958          /* eptr is now past the end of the maximum run. If possessive, we are          if (possessive) continue;    /* No backtracking */
         done (no backing up). Otherwise, match at this position; anything other  
         than no match is immediately returned. For nomatch, back up one  
         character, unless we are matching \R and the last thing matched was  
         \r\n, in which case, back up two bytes. */  
   
         if (possessive) continue;  
5959          for(;;)          for(;;)
5960            {            {
5961              if (eptr == pp) goto TAIL_RECURSE;
5962            RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5963            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5964            if (eptr-- == pp) break;        /* Stop if tried at original pos */            eptr--;
5965            BACKCHAR(eptr);            BACKCHAR(eptr);
5966            if (ctype == OP_ANYNL && eptr > pp  && RAWUCHAR(eptr) == CHAR_NL &&            if (ctype == OP_ANYNL && eptr > pp  && RAWUCHAR(eptr) == CHAR_NL &&
5967                RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;                RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
# Line 6046  for (;;) Line 6199  for (;;)
6199            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
6200            }            }
6201    
6202          /* eptr is now past the end of the maximum run. If possessive, we are          if (possessive) continue;    /* No backtracking */
6203          done (no backing up). Otherwise, match at this position; anything other          for (;;)
         than no match is immediately returned. For nomatch, back up one  
         character (byte), unless we are matching \R and the last thing matched  
         was \r\n, in which case, back up two bytes. */  
   
         if (possessive) continue;  
         while (eptr >= pp)  
6204            {            {
6205              if (eptr == pp) goto TAIL_RECURSE;
6206            RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6207            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6208            eptr--;            eptr--;
# Line 6109  switch (frame->Xwhere) Line 6257  switch (frame->Xwhere)
6257    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
6258  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6259    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6260    LBL(59) LBL(60) LBL(61) LBL(62)    LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)
6261  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
6262  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
6263    default:    default:
6264    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
   
 printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);  
   
6265    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
6266    }    }
6267  #undef LBL  #undef LBL
# Line 6265  const pcre_uint8 *start_bits = NULL; Line 6410  const pcre_uint8 *start_bits = NULL;
6410  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6411  PCRE_PUCHAR end_subject;  PCRE_PUCHAR end_subject;
6412  PCRE_PUCHAR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
6413    PCRE_PUCHAR match_partial;
6414  PCRE_PUCHAR req_char_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6415    
6416  const pcre_study_data *study;  const pcre_study_data *study;
# Line 6296  if ((options & ~PUBLIC_EXEC_OPTIONS) != Line 6442  if ((options & ~PUBLIC_EXEC_OPTIONS) !=
6442  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6443    return PCRE_ERROR_NULL;    return PCRE_ERROR_NULL;
6444  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6445    if (length < 0) return PCRE_ERROR_BADLENGTH;
6446  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6447    
6448  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
# Line 6362  if (extra_data != NULL Line 6509  if (extra_data != NULL
6509      && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |      && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6510                               PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT                               PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6511      && extra_data->executable_jit != NULL      && extra_data->executable_jit != NULL
6512      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |      && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
                     PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |  
                     PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)  
6513    {    {
6514    rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,    /* A facility for setting the match limit in the regex was added; this puts
6515      a value in the compiled block. (Similarly for recursion limit, but the JIT
6516      does not make use of that.) Because the regex is not passed to jit_exec, we
6517      fudge up an alternative extra block, because we must not modify the extra
6518      block that the user has passed. */
6519    
6520    #if defined COMPILE_PCRE8
6521      pcre_extra extra_data_copy;
6522    #elif defined COMPILE_PCRE16
6523      pcre16_extra extra_data_copy;
6524    #elif defined COMPILE_PCRE32
6525      pcre32_extra extra_data_copy;
6526    #endif
6527    
6528      if ((re->flags & PCRE_MLSET) != 0 &&
6529          ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0 ||
6530           re->limit_match < extra_data->match_limit))
6531        {
6532        extra_data_copy = *extra_data;
6533        extra_data_copy.match_limit = re->limit_match;
6534        extra_data_copy.flags |= PCRE_EXTRA_MATCH_LIMIT;
6535        extra_data = &extra_data_copy;
6536        }
6537    
6538      rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
6539         start_offset, options, offsets, offsetcount);         start_offset, options, offsets, offsetcount);
6540    
6541    /* PCRE_ERROR_NULL means that the selected normal or partial matching    /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial matching
6542    mode is not compiled. In this case we simply fallback to interpreter. */    mode is not compiled. In this case we simply fall back to the interpreter. */
6543    
6544    if (rc != PCRE_ERROR_NULL) return rc;    if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
6545    }    }
6546  #endif  #endif
6547    
# Line 6395  md->callout_data = NULL; Line 6564  md->callout_data = NULL;
6564    
6565  tables = re->tables;  tables = re->tables;
6566    
6567    /* The two limit values override the defaults, whatever their value. */
6568    
6569  if (extra_data != NULL)  if (extra_data != NULL)
6570    {    {
6571    register unsigned int flags = extra_data->flags;    register unsigned int flags = extra_data->flags;
# Line 6409  if (extra_data != NULL) Line 6580  if (extra_data != NULL)
6580    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6581    }    }
6582    
6583    /* Limits in the regex override only if they are smaller. */
6584    
6585    if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
6586      md->match_limit = re->limit_match;
6587    
6588    if ((re->flags & PCRE_RLSET) != 0 &&
6589        re->limit_recursion < md->match_limit_recursion)
6590      md->match_limit_recursion = re->limit_recursion;
6591    
6592  /* If the exec call supplied NULL for tables, use the inbuilt ones. This  /* If the exec call supplied NULL for tables, use the inbuilt ones. This
6593  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
6594  in other programs later. */  in other programs later. */
# Line 6434  end_subject = md->end_subject; Line 6614  end_subject = md->end_subject;
6614  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6615  md->use_ucp = (re->options & PCRE_UCP) != 0;  md->use_ucp = (re->options & PCRE_UCP) != 0;
6616  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6617  md->ignore_skip_arg = FALSE;  md->ignore_skip_arg = 0;
6618    
6619  /* Some options are unpacked into BOOL variables in the hope that testing  /* Some options are unpacked into BOOL variables in the hope that testing
6620  them will be faster than individual option bits. */  them will be faster than individual option bits. */
# Line 6544  if (re->top_backref > 0 && re->top_backr Line 6724  if (re->top_backref > 0 && re->top_backr
6724    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
6725    }    }
6726  else md->offset_vector = offsets;  else md->offset_vector = offsets;
   
6727  md->offset_end = ocount;  md->offset_end = ocount;
6728  md->offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
6729  md->offset_overflow = FALSE;  md->capture_last = 0;
 md->capture_last = -1;  
6730    
6731  /* Reset the working variable associated with each extraction. These should  /* Reset the working variable associated with each extraction. These should
6732  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
# Line 6818  for(;;) Line 6996  for(;;)
6996    md->match_call_count = 0;    md->match_call_count = 0;
6997    md->match_function_type = 0;    md->match_function_type = 0;
6998    md->end_offset_top = 0;    md->end_offset_top = 0;
6999      md->skip_arg_count = 0;
7000    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
7001    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL)
7002        {
7003        start_partial = md->start_used_ptr;
7004        match_partial = start_match;
7005        }
7006    
7007    switch(rc)    switch(rc)
7008      {      {
# Line 6832  for(;;) Line 7015  for(;;)
7015    
7016      case MATCH_SKIP_ARG:      case MATCH_SKIP_ARG:
7017      new_start_match = start_match;      new_start_match = start_match;
7018      md->ignore_skip_arg = TRUE;      md->ignore_skip_arg = md->skip_arg_count;
7019      break;      break;
7020    
7021      /* SKIP passes back the next starting point explicitly, but if it is the      /* SKIP passes back the next starting point explicitly, but if it is no
7022      same as the match we have just done, treat it as NOMATCH. */      greater than the match we have just done, treat it as NOMATCH. */
7023    
7024      case MATCH_SKIP:      case MATCH_SKIP:
7025      if (md->start_match_ptr != start_match)      if (md->start_match_ptr > start_match)
7026        {        {
7027        new_start_match = md->start_match_ptr;        new_start_match = md->start_match_ptr;
7028        break;        break;
# Line 6847  for(;;) Line 7030  for(;;)
7030      /* Fall through */      /* Fall through */
7031    
7032      /* NOMATCH and PRUNE advance by one character. THEN at this level acts      /* NOMATCH and PRUNE advance by one character. THEN at this level acts
7033      exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */      exactly like PRUNE. Unset ignore SKIP-with-argument. */
7034    
7035      case MATCH_NOMATCH:      case MATCH_NOMATCH:
7036      case MATCH_PRUNE:      case MATCH_PRUNE:
7037      case MATCH_THEN:      case MATCH_THEN:
7038      md->ignore_skip_arg = FALSE;      md->ignore_skip_arg = 0;
7039      new_start_match = start_match + 1;      new_start_match = start_match + 1;
7040  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
7041      if (utf)      if (utf)
# Line 6945  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 7128  if (rc == MATCH_MATCH || rc == MATCH_ACC
7128          (arg_offset_max - 2) * sizeof(int));          (arg_offset_max - 2) * sizeof(int));
7129        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
7130        }        }
7131      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
7132      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
7133      (PUBL(free))(md->offset_vector);      (PUBL(free))(md->offset_vector);
7134      }      }
# Line 6953  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 7136  if (rc == MATCH_MATCH || rc == MATCH_ACC
7136    /* Set the return code to the number of captured strings, or 0 if there were    /* Set the return code to the number of captured strings, or 0 if there were
7137    too many to fit into the vector. */    too many to fit into the vector. */
7138    
7139    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?    rc = ((md->capture_last & OVFLBIT) != 0 &&
7140             md->end_offset_top >= arg_offset_max)?
7141      0 : md->end_offset_top/2;      0 : md->end_offset_top/2;
7142    
7143    /* If there is space in the offset vector, set any unused pairs at the end of    /* If there is space in the offset vector, set any unused pairs at the end of
# Line 7026  if (start_partial != NULL) Line 7210  if (start_partial != NULL)
7210      {      {
7211      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7212      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7213        if (offsetcount > 2)
7214          offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
7215      }      }
7216    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
7217    }    }

Legend:
Removed from v.1102  
changed lines
  Added in v.1313

  ViewVC Help
Powered by ViewVC 1.1.5