/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 1189 by ph10, Tue Oct 30 16:34:17 2012 UTC | revision 1271 by ph10, Wed Mar 6 16:50:38 2013 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 56  possible. There are also some static sup Line 56  possible. There are also some static sup
56  #undef min  #undef min
57  #undef max  #undef max
58    
59    /* The md->capture_last field uses the lower 16 bits for the last captured
60    substring (which can never be greater than 65535) and a bit in the top half
61    to mean "capture vector overflowed". This odd way of doing things was
62    implemented when it was realized that preserving and restoring the overflow bit
63    whenever the last capture number was saved/restored made for a neater
64    interface, and doing it this way saved on (a) another variable, which would
65    have increased the stack frame size (a big NO-NO in PCRE) and (b) another
66    separate set of save/restore instructions. The following defines are used in
67    implementing this. */
68    
69    #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
70    #define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
71    #define OVFLBIT     0x00010000    /* The bit that is set for overflow */
72    
73  /* Values for setting in md->match_function_type to indicate two special types  /* Values for setting in md->match_function_type to indicate two special types
74  of call to match(). We do it this way to save on using another stack variable,  of call to match(). We do it this way to save on using another stack variable,
75  as stack usage is to be discouraged. */  as stack usage is to be discouraged. */
# Line 73  defined PCRE_ERROR_xxx codes, which are Line 87  defined PCRE_ERROR_xxx codes, which are
87  negative to avoid the external error codes. */  negative to avoid the external error codes. */
88    
89  #define MATCH_ACCEPT       (-999)  #define MATCH_ACCEPT       (-999)
90  #define MATCH_COMMIT       (-998)  #define MATCH_KETRPOS      (-998)
91  #define MATCH_KETRPOS      (-997)  #define MATCH_ONCE         (-997)
92  #define MATCH_ONCE         (-996)  /* The next 5 must be kept together and in sequence so that a test that checks
93    for any one of them can use a range. */
94    #define MATCH_COMMIT       (-996)
95  #define MATCH_PRUNE        (-995)  #define MATCH_PRUNE        (-995)
96  #define MATCH_SKIP         (-994)  #define MATCH_SKIP         (-994)
97  #define MATCH_SKIP_ARG     (-993)  #define MATCH_SKIP_ARG     (-993)
98  #define MATCH_THEN         (-992)  #define MATCH_THEN         (-992)
99    #define MATCH_BACKTRACK_MAX MATCH_THEN
100    #define MATCH_BACKTRACK_MIN MATCH_COMMIT
101    
102  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
103  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
# Line 199  if (caseless) Line 217  if (caseless)
217        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
218        GETCHARINC(d, p);        GETCHARINC(d, p);
219        ur = GET_UCD(d);        ur = GET_UCD(d);
220        if (c != d && c != d + ur->other_case)        if (c != d && c != d + ur->other_case)
221          {          {
222          const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;          const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
223          for (;;)          for (;;)
224            {            {
225            if (c < *pp) return -1;            if (c < *pp) return -1;
# Line 219  if (caseless) Line 237  if (caseless)
237      {      {
238      while (length-- > 0)      while (length-- > 0)
239        {        {
240        pcre_uchar cc, cp;        pcre_uint32 cc, cp;
241        if (eptr >= md->end_subject) return -2;   /* Partial match */        if (eptr >= md->end_subject) return -2;   /* Partial match */
242        cc = RAWUCHARTEST(eptr);        cc = RAWUCHARTEST(eptr);
243        cp = RAWUCHARTEST(p);        cp = RAWUCHARTEST(p);
# Line 294  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 312  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
312         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
313         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
314         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
315         RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };         RM61,  RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
316    
317  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
318  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 416  typedef struct heapframe { Line 434  typedef struct heapframe {
434    int Xlength;    int Xlength;
435    int Xmax;    int Xmax;
436    int Xmin;    int Xmin;
437    int Xnumber;    unsigned int Xnumber;
438    int Xoffset;    int Xoffset;
439    int Xop;    unsigned int Xop;
440    int Xsave_capture_last;    pcre_int32 Xsave_capture_last;
441    int Xsave_offset1, Xsave_offset2, Xsave_offset3;    int Xsave_offset1, Xsave_offset2, Xsave_offset3;
442    int Xstacksave[REC_STACK_SAVE_MAX];    int Xstacksave[REC_STACK_SAVE_MAX];
443    
# Line 634  int max; Line 652  int max;
652  int min;  int min;
653  unsigned int number;  unsigned int number;
654  int offset;  int offset;
655  pcre_uchar op;  unsigned int op;
656  int save_capture_last;  pcre_int32 save_capture_last;
657  int save_offset1, save_offset2, save_offset3;  int save_offset1, save_offset2, save_offset3;
658  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
659    
# Line 1066  for (;;) Line 1084  for (;;)
1084        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1085    
1086        save_mark = md->mark;        save_mark = md->mark;
1087          save_capture_last = md->capture_last;
1088        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1089          RM2);          RM2);
1090    
# Line 1097  for (;;) Line 1116  for (;;)
1116        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1117        md->mark = save_mark;        md->mark = save_mark;
1118        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1119          md->capture_last = save_capture_last;
1120        }        }
1121    
1122      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
# Line 1218  for (;;) Line 1238  for (;;)
1238      POSSESSIVE_NON_CAPTURE:      POSSESSIVE_NON_CAPTURE:
1239      matched_once = FALSE;      matched_once = FALSE;
1240      code_offset = (int)(ecode - md->start_code);      code_offset = (int)(ecode - md->start_code);
1241        save_capture_last = md->capture_last;
1242    
1243      for (;;)      for (;;)
1244        {        {
# Line 1247  for (;;) Line 1268  for (;;)
1268        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1269        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1270        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1271          md->capture_last = save_capture_last;
1272        }        }
1273    
1274      if (matched_once || allow_zero)      if (matched_once || allow_zero)
# Line 1291  for (;;) Line 1313  for (;;)
1313          cb.pattern_position = GET(ecode, LINK_SIZE + 3);          cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1314          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1315          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1316          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last & CAPLMASK;
1317            /* Internal change requires this for API compatibility. */
1318            if (cb.capture_last == 0) cb.capture_last = -1;
1319          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1320          cb.mark             = md->nomatch_mark;          cb.mark             = md->nomatch_mark;
1321          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1322          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1323          }          }
1324        ecode += PRIV(OP_lengths)[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1325          codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1326        }        }
1327    
1328      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1513  for (;;) Line 1538  for (;;)
1538      to close any currently open capturing brackets. */      to close any currently open capturing brackets. */
1539    
1540      case OP_CLOSE:      case OP_CLOSE:
1541      number = GET2(ecode, 1);      number = GET2(ecode, 1);   /* Must be less than 65536 */
1542      offset = number << 1;      offset = number << 1;
1543    
1544  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 1521  for (;;) Line 1546  for (;;)
1546        printf("\n");        printf("\n");
1547  #endif  #endif
1548    
1549      md->capture_last = number;      md->capture_last = (md->capture_last & OVFLMASK) | number;
1550      if (offset >= md->offset_max) md->offset_overflow = TRUE; else      if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1551        {        {
1552        md->offset_vector[offset] =        md->offset_vector[offset] =
1553          md->offset_vector[md->offset_end - number];          md->offset_vector[md->offset_end - number];
# Line 1716  for (;;) Line 1741  for (;;)
1741        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1742        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1743        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1744        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last & CAPLMASK;
1745          /* Internal change requires this for API compatibility. */
1746          if (cb.capture_last == 0) cb.capture_last = -1;
1747        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1748        cb.mark             = md->nomatch_mark;        cb.mark             = md->nomatch_mark;
1749        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
# Line 1762  for (;;) Line 1789  for (;;)
1789        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1790    
1791        new_recursive.group_num = recno;        new_recursive.group_num = recno;
1792          new_recursive.saved_capture_last = md->capture_last;
1793        new_recursive.subject_position = eptr;        new_recursive.subject_position = eptr;
1794        new_recursive.prevrec = md->recursive;        new_recursive.prevrec = md->recursive;
1795        md->recursive = &new_recursive;        md->recursive = &new_recursive;
# Line 1785  for (;;) Line 1813  for (;;)
1813              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1814    
1815        /* OK, now we can do the recursion. After processing each alternative,        /* OK, now we can do the recursion. After processing each alternative,
1816        restore the offset data. If there were nested recursions, md->recursive        restore the offset data and the last captured value. If there were nested
1817        might be changed, so reset it before looping. */        recursions, md->recursive might be changed, so reset it before looping.
1818          */
1819    
1820        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1821        cbegroup = (*callpat >= OP_SBRA);        cbegroup = (*callpat >= OP_SBRA);
# Line 1797  for (;;) Line 1826  for (;;)
1826            md, eptrb, RM6);            md, eptrb, RM6);
1827          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1828              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1829            md->capture_last = new_recursive.saved_capture_last;
1830          md->recursive = new_recursive.prevrec;          md->recursive = new_recursive.prevrec;
1831          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1832            {            {
# Line 1813  for (;;) Line 1843  for (;;)
1843            goto RECURSION_MATCHED;        /* Exit loop; end processing */            goto RECURSION_MATCHED;        /* Exit loop; end processing */
1844            }            }
1845    
1846          /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it          /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
1847          is treated as NOMATCH. */          recursion; they are treated as NOMATCH. These codes are defined in a
1848            range that can be tested for. Any other return code is an error. */
1849    
1850          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&          else if (rrc != MATCH_NOMATCH &&
1851                   rrc != MATCH_COMMIT)                   (rrc < MATCH_BACKTRACK_MIN || rrc > MATCH_BACKTRACK_MAX))
1852            {            {
1853            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1854            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1947  for (;;) Line 1978  for (;;)
1978    
1979        /* Deal with capturing */        /* Deal with capturing */
1980    
1981        md->capture_last = number;        md->capture_last = (md->capture_last & OVFLMASK) | number;
1982        if (offset >= md->offset_max) md->offset_overflow = TRUE; else        if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1983          {          {
1984          /* If offset is greater than offset_top, it means that we are          /* If offset is greater than offset_top, it means that we are
1985          "skipping" a capturing group, and that group's offsets must be marked          "skipping" a capturing group, and that group's offsets must be marked
# Line 2532  for (;;) Line 2563  for (;;)
2563        }        }
2564      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2565        {        {
2566        const pcre_uint32 *cp;        const pcre_uint32 *cp;
2567        const ucd_record *prop = GET_UCD(c);        const ucd_record *prop = GET_UCD(c);
2568    
2569        switch(ecode[1])        switch(ecode[1])
# Line 2594  for (;;) Line 2625  for (;;)
2625          break;          break;
2626    
2627          case PT_CLIST:          case PT_CLIST:
2628          cp = PRIV(ucd_caseless_sets) + prop->caseset;          cp = PRIV(ucd_caseless_sets) + ecode[2];
2629          for (;;)          for (;;)
2630            {            {
2631            if (c < *cp)            if (c < *cp)
# Line 2604  for (;;) Line 2635  for (;;)
2635            }            }
2636          break;          break;
2637    
2638            case PT_UCNC:
2639            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2640                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2641                 c >= 0xe000) == (op == OP_NOTPROP))
2642              RRETURN(MATCH_NOMATCH);
2643            break;
2644    
2645          /* This should never occur */          /* This should never occur */
2646    
2647          default:          default:
# Line 3190  for (;;) Line 3228  for (;;)
3228    
3229        if (fc < 128)        if (fc < 128)
3230          {          {
3231          pcre_uchar cc = RAWUCHAR(eptr);          pcre_uint32 cc = RAWUCHAR(eptr);
3232          if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);          if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3233          ecode++;          ecode++;
3234          eptr++;          eptr++;
# Line 3439  for (;;) Line 3477  for (;;)
3477    
3478        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3479          {          {
3480          pcre_uchar cc;          pcre_uint32 cc;                 /* Faster than pcre_uchar */
   
3481          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3482            {            {
3483            SCHECK_PARTIAL();            SCHECK_PARTIAL();
# Line 3455  for (;;) Line 3492  for (;;)
3492          {          {
3493          for (fi = min;; fi++)          for (fi = min;; fi++)
3494            {            {
3495            pcre_uchar cc;            pcre_uint32 cc;               /* Faster than pcre_uchar */
   
3496            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3497            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3498            if (fi >= max) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
# Line 3476  for (;;) Line 3512  for (;;)
3512          pp = eptr;          pp = eptr;
3513          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3514            {            {
3515            pcre_uchar cc;            pcre_uint32 cc;               /* Faster than pcre_uchar */
   
3516            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3517              {              {
3518              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4203  for (;;) Line 4238  for (;;)
4238                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4239              }              }
4240            break;            break;
4241    
4242            case PT_CLIST:            case PT_CLIST:
4243            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4244              {              {
4245              const pcre_uint32 *cp;              const pcre_uint32 *cp;
4246              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4247                {                {
# Line 4214  for (;;) Line 4249  for (;;)
4249                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4250                }                }
4251              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4252              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
4253              for (;;)              for (;;)
4254                {                {
4255                if (c < *cp)                if (c < *cp)
4256                  { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }                  { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4257                if (c == *cp++)                if (c == *cp++)
4258                  { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }                  { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4259                }                }
4260              }              }
4261            break;            break;
4262    
4263              case PT_UCNC:
4264              for (i = 1; i <= min; i++)
4265                {
4266                if (eptr >= md->end_subject)
4267                  {
4268                  SCHECK_PARTIAL();
4269                  RRETURN(MATCH_NOMATCH);
4270                  }
4271                GETCHARINCTEST(c, eptr);
4272                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4273                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4274                     c >= 0xe000) == prop_fail_result)
4275                  RRETURN(MATCH_NOMATCH);
4276                }
4277              break;
4278    
4279            /* This should not occur */            /* This should not occur */
4280    
4281            default:            default:
# Line 4430  for (;;) Line 4481  for (;;)
4481          case OP_DIGIT:          case OP_DIGIT:
4482          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4483            {            {
4484            pcre_uchar cc;            pcre_uint32 cc;
   
4485            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4486              {              {
4487              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4448  for (;;) Line 4498  for (;;)
4498          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4499          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4500            {            {
4501            pcre_uchar cc;            pcre_uint32 cc;
   
4502            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4503              {              {
4504              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4466  for (;;) Line 4515  for (;;)
4515          case OP_WHITESPACE:          case OP_WHITESPACE:
4516          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4517            {            {
4518            pcre_uchar cc;            pcre_uint32 cc;
   
4519            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4520              {              {
4521              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4484  for (;;) Line 4532  for (;;)
4532          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4533          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4534            {            {
4535            pcre_uchar cc;            pcre_uint32 cc;
   
4536            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4537              {              {
4538              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4502  for (;;) Line 4549  for (;;)
4549          case OP_WORDCHAR:          case OP_WORDCHAR:
4550          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4551            {            {
4552            pcre_uchar cc;            pcre_uint32 cc;
   
4553            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4554              {              {
4555              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4954  for (;;) Line 5000  for (;;)
5000    
5001            case PT_CLIST:            case PT_CLIST:
5002            for (fi = min;; fi++)            for (fi = min;; fi++)
5003              {              {
5004              const pcre_uint32 *cp;              const pcre_uint32 *cp;
5005              RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
5006              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 4965  for (;;) Line 5011  for (;;)
5011                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5012                }                }
5013              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
5014              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
5015              for (;;)              for (;;)
5016                {                {
5017                if (c < *cp)                if (c < *cp)
# Line 4976  for (;;) Line 5022  for (;;)
5022              }              }
5023            /* Control never gets here */            /* Control never gets here */
5024    
5025              case PT_UCNC:
5026              for (fi = min;; fi++)
5027                {
5028                RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);
5029                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5030                if (fi >= max) RRETURN(MATCH_NOMATCH);
5031                if (eptr >= md->end_subject)
5032                  {
5033                  SCHECK_PARTIAL();
5034                  RRETURN(MATCH_NOMATCH);
5035                  }
5036                GETCHARINCTEST(c, eptr);
5037                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5038                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5039                     c >= 0xe000) == prop_fail_result)
5040                  RRETURN(MATCH_NOMATCH);
5041                }
5042              /* Control never gets here */
5043    
5044            /* This should never occur */            /* This should never occur */
5045            default:            default:
5046            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
# Line 5445  for (;;) Line 5510  for (;;)
5510              eptr+= len;              eptr+= len;
5511              }              }
5512            break;            break;
5513    
5514            case PT_CLIST:            case PT_CLIST:
5515            for (i = min; i < max; i++)            for (i = min; i < max; i++)
5516              {              {
# Line 5457  for (;;) Line 5522  for (;;)
5522                break;                break;
5523                }                }
5524              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5525              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
5526              for (;;)              for (;;)
5527                {                {
5528                if (c < *cp)                if (c < *cp)
5529                  { if (prop_fail_result) break; else goto GOT_MAX; }                  { if (prop_fail_result) break; else goto GOT_MAX; }
5530                if (c == *cp++)                if (c == *cp++)
5531                  { if (prop_fail_result) goto GOT_MAX; else break; }                  { if (prop_fail_result) goto GOT_MAX; else break; }
5532                }                }
5533              eptr += len;              eptr += len;
5534                }
5535              GOT_MAX:
5536              break;
5537    
5538              case PT_UCNC:
5539              for (i = min; i < max; i++)
5540                {
5541                int len = 1;
5542                if (eptr >= md->end_subject)
5543                  {
5544                  SCHECK_PARTIAL();
5545                  break;
5546                  }
5547                GETCHARLENTEST(c, eptr, len);
5548                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5549                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5550                     c >= 0xe000) == prop_fail_result)
5551                  break;
5552                eptr += len;
5553              }              }
           GOT_MAX:  
5554            break;            break;
5555    
5556            default:            default:
# Line 6111  switch (frame->Xwhere) Line 6194  switch (frame->Xwhere)
6194    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
6195  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6196    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6197    LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)    LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)
6198  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
6199  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
6200    default:    default:
# Line 6264  const pcre_uint8 *start_bits = NULL; Line 6347  const pcre_uint8 *start_bits = NULL;
6347  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6348  PCRE_PUCHAR end_subject;  PCRE_PUCHAR end_subject;
6349  PCRE_PUCHAR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
6350    PCRE_PUCHAR match_partial;
6351  PCRE_PUCHAR req_char_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6352    
6353  const pcre_study_data *study;  const pcre_study_data *study;
# Line 6542  if (re->top_backref > 0 && re->top_backr Line 6626  if (re->top_backref > 0 && re->top_backr
6626    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
6627    }    }
6628  else md->offset_vector = offsets;  else md->offset_vector = offsets;
   
6629  md->offset_end = ocount;  md->offset_end = ocount;
6630  md->offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
6631  md->offset_overflow = FALSE;  md->capture_last = 0;
 md->capture_last = -1;  
6632    
6633  /* Reset the working variable associated with each extraction. These should  /* Reset the working variable associated with each extraction. These should
6634  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
# Line 6817  for(;;) Line 6899  for(;;)
6899    md->match_function_type = 0;    md->match_function_type = 0;
6900    md->end_offset_top = 0;    md->end_offset_top = 0;
6901    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6902    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL)
6903        {
6904        start_partial = md->start_used_ptr;
6905        match_partial = start_match;
6906        }
6907    
6908    switch(rc)    switch(rc)
6909      {      {
# Line 6943  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 7029  if (rc == MATCH_MATCH || rc == MATCH_ACC
7029          (arg_offset_max - 2) * sizeof(int));          (arg_offset_max - 2) * sizeof(int));
7030        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
7031        }        }
7032      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
7033      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
7034      (PUBL(free))(md->offset_vector);      (PUBL(free))(md->offset_vector);
7035      }      }
# Line 6951  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 7037  if (rc == MATCH_MATCH || rc == MATCH_ACC
7037    /* Set the return code to the number of captured strings, or 0 if there were    /* Set the return code to the number of captured strings, or 0 if there were
7038    too many to fit into the vector. */    too many to fit into the vector. */
7039    
7040    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?    rc = ((md->capture_last & OVFLBIT) != 0 &&
7041             md->end_offset_top >= arg_offset_max)?
7042      0 : md->end_offset_top/2;      0 : md->end_offset_top/2;
7043    
7044    /* If there is space in the offset vector, set any unused pairs at the end of    /* If there is space in the offset vector, set any unused pairs at the end of
# Line 7024  if (start_partial != NULL) Line 7111  if (start_partial != NULL)
7111      {      {
7112      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7113      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7114        if (offsetcount > 2)
7115          offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
7116      }      }
7117    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
7118    }    }

Legend:
Removed from v.1189  
changed lines
  Added in v.1271

  ViewVC Help
Powered by ViewVC 1.1.5