/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1155 by ph10, Sun Oct 21 17:13:28 2012 UTC revision 1265 by ph10, Sun Mar 3 10:42:46 2013 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 56  possible. There are also some static sup Line 56  possible. There are also some static sup
56  #undef min  #undef min
57  #undef max  #undef max
58    
59    /* The md->capture_last field uses the lower 16 bits for the last captured
60    substring (which can never be greater than 65535) and a bit in the top half
61    to mean "capture vector overflowed". This odd way of doing things was
62    implemented when it was realized that preserving and restoring the overflow bit
63    whenever the last capture number was saved/restored made for a neater
64    interface, and doing it this way saved on (a) another variable, which would
65    have increased the stack frame size (a big NO-NO in PCRE) and (b) another
66    separate set of save/restore instructions. The following defines are used in
67    implementing this. */
68    
69    #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
70    #define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
71    #define OVFLBIT     0x00010000    /* The bit that is set for overflow */
72    
73  /* Values for setting in md->match_function_type to indicate two special types  /* Values for setting in md->match_function_type to indicate two special types
74  of call to match(). We do it this way to save on using another stack variable,  of call to match(). We do it this way to save on using another stack variable,
75  as stack usage is to be discouraged. */  as stack usage is to be discouraged. */
# Line 199  if (caseless) Line 213  if (caseless)
213        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
214        GETCHARINC(d, p);        GETCHARINC(d, p);
215        ur = GET_UCD(d);        ur = GET_UCD(d);
216        if (c != d && c != d + ur->other_case)        if (c != d && c != d + ur->other_case)
217          {          {
218          const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;          const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
219          for (;;)          for (;;)
220            {            {
221            if (c < *pp) return -1;            if (c < *pp) return -1;
# Line 219  if (caseless) Line 233  if (caseless)
233      {      {
234      while (length-- > 0)      while (length-- > 0)
235        {        {
236        pcre_uchar cc, cp;        pcre_uint32 cc, cp;
237        if (eptr >= md->end_subject) return -2;   /* Partial match */        if (eptr >= md->end_subject) return -2;   /* Partial match */
238        cc = RAWUCHARTEST(eptr);        cc = RAWUCHARTEST(eptr);
239        cp = RAWUCHARTEST(p);        cp = RAWUCHARTEST(p);
# Line 294  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 308  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
308         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
309         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
310         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
311         RM61,  RM62, RM63, RM64, RM65, RM66 };         RM61,  RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
312    
313  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
314  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 416  typedef struct heapframe { Line 430  typedef struct heapframe {
430    int Xlength;    int Xlength;
431    int Xmax;    int Xmax;
432    int Xmin;    int Xmin;
433    int Xnumber;    unsigned int Xnumber;
434    int Xoffset;    int Xoffset;
435    int Xop;    unsigned int Xop;
436    int Xsave_capture_last;    pcre_int32 Xsave_capture_last;
437    int Xsave_offset1, Xsave_offset2, Xsave_offset3;    int Xsave_offset1, Xsave_offset2, Xsave_offset3;
438    int Xstacksave[REC_STACK_SAVE_MAX];    int Xstacksave[REC_STACK_SAVE_MAX];
439    
# Line 634  int max; Line 648  int max;
648  int min;  int min;
649  unsigned int number;  unsigned int number;
650  int offset;  int offset;
651  pcre_uchar op;  unsigned int op;
652  int save_capture_last;  pcre_int32 save_capture_last;
653  int save_offset1, save_offset2, save_offset3;  int save_offset1, save_offset2, save_offset3;
654  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
655    
# Line 1066  for (;;) Line 1080  for (;;)
1080        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1081    
1082        save_mark = md->mark;        save_mark = md->mark;
1083          save_capture_last = md->capture_last;
1084        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1085          RM2);          RM2);
1086    
# Line 1097  for (;;) Line 1112  for (;;)
1112        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1113        md->mark = save_mark;        md->mark = save_mark;
1114        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1115          md->capture_last = save_capture_last;
1116        }        }
1117    
1118      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
# Line 1218  for (;;) Line 1234  for (;;)
1234      POSSESSIVE_NON_CAPTURE:      POSSESSIVE_NON_CAPTURE:
1235      matched_once = FALSE;      matched_once = FALSE;
1236      code_offset = (int)(ecode - md->start_code);      code_offset = (int)(ecode - md->start_code);
1237        save_capture_last = md->capture_last;
1238    
1239      for (;;)      for (;;)
1240        {        {
# Line 1247  for (;;) Line 1264  for (;;)
1264        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1265        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1266        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1267          md->capture_last = save_capture_last;
1268        }        }
1269    
1270      if (matched_once || allow_zero)      if (matched_once || allow_zero)
# Line 1291  for (;;) Line 1309  for (;;)
1309          cb.pattern_position = GET(ecode, LINK_SIZE + 3);          cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1310          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1311          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1312          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last & CAPLMASK;
1313            /* Internal change requires this for API compatibility. */
1314            if (cb.capture_last == 0) cb.capture_last = -1;
1315          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1316          cb.mark             = md->nomatch_mark;          cb.mark             = md->nomatch_mark;
1317          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1318          if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
1319          }          }
1320        ecode += PRIV(OP_lengths)[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
1321          codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1322        }        }
1323    
1324      condcode = ecode[LINK_SIZE+1];      condcode = ecode[LINK_SIZE+1];
# Line 1513  for (;;) Line 1534  for (;;)
1534      to close any currently open capturing brackets. */      to close any currently open capturing brackets. */
1535    
1536      case OP_CLOSE:      case OP_CLOSE:
1537      number = GET2(ecode, 1);      number = GET2(ecode, 1);   /* Must be less than 65536 */
1538      offset = number << 1;      offset = number << 1;
1539    
1540  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 1521  for (;;) Line 1542  for (;;)
1542        printf("\n");        printf("\n");
1543  #endif  #endif
1544    
1545      md->capture_last = number;      md->capture_last = (md->capture_last & OVFLMASK) | number;
1546      if (offset >= md->offset_max) md->offset_overflow = TRUE; else      if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1547        {        {
1548        md->offset_vector[offset] =        md->offset_vector[offset] =
1549          md->offset_vector[md->offset_end - number];          md->offset_vector[md->offset_end - number];
# Line 1716  for (;;) Line 1737  for (;;)
1737        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1738        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1739        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1740        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last & CAPLMASK;
1741          /* Internal change requires this for API compatibility. */
1742          if (cb.capture_last == 0) cb.capture_last = -1;
1743        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1744        cb.mark             = md->nomatch_mark;        cb.mark             = md->nomatch_mark;
1745        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
# Line 1762  for (;;) Line 1785  for (;;)
1785        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1786    
1787        new_recursive.group_num = recno;        new_recursive.group_num = recno;
1788          new_recursive.saved_capture_last = md->capture_last;
1789        new_recursive.subject_position = eptr;        new_recursive.subject_position = eptr;
1790        new_recursive.prevrec = md->recursive;        new_recursive.prevrec = md->recursive;
1791        md->recursive = &new_recursive;        md->recursive = &new_recursive;
# Line 1785  for (;;) Line 1809  for (;;)
1809              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1810    
1811        /* OK, now we can do the recursion. After processing each alternative,        /* OK, now we can do the recursion. After processing each alternative,
1812        restore the offset data. If there were nested recursions, md->recursive        restore the offset data and the last captured value. If there were nested
1813        might be changed, so reset it before looping. */        recursions, md->recursive might be changed, so reset it before looping.
1814          */
1815    
1816        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1817        cbegroup = (*callpat >= OP_SBRA);        cbegroup = (*callpat >= OP_SBRA);
# Line 1797  for (;;) Line 1822  for (;;)
1822            md, eptrb, RM6);            md, eptrb, RM6);
1823          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1824              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1825            md->capture_last = new_recursive.saved_capture_last;
1826          md->recursive = new_recursive.prevrec;          md->recursive = new_recursive.prevrec;
1827          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1828            {            {
# Line 1947  for (;;) Line 1973  for (;;)
1973    
1974        /* Deal with capturing */        /* Deal with capturing */
1975    
1976        md->capture_last = number;        md->capture_last = (md->capture_last & OVFLMASK) | number;
1977        if (offset >= md->offset_max) md->offset_overflow = TRUE; else        if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1978          {          {
1979          /* If offset is greater than offset_top, it means that we are          /* If offset is greater than offset_top, it means that we are
1980          "skipping" a capturing group, and that group's offsets must be marked          "skipping" a capturing group, and that group's offsets must be marked
# Line 2532  for (;;) Line 2558  for (;;)
2558        }        }
2559      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2560        {        {
2561        const pcre_uint32 *cp;        const pcre_uint32 *cp;
2562        const ucd_record *prop = GET_UCD(c);        const ucd_record *prop = GET_UCD(c);
2563    
2564        switch(ecode[1])        switch(ecode[1])
# Line 2594  for (;;) Line 2620  for (;;)
2620          break;          break;
2621    
2622          case PT_CLIST:          case PT_CLIST:
2623          cp = PRIV(ucd_caseless_sets) + prop->caseset;          cp = PRIV(ucd_caseless_sets) + ecode[2];
2624          for (;;)          for (;;)
2625            {            {
2626            if (c < *cp)            if (c < *cp)
# Line 2603  for (;;) Line 2629  for (;;)
2629              { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }              { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2630            }            }
2631          break;          break;
2632    
2633            case PT_UCNC:
2634            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2635                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2636                 c >= 0xe000) == (op == OP_NOTPROP))
2637              RRETURN(MATCH_NOMATCH);
2638            break;
2639    
2640          /* This should never occur */          /* This should never occur */
2641    
# Line 3190  for (;;) Line 3223  for (;;)
3223    
3224        if (fc < 128)        if (fc < 128)
3225          {          {
3226          pcre_uchar cc = RAWUCHAR(eptr);          pcre_uint32 cc = RAWUCHAR(eptr);
3227          if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);          if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3228          ecode++;          ecode++;
3229          eptr++;          eptr++;
# Line 3439  for (;;) Line 3472  for (;;)
3472    
3473        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3474          {          {
3475          pcre_uchar cc;          pcre_uint32 cc;                 /* Faster than pcre_uchar */
   
3476          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3477            {            {
3478            SCHECK_PARTIAL();            SCHECK_PARTIAL();
# Line 3455  for (;;) Line 3487  for (;;)
3487          {          {
3488          for (fi = min;; fi++)          for (fi = min;; fi++)
3489            {            {
3490            pcre_uchar cc;            pcre_uint32 cc;               /* Faster than pcre_uchar */
   
3491            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3492            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3493            if (fi >= max) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
# Line 3476  for (;;) Line 3507  for (;;)
3507          pp = eptr;          pp = eptr;
3508          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3509            {            {
3510            pcre_uchar cc;            pcre_uint32 cc;               /* Faster than pcre_uchar */
   
3511            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3512              {              {
3513              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4203  for (;;) Line 4233  for (;;)
4233                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4234              }              }
4235            break;            break;
4236    
4237            case PT_CLIST:            case PT_CLIST:
4238            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4239              {              {
4240              const pcre_uint32 *cp;              const pcre_uint32 *cp;
4241              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4242                {                {
# Line 4214  for (;;) Line 4244  for (;;)
4244                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4245                }                }
4246              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4247              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
4248              for (;;)              for (;;)
4249                {                {
4250                if (c < *cp)                if (c < *cp)
4251                  { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }                  { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4252                if (c == *cp++)                if (c == *cp++)
4253                  { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }                  { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4254                }                }
4255              }              }
4256            break;            break;
4257    
4258              case PT_UCNC:
4259              for (i = 1; i <= min; i++)
4260                {
4261                if (eptr >= md->end_subject)
4262                  {
4263                  SCHECK_PARTIAL();
4264                  RRETURN(MATCH_NOMATCH);
4265                  }
4266                GETCHARINCTEST(c, eptr);
4267                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4268                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4269                     c >= 0xe000) == prop_fail_result)
4270                  RRETURN(MATCH_NOMATCH);
4271                }
4272              break;
4273    
4274            /* This should not occur */            /* This should not occur */
4275    
4276            default:            default:
# Line 4430  for (;;) Line 4476  for (;;)
4476          case OP_DIGIT:          case OP_DIGIT:
4477          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4478            {            {
4479            pcre_uchar cc;            pcre_uint32 cc;
   
4480            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4481              {              {
4482              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4448  for (;;) Line 4493  for (;;)
4493          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4494          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4495            {            {
4496            pcre_uchar cc;            pcre_uint32 cc;
   
4497            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4498              {              {
4499              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4466  for (;;) Line 4510  for (;;)
4510          case OP_WHITESPACE:          case OP_WHITESPACE:
4511          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4512            {            {
4513            pcre_uchar cc;            pcre_uint32 cc;
   
4514            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4515              {              {
4516              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4484  for (;;) Line 4527  for (;;)
4527          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4528          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4529            {            {
4530            pcre_uchar cc;            pcre_uint32 cc;
   
4531            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4532              {              {
4533              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4502  for (;;) Line 4544  for (;;)
4544          case OP_WORDCHAR:          case OP_WORDCHAR:
4545          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4546            {            {
4547            pcre_uchar cc;            pcre_uint32 cc;
   
4548            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4549              {              {
4550              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4954  for (;;) Line 4995  for (;;)
4995    
4996            case PT_CLIST:            case PT_CLIST:
4997            for (fi = min;; fi++)            for (fi = min;; fi++)
4998              {              {
4999              const pcre_uint32 *cp;              const pcre_uint32 *cp;
5000              RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
5001              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5002              if (fi >= max) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
5003              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4965  for (;;) Line 5006  for (;;)
5006                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5007                }                }
5008              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
5009              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
5010              for (;;)              for (;;)
5011                {                {
5012                if (c < *cp)                if (c < *cp)
# Line 4975  for (;;) Line 5016  for (;;)
5016                }                }
5017              }              }
5018            /* Control never gets here */            /* Control never gets here */
5019    
5020              case PT_UCNC:
5021              for (fi = min;; fi++)
5022                {
5023                RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);
5024                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5025                if (fi >= max) RRETURN(MATCH_NOMATCH);
5026                if (eptr >= md->end_subject)
5027                  {
5028                  SCHECK_PARTIAL();
5029                  RRETURN(MATCH_NOMATCH);
5030                  }
5031                GETCHARINCTEST(c, eptr);
5032                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5033                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5034                     c >= 0xe000) == prop_fail_result)
5035                  RRETURN(MATCH_NOMATCH);
5036                }
5037              /* Control never gets here */
5038    
5039            /* This should never occur */            /* This should never occur */
5040            default:            default:
# Line 5445  for (;;) Line 5505  for (;;)
5505              eptr+= len;              eptr+= len;
5506              }              }
5507            break;            break;
5508    
5509            case PT_CLIST:            case PT_CLIST:
5510            for (i = min; i < max; i++)            for (i = min; i < max; i++)
5511              {              {
# Line 5457  for (;;) Line 5517  for (;;)
5517                break;                break;
5518                }                }
5519              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5520              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
5521              for (;;)              for (;;)
5522                {                {
5523                if (c < *cp)                if (c < *cp)
5524                  { if (prop_fail_result) break; else goto GOT_MAX; }                  { if (prop_fail_result) break; else goto GOT_MAX; }
5525                if (c == *cp++)                if (c == *cp++)
5526                  { if (prop_fail_result) goto GOT_MAX; else break; }                  { if (prop_fail_result) goto GOT_MAX; else break; }
5527                }                }
5528              eptr += len;              eptr += len;
5529              }              }
5530            GOT_MAX:            GOT_MAX:
5531              break;
5532    
5533              case PT_UCNC:
5534              for (i = min; i < max; i++)
5535                {
5536                int len = 1;
5537                if (eptr >= md->end_subject)
5538                  {
5539                  SCHECK_PARTIAL();
5540                  break;
5541                  }
5542                GETCHARLENTEST(c, eptr, len);
5543                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5544                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5545                     c >= 0xe000) == prop_fail_result)
5546                  break;
5547                eptr += len;
5548                }
5549            break;            break;
5550    
5551            default:            default:
# Line 6111  switch (frame->Xwhere) Line 6189  switch (frame->Xwhere)
6189    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
6190  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6191    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6192    LBL(59) LBL(60) LBL(61) LBL(62)    LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)
6193  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
6194  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
6195    default:    default:
6196    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
   
 printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);  
   
6197    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
6198    }    }
6199  #undef LBL  #undef LBL
# Line 6267  const pcre_uint8 *start_bits = NULL; Line 6342  const pcre_uint8 *start_bits = NULL;
6342  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6343  PCRE_PUCHAR end_subject;  PCRE_PUCHAR end_subject;
6344  PCRE_PUCHAR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
6345    PCRE_PUCHAR match_partial;
6346  PCRE_PUCHAR req_char_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6347    
6348  const pcre_study_data *study;  const pcre_study_data *study;
# Line 6298  if ((options & ~PUBLIC_EXEC_OPTIONS) != Line 6374  if ((options & ~PUBLIC_EXEC_OPTIONS) !=
6374  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6375    return PCRE_ERROR_NULL;    return PCRE_ERROR_NULL;
6376  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6377    if (length < 0) return PCRE_ERROR_BADLENGTH;
6378  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6379    
6380  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
# Line 6364  if (extra_data != NULL Line 6441  if (extra_data != NULL
6441      && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |      && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6442                               PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT                               PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6443      && extra_data->executable_jit != NULL      && extra_data->executable_jit != NULL
6444      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |      && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
                     PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |  
                     PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)  
6445    {    {
6446    rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,    rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
6447         start_offset, options, offsets, offsetcount);         start_offset, options, offsets, offsetcount);
6448    
6449    /* PCRE_ERROR_NULL means that the selected normal or partial matching    /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial matching
6450    mode is not compiled. In this case we simply fallback to interpreter. */    mode is not compiled. In this case we simply fall back to the interpreter. */
6451    
6452    if (rc != PCRE_ERROR_NULL) return rc;    if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
6453    }    }
6454  #endif  #endif
6455    
# Line 6546  if (re->top_backref > 0 && re->top_backr Line 6621  if (re->top_backref > 0 && re->top_backr
6621    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
6622    }    }
6623  else md->offset_vector = offsets;  else md->offset_vector = offsets;
   
6624  md->offset_end = ocount;  md->offset_end = ocount;
6625  md->offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
6626  md->offset_overflow = FALSE;  md->capture_last = 0;
 md->capture_last = -1;  
6627    
6628  /* Reset the working variable associated with each extraction. These should  /* Reset the working variable associated with each extraction. These should
6629  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
# Line 6821  for(;;) Line 6894  for(;;)
6894    md->match_function_type = 0;    md->match_function_type = 0;
6895    md->end_offset_top = 0;    md->end_offset_top = 0;
6896    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6897    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL)
6898        {
6899        start_partial = md->start_used_ptr;
6900        match_partial = start_match;
6901        }
6902    
6903    switch(rc)    switch(rc)
6904      {      {
# Line 6947  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 7024  if (rc == MATCH_MATCH || rc == MATCH_ACC
7024          (arg_offset_max - 2) * sizeof(int));          (arg_offset_max - 2) * sizeof(int));
7025        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
7026        }        }
7027      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
7028      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
7029      (PUBL(free))(md->offset_vector);      (PUBL(free))(md->offset_vector);
7030      }      }
# Line 6955  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 7032  if (rc == MATCH_MATCH || rc == MATCH_ACC
7032    /* Set the return code to the number of captured strings, or 0 if there were    /* Set the return code to the number of captured strings, or 0 if there were
7033    too many to fit into the vector. */    too many to fit into the vector. */
7034    
7035    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?    rc = ((md->capture_last & OVFLBIT) != 0 &&
7036             md->end_offset_top >= arg_offset_max)?
7037      0 : md->end_offset_top/2;      0 : md->end_offset_top/2;
7038    
7039    /* If there is space in the offset vector, set any unused pairs at the end of    /* If there is space in the offset vector, set any unused pairs at the end of
# Line 7028  if (start_partial != NULL) Line 7106  if (start_partial != NULL)
7106      {      {
7107      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7108      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7109        if (offsetcount > 2)
7110          offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
7111      }      }
7112    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
7113    }    }

Legend:
Removed from v.1155  
changed lines
  Added in v.1265

  ViewVC Help
Powered by ViewVC 1.1.5