/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 1189 by ph10, Tue Oct 30 16:34:17 2012 UTC | revision 1251 by ph10, Wed Feb 20 17:42:03 2013 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 56  possible. There are also some static sup Line 56  possible. There are also some static sup
56  #undef min  #undef min
57  #undef max  #undef max
58    
59    /* The md->capture_last field uses the lower 16 bits for the last captured
60    substring (which can never be greater than 65535) and a bit in the top half
61    to mean "capture vector overflowed". This odd way of doing things was
62    implemented when it was realized that preserving and restoring the overflow bit
63    whenever the last capture number was saved/restored made for a neater
64    interface, and doing it this way saved on (a) another variable, which would
65    have increased the stack frame size (a big NO-NO in PCRE) and (b) another
66    separate set of save/restore instructions. The following defines are used in
67    implementing this. */
68    
69    #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
70    #define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
71    #define OVFLBIT     0x00010000    /* The bit that is set for overflow */
72    
73  /* Values for setting in md->match_function_type to indicate two special types  /* Values for setting in md->match_function_type to indicate two special types
74  of call to match(). We do it this way to save on using another stack variable,  of call to match(). We do it this way to save on using another stack variable,
75  as stack usage is to be discouraged. */  as stack usage is to be discouraged. */
# Line 199  if (caseless) Line 213  if (caseless)
213        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
214        GETCHARINC(d, p);        GETCHARINC(d, p);
215        ur = GET_UCD(d);        ur = GET_UCD(d);
216        if (c != d && c != d + ur->other_case)        if (c != d && c != d + ur->other_case)
217          {          {
218          const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;          const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
219          for (;;)          for (;;)
220            {            {
221            if (c < *pp) return -1;            if (c < *pp) return -1;
# Line 416  typedef struct heapframe { Line 430  typedef struct heapframe {
430    int Xlength;    int Xlength;
431    int Xmax;    int Xmax;
432    int Xmin;    int Xmin;
433    int Xnumber;    unsigned int Xnumber;
434    int Xoffset;    int Xoffset;
435    int Xop;    unsigned int Xop;
436    int Xsave_capture_last;    pcre_int32 Xsave_capture_last;
437    int Xsave_offset1, Xsave_offset2, Xsave_offset3;    int Xsave_offset1, Xsave_offset2, Xsave_offset3;
438    int Xstacksave[REC_STACK_SAVE_MAX];    int Xstacksave[REC_STACK_SAVE_MAX];
439    
# Line 634  int max; Line 648  int max;
648  int min;  int min;
649  unsigned int number;  unsigned int number;
650  int offset;  int offset;
651  pcre_uchar op;  unsigned int op;
652  int save_capture_last;  pcre_int32 save_capture_last;
653  int save_offset1, save_offset2, save_offset3;  int save_offset1, save_offset2, save_offset3;
654  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
655    
# Line 1066  for (;;) Line 1080  for (;;)
1080        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1081    
1082        save_mark = md->mark;        save_mark = md->mark;
1083          save_capture_last = md->capture_last;
1084        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1085          RM2);          RM2);
1086    
# Line 1097  for (;;) Line 1112  for (;;)
1112        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1113        md->mark = save_mark;        md->mark = save_mark;
1114        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1115          md->capture_last = save_capture_last;
1116        }        }
1117    
1118      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
# Line 1218  for (;;) Line 1234  for (;;)
1234      POSSESSIVE_NON_CAPTURE:      POSSESSIVE_NON_CAPTURE:
1235      matched_once = FALSE;      matched_once = FALSE;
1236      code_offset = (int)(ecode - md->start_code);      code_offset = (int)(ecode - md->start_code);
1237        save_capture_last = md->capture_last;
1238    
1239      for (;;)      for (;;)
1240        {        {
# Line 1247  for (;;) Line 1264  for (;;)
1264        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1265        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1266        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1267          md->capture_last = save_capture_last;
1268        }        }
1269    
1270      if (matched_once || allow_zero)      if (matched_once || allow_zero)
# Line 1291  for (;;) Line 1309  for (;;)
1309          cb.pattern_position = GET(ecode, LINK_SIZE + 3);          cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1310          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1311          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1312          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last & CAPLMASK;
1313            /* Internal change requires this for API compatibility. */
1314            if (cb.capture_last == 0) cb.capture_last = -1;
1315          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1316          cb.mark             = md->nomatch_mark;          cb.mark             = md->nomatch_mark;
1317          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
# Line 1513  for (;;) Line 1533  for (;;)
1533      to close any currently open capturing brackets. */      to close any currently open capturing brackets. */
1534    
1535      case OP_CLOSE:      case OP_CLOSE:
1536      number = GET2(ecode, 1);      number = GET2(ecode, 1);   /* Must be less than 65536 */
1537      offset = number << 1;      offset = number << 1;
1538    
1539  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 1521  for (;;) Line 1541  for (;;)
1541        printf("\n");        printf("\n");
1542  #endif  #endif
1543    
1544      md->capture_last = number;      md->capture_last = (md->capture_last & OVFLMASK) | number;
1545      if (offset >= md->offset_max) md->offset_overflow = TRUE; else      if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1546        {        {
1547        md->offset_vector[offset] =        md->offset_vector[offset] =
1548          md->offset_vector[md->offset_end - number];          md->offset_vector[md->offset_end - number];
# Line 1716  for (;;) Line 1736  for (;;)
1736        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1737        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1738        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1739        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last & CAPLMASK;
1740          /* Internal change requires this for API compatibility. */
1741          if (cb.capture_last == 0) cb.capture_last = -1;
1742        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1743        cb.mark             = md->nomatch_mark;        cb.mark             = md->nomatch_mark;
1744        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
# Line 1762  for (;;) Line 1784  for (;;)
1784        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1785    
1786        new_recursive.group_num = recno;        new_recursive.group_num = recno;
1787          new_recursive.saved_capture_last = md->capture_last;
1788        new_recursive.subject_position = eptr;        new_recursive.subject_position = eptr;
1789        new_recursive.prevrec = md->recursive;        new_recursive.prevrec = md->recursive;
1790        md->recursive = &new_recursive;        md->recursive = &new_recursive;
# Line 1785  for (;;) Line 1808  for (;;)
1808              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1809    
1810        /* OK, now we can do the recursion. After processing each alternative,        /* OK, now we can do the recursion. After processing each alternative,
1811        restore the offset data. If there were nested recursions, md->recursive        restore the offset data and the last captured value. If there were nested
1812        might be changed, so reset it before looping. */        recursions, md->recursive might be changed, so reset it before looping.
1813          */
1814    
1815        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1816        cbegroup = (*callpat >= OP_SBRA);        cbegroup = (*callpat >= OP_SBRA);
# Line 1797  for (;;) Line 1821  for (;;)
1821            md, eptrb, RM6);            md, eptrb, RM6);
1822          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1823              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1824            md->capture_last = new_recursive.saved_capture_last;
1825          md->recursive = new_recursive.prevrec;          md->recursive = new_recursive.prevrec;
1826          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1827            {            {
# Line 1947  for (;;) Line 1972  for (;;)
1972    
1973        /* Deal with capturing */        /* Deal with capturing */
1974    
1975        md->capture_last = number;        md->capture_last = (md->capture_last & OVFLMASK) | number;
1976        if (offset >= md->offset_max) md->offset_overflow = TRUE; else        if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1977          {          {
1978          /* If offset is greater than offset_top, it means that we are          /* If offset is greater than offset_top, it means that we are
1979          "skipping" a capturing group, and that group's offsets must be marked          "skipping" a capturing group, and that group's offsets must be marked
# Line 2532  for (;;) Line 2557  for (;;)
2557        }        }
2558      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2559        {        {
2560        const pcre_uint32 *cp;        const pcre_uint32 *cp;
2561        const ucd_record *prop = GET_UCD(c);        const ucd_record *prop = GET_UCD(c);
2562    
2563        switch(ecode[1])        switch(ecode[1])
# Line 2594  for (;;) Line 2619  for (;;)
2619          break;          break;
2620    
2621          case PT_CLIST:          case PT_CLIST:
2622          cp = PRIV(ucd_caseless_sets) + prop->caseset;          cp = PRIV(ucd_caseless_sets) + ecode[2];
2623          for (;;)          for (;;)
2624            {            {
2625            if (c < *cp)            if (c < *cp)
# Line 3439  for (;;) Line 3464  for (;;)
3464    
3465        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3466          {          {
3467          pcre_uchar cc;          pcre_uint32 cc;                 /* Faster than pcre_uchar */
   
3468          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3469            {            {
3470            SCHECK_PARTIAL();            SCHECK_PARTIAL();
# Line 3455  for (;;) Line 3479  for (;;)
3479          {          {
3480          for (fi = min;; fi++)          for (fi = min;; fi++)
3481            {            {
3482            pcre_uchar cc;            pcre_uint32 cc;               /* Faster than pcre_uchar */
   
3483            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3484            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3485            if (fi >= max) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
# Line 3476  for (;;) Line 3499  for (;;)
3499          pp = eptr;          pp = eptr;
3500          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3501            {            {
3502            pcre_uchar cc;            pcre_uint32 cc;               /* Faster than pcre_uchar */
   
3503            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3504              {              {
3505              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4203  for (;;) Line 4225  for (;;)
4225                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4226              }              }
4227            break;            break;
4228    
4229            case PT_CLIST:            case PT_CLIST:
4230            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4231              {              {
4232              const pcre_uint32 *cp;              const pcre_uint32 *cp;
4233              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4234                {                {
# Line 4214  for (;;) Line 4236  for (;;)
4236                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4237                }                }
4238              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4239              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
4240              for (;;)              for (;;)
4241                {                {
4242                if (c < *cp)                if (c < *cp)
4243                  { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }                  { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4244                if (c == *cp++)                if (c == *cp++)
4245                  { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }                  { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4246                }                }
4247              }              }
4248            break;            break;
4249    
4250            /* This should not occur */            /* This should not occur */
4251    
4252            default:            default:
# Line 4954  for (;;) Line 4976  for (;;)
4976    
4977            case PT_CLIST:            case PT_CLIST:
4978            for (fi = min;; fi++)            for (fi = min;; fi++)
4979              {              {
4980              const pcre_uint32 *cp;              const pcre_uint32 *cp;
4981              RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
4982              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 4965  for (;;) Line 4987  for (;;)
4987                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4988                }                }
4989              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4990              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
4991              for (;;)              for (;;)
4992                {                {
4993                if (c < *cp)                if (c < *cp)
# Line 5445  for (;;) Line 5467  for (;;)
5467              eptr+= len;              eptr+= len;
5468              }              }
5469            break;            break;
5470    
5471            case PT_CLIST:            case PT_CLIST:
5472            for (i = min; i < max; i++)            for (i = min; i < max; i++)
5473              {              {
# Line 5457  for (;;) Line 5479  for (;;)
5479                break;                break;
5480                }                }
5481              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5482              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
5483              for (;;)              for (;;)
5484                {                {
5485                if (c < *cp)                if (c < *cp)
5486                  { if (prop_fail_result) break; else goto GOT_MAX; }                  { if (prop_fail_result) break; else goto GOT_MAX; }
5487                if (c == *cp++)                if (c == *cp++)
5488                  { if (prop_fail_result) goto GOT_MAX; else break; }                  { if (prop_fail_result) goto GOT_MAX; else break; }
5489                }                }
5490              eptr += len;              eptr += len;
5491              }              }
5492            GOT_MAX:            GOT_MAX:
5493            break;            break;
5494    
5495            default:            default:
# Line 6264  const pcre_uint8 *start_bits = NULL; Line 6286  const pcre_uint8 *start_bits = NULL;
6286  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6287  PCRE_PUCHAR end_subject;  PCRE_PUCHAR end_subject;
6288  PCRE_PUCHAR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
6289    PCRE_PUCHAR match_partial;
6290  PCRE_PUCHAR req_char_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6291    
6292  const pcre_study_data *study;  const pcre_study_data *study;
# Line 6542  if (re->top_backref > 0 && re->top_backr Line 6565  if (re->top_backref > 0 && re->top_backr
6565    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
6566    }    }
6567  else md->offset_vector = offsets;  else md->offset_vector = offsets;
   
6568  md->offset_end = ocount;  md->offset_end = ocount;
6569  md->offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
6570  md->offset_overflow = FALSE;  md->capture_last = 0;
 md->capture_last = -1;  
6571    
6572  /* Reset the working variable associated with each extraction. These should  /* Reset the working variable associated with each extraction. These should
6573  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
# Line 6817  for(;;) Line 6838  for(;;)
6838    md->match_function_type = 0;    md->match_function_type = 0;
6839    md->end_offset_top = 0;    md->end_offset_top = 0;
6840    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6841    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL)
6842        {
6843        start_partial = md->start_used_ptr;
6844        match_partial = start_match;
6845        }
6846    
6847    switch(rc)    switch(rc)
6848      {      {
# Line 6943  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6968  if (rc == MATCH_MATCH || rc == MATCH_ACC
6968          (arg_offset_max - 2) * sizeof(int));          (arg_offset_max - 2) * sizeof(int));
6969        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
6970        }        }
6971      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
6972      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
6973      (PUBL(free))(md->offset_vector);      (PUBL(free))(md->offset_vector);
6974      }      }
# Line 6951  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6976  if (rc == MATCH_MATCH || rc == MATCH_ACC
6976    /* Set the return code to the number of captured strings, or 0 if there were    /* Set the return code to the number of captured strings, or 0 if there were
6977    too many to fit into the vector. */    too many to fit into the vector. */
6978    
6979    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?    rc = ((md->capture_last & OVFLBIT) != 0 &&
6980             md->end_offset_top >= arg_offset_max)?
6981      0 : md->end_offset_top/2;      0 : md->end_offset_top/2;
6982    
6983    /* If there is space in the offset vector, set any unused pairs at the end of    /* If there is space in the offset vector, set any unused pairs at the end of
# Line 7024  if (start_partial != NULL) Line 7050  if (start_partial != NULL)
7050      {      {
7051      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7052      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7053        if (offsetcount > 2)
7054          offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
7055      }      }
7056    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
7057    }    }

Legend:
Removed from v.1189  
changed lines
  Added in v.1251

  ViewVC Help
Powered by ViewVC 1.1.5