/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 893 by ph10, Thu Jan 19 17:15:11 2012 UTC revision 915 by zherczeg, Tue Feb 14 13:05:39 2012 UTC
# Line 147  static int Line 147  static int
147  match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,  match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
148    BOOL caseless)    BOOL caseless)
149  {  {
150  PCRE_PUCHAR eptr_start = eptr;  int matched_length = length;
151  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
152    
153  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 186  if (caseless) Line 186  if (caseless)
186      reference, not along the subject (earlier code did this wrong). */      reference, not along the subject (earlier code did this wrong). */
187    
188      PCRE_PUCHAR endptr = p + length;      PCRE_PUCHAR endptr = p + length;
189        PCRE_PUCHAR eptr_start = eptr;
190      while (p < endptr)      while (p < endptr)
191        {        {
192        int c, d;        int c, d;
193        if (eptr >= md->end_subject) return -1;        if (eptr >= md->end_subject) return -((int)(eptr - eptr_start) + 1);
194        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
195        GETCHARINC(d, p);        GETCHARINC(d, p);
196        if (c != d && c != UCD_OTHERCASE(d)) return -1;        if (c != d && c != UCD_OTHERCASE(d)) return -1;
197        }        }
198        matched_length = (int)(eptr - eptr_start);
199      }      }
200    else    else
201  #endif  #endif
# Line 202  if (caseless) Line 204  if (caseless)
204    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
205    is no UCP support. */    is no UCP support. */
206      {      {
207      if (eptr + length > md->end_subject) return -1;      if (eptr + length > md->end_subject)
208          {
209          if (md->partial == 0)
210            return -1;
211          length = (int)(md->end_subject - eptr);
212          matched_length = -(length + 1);
213          }
214      while (length-- > 0)      while (length-- > 0)
215        {        {
216        if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;        if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
# Line 217  are in UTF-8 mode. */ Line 225  are in UTF-8 mode. */
225    
226  else  else
227    {    {
228    if (eptr + length > md->end_subject) return -1;    if (eptr + length > md->end_subject)
229        {
230        if (md->partial == 0)
231          return -1;
232        length = (int)(md->end_subject - eptr);
233        matched_length = -(length + 1);
234        }
235    while (length-- > 0) if (*p++ != *eptr++) return -1;    while (length-- > 0) if (*p++ != *eptr++) return -1;
236    }    }
237    
238  return (int)(eptr - eptr_start);  return matched_length;
239  }  }
240    
241    
# Line 487  int condcode; Line 501  int condcode;
501  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
502  preserved over calls to RMATCH() are part of a "frame". We set up the top-level  preserved over calls to RMATCH() are part of a "frame". We set up the top-level
503  frame on the stack here; subsequent instantiations are obtained from the heap  frame on the stack here; subsequent instantiations are obtained from the heap
504  whenever RMATCH() does a "recursion". See the macro definitions above. Putting  whenever RMATCH() does a "recursion". See the macro definitions above. Putting
505  the top-level on the stack rather than malloc-ing them all gives a performance  the top-level on the stack rather than malloc-ing them all gives a performance
506  boost in many cases where there is not much "recursion". */  boost in many cases where there is not much "recursion". */
507    
508  #ifdef NO_RECURSE  #ifdef NO_RECURSE
509  heapframe frame_zero;  heapframe frame_zero;
510  heapframe *frame = &frame_zero;  heapframe *frame = &frame_zero;
511  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
512    
513  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 616  int stacksave[REC_STACK_SAVE_MAX]; Line 630  int stacksave[REC_STACK_SAVE_MAX];
630    
631  eptrblock newptrb;  eptrblock newptrb;
632    
633  /* There is a special fudge for calling match() in a way that causes it to  /* There is a special fudge for calling match() in a way that causes it to
634  measure the size of its basic stack frame when the stack is being used for  measure the size of its basic stack frame when the stack is being used for
635  recursion. The first argument (eptr) points to a pointer that is used  recursion. The second argument (ecode) being NULL triggers this behaviour. It
636  "statically" for doing the calculation. The second argument (ecode) being NULL  cannot normally ever be NULL. The return is the negated value of the frame
637  triggers this behaviour. It cannot normally every be NULL. The return is the  size. */
 negated value of the frame size. */  
638    
639  if (ecode == NULL)  if (ecode == NULL)
640    {    {
   char **aptr = (char **)eptr;  
641    if (rdepth == 0)    if (rdepth == 0)
642      {      return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
     *aptr = (char *)&rdepth;  
     return match(eptr, NULL, NULL, 0, NULL, NULL, 1);  
     }  
643    else    else
644      {      {
645      int len = (char *)&rdepth - *aptr;      int len = (char *)&rdepth - (char *)eptr;
646      return (len > 0)? -len : len;      return (len > 0)? -len : len;
647      }      }
648    }    }
649  #endif     /* NO_RECURSE */  #endif     /* NO_RECURSE */
650    
651  /* To save space on the stack and in the heap frame, I have doubled up on some  /* To save space on the stack and in the heap frame, I have doubled up on some
# Line 843  for (;;) Line 852  for (;;)
852      case OP_ONCE_NC:      case OP_ONCE_NC:
853      prev = ecode;      prev = ecode;
854      saved_eptr = eptr;      saved_eptr = eptr;
855      save_mark = md->mark;      save_mark = md->mark;
856      do      do
857        {        {
858        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
# Line 862  for (;;) Line 871  for (;;)
871    
872        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
873        ecode += GET(ecode,1);        ecode += GET(ecode,1);
874        md->mark = save_mark;        md->mark = save_mark;
875        }        }
876      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
877    
# Line 942  for (;;) Line 951  for (;;)
951        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
952        save_offset3 = md->offset_vector[md->offset_end - number];        save_offset3 = md->offset_vector[md->offset_end - number];
953        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
954        save_mark = md->mark;        save_mark = md->mark;
955    
956        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
957        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
# Line 1048  for (;;) Line 1057  for (;;)
1057        save_mark = md->mark;        save_mark = md->mark;
1058        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1059          RM2);          RM2);
1060    
1061        /* See comment in the code for capturing groups above about handling        /* See comment in the code for capturing groups above about handling
1062        THEN. */        THEN. */
1063    
# Line 1075  for (;;) Line 1084  for (;;)
1084          RRETURN(rrc);          RRETURN(rrc);
1085          }          }
1086        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1087        md->mark = save_mark;        md->mark = save_mark;
1088        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1089        }        }
1090    
# Line 1554  for (;;) Line 1563  for (;;)
1563    
1564      case OP_ASSERT:      case OP_ASSERT:
1565      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1566      save_mark = md->mark;      save_mark = md->mark;
1567      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1568        {        {
1569        condassert = TRUE;        condassert = TRUE;
# Line 1576  for (;;) Line 1585  for (;;)
1585    
1586        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1587        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1588        md->mark = save_mark;        md->mark = save_mark;
1589        }        }
1590      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1591    
# Line 1600  for (;;) Line 1609  for (;;)
1609    
1610      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1611      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1612      save_mark = md->mark;      save_mark = md->mark;
1613      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1614        {        {
1615        condassert = TRUE;        condassert = TRUE;
# Line 1611  for (;;) Line 1620  for (;;)
1620      do      do
1621        {        {
1622        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1623        md->mark = save_mark;        md->mark = save_mark;
1624        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1625        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1626          {          {
# Line 2600  for (;;) Line 2609  for (;;)
2609        if (UCD_CATEGORY(c) != ucp_M) break;        if (UCD_CATEGORY(c) != ucp_M) break;
2610        eptr += len;        eptr += len;
2611        }        }
2612        if (md->partial != 0 && eptr >= md->end_subject)
2613          {
2614          SCHECK_PARTIAL();
2615          }
2616      ecode++;      ecode++;
2617      break;      break;
2618  #endif  #endif
# Line 2665  for (;;) Line 2678  for (;;)
2678        default:               /* No repeat follows */        default:               /* No repeat follows */
2679        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2680          {          {
2681            eptr += -(length + 1);
2682          CHECK_PARTIAL();          CHECK_PARTIAL();
2683          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2684          }          }
# Line 2690  for (;;) Line 2704  for (;;)
2704        int slength;        int slength;
2705        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2706          {          {
2707            eptr += -(slength + 1);
2708          CHECK_PARTIAL();          CHECK_PARTIAL();
2709          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2710          }          }
# Line 2713  for (;;) Line 2728  for (;;)
2728          if (fi >= max) RRETURN(MATCH_NOMATCH);          if (fi >= max) RRETURN(MATCH_NOMATCH);
2729          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2730            {            {
2731              eptr += -(slength + 1);
2732            CHECK_PARTIAL();            CHECK_PARTIAL();
2733            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2734            }            }
# Line 2731  for (;;) Line 2747  for (;;)
2747          int slength;          int slength;
2748          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2749            {            {
2750              /* Restore the eptr after the check. */
2751              eptr += -(slength + 1);
2752            CHECK_PARTIAL();            CHECK_PARTIAL();
2753              eptr -= -(slength + 1);
2754            break;            break;
2755            }            }
2756          eptr += slength;          eptr += slength;
# Line 3513  for (;;) Line 3532  for (;;)
3532      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3533      if (op == OP_NOTI)         /* The caseless case */      if (op == OP_NOTI)         /* The caseless case */
3534        {        {
3535        register int ch, och;        register unsigned int ch, och;
3536        ch = *ecode++;        ch = *ecode++;
3537  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
3538        /* ch must be < 128 if UTF is enabled. */        /* ch must be < 128 if UTF is enabled. */
# Line 3659  for (;;) Line 3678  for (;;)
3678              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3679              }              }
3680            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3681            if (fc == d || foc == d) RRETURN(MATCH_NOMATCH);            if (fc == d || (unsigned int) foc == d) RRETURN(MATCH_NOMATCH);
3682            }            }
3683          }          }
3684        else        else
# Line 3697  for (;;) Line 3716  for (;;)
3716                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3717                }                }
3718              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3719              if (fc == d || foc == d) RRETURN(MATCH_NOMATCH);              if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3720              }              }
3721            }            }
3722          else          else
# Line 3740  for (;;) Line 3759  for (;;)
3759                break;                break;
3760                }                }
3761              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3762              if (fc == d || foc == d) break;              if (fc == d || (unsigned int)foc == d) break;
3763              eptr += len;              eptr += len;
3764              }              }
3765            if (possessive) continue;            if (possessive) continue;
# Line 4170  for (;;) Line 4189  for (;;)
4189              eptr += len;              eptr += len;
4190              }              }
4191            }            }
4192            if (md->partial != 0 && eptr >= md->end_subject)
4193              {
4194              SCHECK_PARTIAL();
4195              }
4196          }          }
4197    
4198        else        else
# Line 4953  for (;;) Line 4976  for (;;)
4976              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
4977              eptr += len;              eptr += len;
4978              }              }
4979              if (md->partial != 0 && eptr >= md->end_subject)
4980                {
4981                SCHECK_PARTIAL();
4982                }
4983            }            }
4984          }          }
4985        else        else
# Line 5496  for (;;) Line 5523  for (;;)
5523              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
5524              eptr += len;              eptr += len;
5525              }              }
5526              if (eptr >= md->end_subject)
5527                {
5528                SCHECK_PARTIAL();
5529                }
5530            }            }
5531    
5532          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
# Line 6212  PCRE_PUCHAR req_char_ptr = start_match - Line 6243  PCRE_PUCHAR req_char_ptr = start_match -
6243  const pcre_study_data *study;  const pcre_study_data *study;
6244  const REAL_PCRE *re = (const REAL_PCRE *)argument_re;  const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6245    
6246  /* Check for the special magic call that measures the size of the stack used  /* Check for the special magic call that measures the size of the stack used
6247  per recursive call of match(). */  per recursive call of match(). Without the funny casting for sizeof, a Windows
6248    compiler gave this error: "unary minus operator applied to unsigned type,
6249    result still unsigned". Hopefully the cast fixes that. */
6250    
6251  if (re == NULL && extra_data == NULL && subject == NULL && length == -1)  if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6252        start_offset == -999)
6253  #ifdef NO_RECURSE  #ifdef NO_RECURSE
6254    return -sizeof(heapframe);    return -((int)sizeof(heapframe));
6255  #else  #else
6256    return match((PCRE_PUCHAR)&start_partial, NULL, NULL, 0, NULL, NULL, 0);    return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6257  #endif  #endif
6258    
6259  /* Plausibility checks */  /* Plausibility checks */
6260    
6261  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6262  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6263    return PCRE_ERROR_NULL;    return PCRE_ERROR_NULL;
6264  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6265  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
# Line 6289  matching. */ Line 6323  matching. */
6323    
6324  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
6325  if (extra_data != NULL  if (extra_data != NULL
6326      && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0      && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6327                                 PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6328      && extra_data->executable_jit != NULL      && extra_data->executable_jit != NULL
     && (extra_data->flags & PCRE_EXTRA_TABLES) == 0  
6329      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6330                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
6331    return PRIV(jit_exec)(re, extra_data->executable_jit,                      PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
6332      {
6333      rc = PRIV(jit_exec)(re, extra_data->executable_jit,
6334      (const pcre_uchar *)subject, length, start_offset, options,      (const pcre_uchar *)subject, length, start_offset, options,
6335      ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)      ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
6336      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
6337      /* PCRE_ERROR_NULL means that the selected normal or partial matching
6338      mode is not compiled. In this case we simply fallback to interpreter. */
6339      if (rc != PCRE_ERROR_NULL)
6340        return rc;
6341      }
6342  #endif  #endif
6343    
6344  /* Carry on with non-JIT matching. This information is for finding all the  /* Carry on with non-JIT matching. This information is for finding all the
# Line 6500  if (!anchored) Line 6541  if (!anchored)
6541    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
6542      {      {
6543      has_first_char = TRUE;      has_first_char = TRUE;
6544      first_char = first_char2 = re->first_char;      first_char = first_char2 = (pcre_uchar)(re->first_char);
6545      if ((re->flags & PCRE_FCH_CASELESS) != 0)      if ((re->flags & PCRE_FCH_CASELESS) != 0)
6546        {        {
6547        first_char2 = TABLE_GET(first_char, md->fcc, first_char);        first_char2 = TABLE_GET(first_char, md->fcc, first_char);
# Line 6522  character" set. */ Line 6563  character" set. */
6563  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
6564    {    {
6565    has_req_char = TRUE;    has_req_char = TRUE;
6566    req_char = req_char2 = re->req_char;    req_char = req_char2 = (pcre_uchar)(re->req_char);
6567    if ((re->flags & PCRE_RCH_CASELESS) != 0)    if ((re->flags & PCRE_RCH_CASELESS) != 0)
6568      {      {
6569      req_char2 = TABLE_GET(req_char, md->fcc, req_char);      req_char2 = TABLE_GET(req_char, md->fcc, req_char);

Legend:
Removed from v.893  
changed lines
  Added in v.915

  ViewVC Help
Powered by ViewVC 1.1.5