/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 842 by ph10, Sat Dec 31 15:19:04 2011 UTC revision 893 by ph10, Thu Jan 19 17:15:11 2012 UTC
# Line 332  argument of match(), which never changes Line 332  argument of match(), which never changes
332    {\    {\
333    heapframe *oldframe = frame;\    heapframe *oldframe = frame;\
334    frame = oldframe->Xprevframe;\    frame = oldframe->Xprevframe;\
335    (PUBL(stack_free))(oldframe);\    if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\
336    if (frame != NULL)\    if (frame != NULL)\
337      {\      {\
338      rrc = ra;\      rrc = ra;\
# Line 485  BOOL caseless; Line 485  BOOL caseless;
485  int condcode;  int condcode;
486    
487  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
488  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame". We set up the top-level
489  heap storage. Set up the top-level frame here; others are obtained from the  frame on the stack here; subsequent instantiations are obtained from the heap
490  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  whenever RMATCH() does a "recursion". See the macro definitions above. Putting
491    the top-level on the stack rather than malloc-ing them all gives a performance
492    boost in many cases where there is not much "recursion". */
493    
494  #ifdef NO_RECURSE  #ifdef NO_RECURSE
495  heapframe *frame = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));  heapframe frame_zero;
496  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  heapframe *frame = &frame_zero;
497  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
498    
499  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 613  int save_offset1, save_offset2, save_off Line 615  int save_offset1, save_offset2, save_off
615  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
616    
617  eptrblock newptrb;  eptrblock newptrb;
618    
619    /* There is a special fudge for calling match() in a way that causes it to
620    measure the size of its basic stack frame when the stack is being used for
621    recursion. The first argument (eptr) points to a pointer that is used
622    "statically" for doing the calculation. The second argument (ecode) being NULL
623    triggers this behaviour. It cannot normally ever be NULL. The return is the
624    negated value of the frame size. */
625    
626    if (ecode == NULL)
627      {
628      char **aptr = (char **)eptr;
629      if (rdepth == 0)
630        {
631        *aptr = (char *)&rdepth;
632        return match(eptr, NULL, NULL, 0, NULL, NULL, 1);
633        }
634      else
635        {
636        int len = (char *)&rdepth - *aptr;
637        return (len > 0)? -len : len;
638        }
639      }
640  #endif     /* NO_RECURSE */  #endif     /* NO_RECURSE */
641    
642  /* To save space on the stack and in the heap frame, I have doubled up on some  /* To save space on the stack and in the heap frame, I have doubled up on some
# Line 626  the alternative names that are used. */ Line 650  the alternative names that are used. */
650  #define condassert    condition  #define condassert    condition
651  #define matched_once  prev_is_word  #define matched_once  prev_is_word
652  #define foc           number  #define foc           number
653    #define save_mark     data
654    
655  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
656  variables. */  variables. */
# Line 818  for (;;) Line 843  for (;;)
843      case OP_ONCE_NC:      case OP_ONCE_NC:
844      prev = ecode;      prev = ecode;
845      saved_eptr = eptr;      saved_eptr = eptr;
846        save_mark = md->mark;
847      do      do
848        {        {
849        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
# Line 836  for (;;) Line 862  for (;;)
862    
863        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
864        ecode += GET(ecode,1);        ecode += GET(ecode,1);
865          md->mark = save_mark;
866        }        }
867      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
868    
# Line 915  for (;;) Line 942  for (;;)
942        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
943        save_offset3 = md->offset_vector[md->offset_end - number];        save_offset3 = md->offset_vector[md->offset_end - number];
944        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
945          save_mark = md->mark;
946    
947        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
948        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
# Line 951  for (;;) Line 979  for (;;)
979          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
980          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
981          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
982            md->mark = save_mark;
983          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
984          }          }
985    
# Line 1016  for (;;) Line 1045  for (;;)
1045    
1046        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1047    
1048          save_mark = md->mark;
1049        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1050          RM2);          RM2);
1051    
1052        /* See comment in the code for capturing groups above about handling        /* See comment in the code for capturing groups above about handling
1053        THEN. */        THEN. */
1054    
# Line 1045  for (;;) Line 1075  for (;;)
1075          RRETURN(rrc);          RRETURN(rrc);
1076          }          }
1077        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1078          md->mark = save_mark;
1079        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1080        }        }
1081    
# Line 1223  for (;;) Line 1254  for (;;)
1254        {        {
1255        if (PUBL(callout) != NULL)        if (PUBL(callout) != NULL)
1256          {          {
1257          pcre_callout_block cb;          PUBL(callout_block) cb;
1258          cb.version          = 2;   /* Version 2 of the callout block */          cb.version          = 2;   /* Version 2 of the callout block */
1259          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1260          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1261    #ifdef COMPILE_PCRE8
1262          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
1263    #else
1264            cb.subject          = (PCRE_SPTR16)md->start_subject;
1265    #endif
1266          cb.subject_length   = (int)(md->end_subject - md->start_subject);          cb.subject_length   = (int)(md->end_subject - md->start_subject);
1267          cb.start_match      = (int)(mstart - md->start_subject);          cb.start_match      = (int)(mstart - md->start_subject);
1268          cb.current_position = (int)(eptr - md->start_subject);          cb.current_position = (int)(eptr - md->start_subject);
# Line 1519  for (;;) Line 1554  for (;;)
1554    
1555      case OP_ASSERT:      case OP_ASSERT:
1556      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1557        save_mark = md->mark;
1558      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1559        {        {
1560        condassert = TRUE;        condassert = TRUE;
# Line 1540  for (;;) Line 1576  for (;;)
1576    
1577        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1578        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1579          md->mark = save_mark;
1580        }        }
1581      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1582    
# Line 1563  for (;;) Line 1600  for (;;)
1600    
1601      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1602      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1603        save_mark = md->mark;
1604      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1605        {        {
1606        condassert = TRUE;        condassert = TRUE;
# Line 1573  for (;;) Line 1611  for (;;)
1611      do      do
1612        {        {
1613        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1614          md->mark = save_mark;
1615        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1616        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1617          {          {
# Line 1633  for (;;) Line 1672  for (;;)
1672      case OP_CALLOUT:      case OP_CALLOUT:
1673      if (PUBL(callout) != NULL)      if (PUBL(callout) != NULL)
1674        {        {
1675        pcre_callout_block cb;        PUBL(callout_block) cb;
1676        cb.version          = 2;   /* Version 2 of the callout block */        cb.version          = 2;   /* Version 2 of the callout block */
1677        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1678        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1679    #ifdef COMPILE_PCRE8
1680        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1681    #else
1682          cb.subject          = (PCRE_SPTR16)md->start_subject;
1683    #endif
1684        cb.subject_length   = (int)(md->end_subject - md->start_subject);        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1685        cb.start_match      = (int)(mstart - md->start_subject);        cb.start_match      = (int)(mstart - md->start_subject);
1686        cb.current_position = (int)(eptr - md->start_subject);        cb.current_position = (int)(eptr - md->start_subject);
# Line 3700  for (;;) Line 3743  for (;;)
3743              if (fc == d || foc == d) break;              if (fc == d || foc == d) break;
3744              eptr += len;              eptr += len;
3745              }              }
3746          if (possessive) continue;            if (possessive) continue;
3747          for(;;)            for(;;)
3748              {              {
3749              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3750              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 6139  pcre_exec(const pcre *argument_re, const Line 6182  pcre_exec(const pcre *argument_re, const
6182    int offsetcount)    int offsetcount)
6183  #else  #else
6184  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6185  pcre16_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6186    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6187    int offsetcount)    int offsetcount)
6188  #endif  #endif
# Line 6167  PCRE_PUCHAR start_partial = NULL; Line 6210  PCRE_PUCHAR start_partial = NULL;
6210  PCRE_PUCHAR req_char_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6211    
6212  const pcre_study_data *study;  const pcre_study_data *study;
6213  const real_pcre *external_re = (const real_pcre *)argument_re;  const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6214  const real_pcre *re = external_re;  
6215    /* Check for the special magic call that measures the size of the stack used
6216    per recursive call of match(). */
6217    
6218    if (re == NULL && extra_data == NULL && subject == NULL && length == -1)
6219    #ifdef NO_RECURSE
6220      return -sizeof(heapframe);
6221    #else
6222      return match((PCRE_PUCHAR)&start_partial, NULL, NULL, 0, NULL, NULL, 0);
6223    #endif
6224    
6225  /* Plausibility checks */  /* Plausibility checks */
6226    
6227  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6228  if (re == NULL || subject == NULL ||  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6229     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;    return PCRE_ERROR_NULL;
6230  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6231  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6232    
6233    /* Check that the first field in the block is the magic number. If it is not,
6234    return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6235    REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6236    means that the pattern is likely compiled with different endianness. */
6237    
6238    if (re->magic_number != MAGIC_NUMBER)
6239      return re->magic_number == REVERSED_MAGIC_NUMBER?
6240        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6241    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6242    
6243  /* These two settings are used in the code for checking a UTF-8 string that  /* These two settings are used in the code for checking a UTF-8 string that
6244  follows immediately afterwards. Other values in the md block are used only  follows immediately afterwards. Other values in the md block are used only
6245  during "normal" pcre_exec() processing, not when the JIT support is in use,  during "normal" pcre_exec() processing, not when the JIT support is in use,
# Line 6255  md->callout_data = NULL; Line 6317  md->callout_data = NULL;
6317    
6318  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
6319    
6320  tables = external_re->tables;  tables = re->tables;
6321    
6322  if (extra_data != NULL)  if (extra_data != NULL)
6323    {    {
# Line 6277  in other programs later. */ Line 6339  in other programs later. */
6339    
6340  if (tables == NULL) tables = PRIV(default_tables);  if (tables == NULL) tables = PRIV(default_tables);
6341    
 /* Check that the first field in the block is the magic number. If it is not,  
 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to  
 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which  
 means that the pattern is likely compiled with different endianness. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   return re->magic_number == REVERSED_MAGIC_NUMBER?  
     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;  
 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;  
   
6342  /* Set up other data */  /* Set up other data */
6343    
6344  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
# Line 6295  firstline = (re->options & PCRE_FIRSTLIN Line 6347  firstline = (re->options & PCRE_FIRSTLIN
6347    
6348  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
6349    
6350  md->start_code = (const pcre_uchar *)external_re + re->name_table_offset +  md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6351    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
6352    
6353  md->start_subject = (PCRE_PUCHAR)subject;  md->start_subject = (PCRE_PUCHAR)subject;
# Line 6858  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6910  if (rc == MATCH_MATCH || rc == MATCH_ACC
6910    /* Return MARK data if requested */    /* Return MARK data if requested */
6911    
6912    if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)    if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6913      *(extra_data->mark) = (unsigned char *)(md->mark);      *(extra_data->mark) = (pcre_uchar *)md->mark;
6914    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
6915    return rc;    return rc;
6916    }    }
# Line 6905  else Line 6957  else
6957  /* Return the MARK data if it has been requested. */  /* Return the MARK data if it has been requested. */
6958    
6959  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6960    *(extra_data->mark) = (unsigned char *)(md->nomatch_mark);    *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
6961  return rc;  return rc;
6962  }  }
6963    

Legend:
Removed from v.842  
changed lines
  Added in v.893

  ViewVC Help
Powered by ViewVC 1.1.5