/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 836 by ph10, Wed Dec 28 17:16:11 2011 UTC revision 895 by ph10, Fri Jan 20 12:12:03 2012 UTC
# Line 332  argument of match(), which never changes Line 332  argument of match(), which never changes
332    {\    {\
333    heapframe *oldframe = frame;\    heapframe *oldframe = frame;\
334    frame = oldframe->Xprevframe;\    frame = oldframe->Xprevframe;\
335    (PUBL(stack_free))(oldframe);\    if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\
336    if (frame != NULL)\    if (frame != NULL)\
337      {\      {\
338      rrc = ra;\      rrc = ra;\
# Line 468  Returns:       MATCH_MATCH if matched Line 468  Returns:       MATCH_MATCH if matched
468    
469  static int  static int
470  match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,  match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
471    PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,    PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
472    unsigned int rdepth)    unsigned int rdepth)
473  {  {
474  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
# Line 485  BOOL caseless; Line 485  BOOL caseless;
485  int condcode;  int condcode;
486    
487  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
488  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame". We set up the top-level
489  heap storage. Set up the top-level frame here; others are obtained from the  frame on the stack here; subsequent instantiations are obtained from the heap
490  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  whenever RMATCH() does a "recursion". See the macro definitions above. Putting
491    the top-level on the stack rather than malloc-ing them all gives a performance
492    boost in many cases where there is not much "recursion". */
493    
494  #ifdef NO_RECURSE  #ifdef NO_RECURSE
495  heapframe *frame = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));  heapframe frame_zero;
496  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  heapframe *frame = &frame_zero;
497  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
498    
499  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 613  int save_offset1, save_offset2, save_off Line 615  int save_offset1, save_offset2, save_off
615  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
616    
617  eptrblock newptrb;  eptrblock newptrb;
618    
619    /* There is a special fudge for calling match() in a way that causes it to
620    measure the size of its basic stack frame when the stack is being used for
621    recursion. The second argument (ecode) being NULL triggers this behaviour. It
622    cannot normally ever be NULL. The return is the negated value of the frame
623    size. */
624    
625    if (ecode == NULL)
626      {
627      if (rdepth == 0)
628        return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
629      else
630        {
631        int len = (char *)&rdepth - (char *)eptr;
632        return (len > 0)? -len : len;
633        }
634      }
635  #endif     /* NO_RECURSE */  #endif     /* NO_RECURSE */
636    
637  /* To save space on the stack and in the heap frame, I have doubled up on some  /* To save space on the stack and in the heap frame, I have doubled up on some
# Line 626  the alternative names that are used. */ Line 645  the alternative names that are used. */
645  #define condassert    condition  #define condassert    condition
646  #define matched_once  prev_is_word  #define matched_once  prev_is_word
647  #define foc           number  #define foc           number
648    #define save_mark     data
649    
650  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
651  variables. */  variables. */
# Line 818  for (;;) Line 838  for (;;)
838      case OP_ONCE_NC:      case OP_ONCE_NC:
839      prev = ecode;      prev = ecode;
840      saved_eptr = eptr;      saved_eptr = eptr;
841        save_mark = md->mark;
842      do      do
843        {        {
844        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
# Line 836  for (;;) Line 857  for (;;)
857    
858        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
859        ecode += GET(ecode,1);        ecode += GET(ecode,1);
860          md->mark = save_mark;
861        }        }
862      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
863    
# Line 915  for (;;) Line 937  for (;;)
937        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
938        save_offset3 = md->offset_vector[md->offset_end - number];        save_offset3 = md->offset_vector[md->offset_end - number];
939        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
940          save_mark = md->mark;
941    
942        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
943        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
# Line 951  for (;;) Line 974  for (;;)
974          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
975          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
976          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
977            md->mark = save_mark;
978          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
979          }          }
980    
# Line 1016  for (;;) Line 1040  for (;;)
1040    
1041        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1042    
1043          save_mark = md->mark;
1044        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1045          RM2);          RM2);
1046    
1047        /* See comment in the code for capturing groups above about handling        /* See comment in the code for capturing groups above about handling
1048        THEN. */        THEN. */
1049    
# Line 1045  for (;;) Line 1070  for (;;)
1070          RRETURN(rrc);          RRETURN(rrc);
1071          }          }
1072        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1073          md->mark = save_mark;
1074        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1075        }        }
1076    
# Line 1223  for (;;) Line 1249  for (;;)
1249        {        {
1250        if (PUBL(callout) != NULL)        if (PUBL(callout) != NULL)
1251          {          {
1252          pcre_callout_block cb;          PUBL(callout_block) cb;
1253          cb.version          = 2;   /* Version 1 of the callout block */          cb.version          = 2;   /* Version 1 of the callout block */
1254          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1255          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1256    #ifdef COMPILE_PCRE8
1257          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
1258    #else
1259            cb.subject          = (PCRE_SPTR16)md->start_subject;
1260    #endif
1261          cb.subject_length   = (int)(md->end_subject - md->start_subject);          cb.subject_length   = (int)(md->end_subject - md->start_subject);
1262          cb.start_match      = (int)(mstart - md->start_subject);          cb.start_match      = (int)(mstart - md->start_subject);
1263          cb.current_position = (int)(eptr - md->start_subject);          cb.current_position = (int)(eptr - md->start_subject);
# Line 1519  for (;;) Line 1549  for (;;)
1549    
1550      case OP_ASSERT:      case OP_ASSERT:
1551      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1552        save_mark = md->mark;
1553      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1554        {        {
1555        condassert = TRUE;        condassert = TRUE;
# Line 1540  for (;;) Line 1571  for (;;)
1571    
1572        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1573        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1574          md->mark = save_mark;
1575        }        }
1576      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1577    
# Line 1563  for (;;) Line 1595  for (;;)
1595    
1596      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1597      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1598        save_mark = md->mark;
1599      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1600        {        {
1601        condassert = TRUE;        condassert = TRUE;
# Line 1573  for (;;) Line 1606  for (;;)
1606      do      do
1607        {        {
1608        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1609          md->mark = save_mark;
1610        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1611        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1612          {          {
# Line 1633  for (;;) Line 1667  for (;;)
1667      case OP_CALLOUT:      case OP_CALLOUT:
1668      if (PUBL(callout) != NULL)      if (PUBL(callout) != NULL)
1669        {        {
1670        pcre_callout_block cb;        PUBL(callout_block) cb;
1671        cb.version          = 2;   /* Version 1 of the callout block */        cb.version          = 2;   /* Version 1 of the callout block */
1672        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1673        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1674    #ifdef COMPILE_PCRE8
1675        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1676    #else
1677          cb.subject          = (PCRE_SPTR16)md->start_subject;
1678    #endif
1679        cb.subject_length   = (int)(md->end_subject - md->start_subject);        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1680        cb.start_match      = (int)(mstart - md->start_subject);        cb.start_match      = (int)(mstart - md->start_subject);
1681        cb.current_position = (int)(eptr - md->start_subject);        cb.current_position = (int)(eptr - md->start_subject);
# Line 2631  for (;;) Line 2669  for (;;)
2669    
2670      /* Handle repeated back references. If the length of the reference is      /* Handle repeated back references. If the length of the reference is
2671      zero, just continue with the main loop. If the length is negative, it      zero, just continue with the main loop. If the length is negative, it
2672      means the reference is unset in non-Java-compatible mode. If the minimum is      means the reference is unset in non-Java-compatible mode. If the minimum is
2673      zero, we can continue at the same level without recursion. For any other      zero, we can continue at the same level without recursion. For any other
2674      minimum, carrying on will result in NOMATCH. */      minimum, carrying on will result in NOMATCH. */
2675    
2676      if (length == 0) continue;      if (length == 0) continue;
# Line 3700  for (;;) Line 3738  for (;;)
3738              if (fc == d || foc == d) break;              if (fc == d || foc == d) break;
3739              eptr += len;              eptr += len;
3740              }              }
3741          if (possessive) continue;            if (possessive) continue;
3742          for(;;)            for(;;)
3743              {              {
3744              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3745              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 6030  switch (frame->Xwhere) Line 6068  switch (frame->Xwhere)
6068    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6069    LBL(65) LBL(66)    LBL(65) LBL(66)
6070  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6071    LBL(21)    LBL(21)
6072  #endif  #endif
6073  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
6074    LBL(16) LBL(18) LBL(20)    LBL(16) LBL(18) LBL(20)
6075    LBL(22) LBL(23) LBL(28) LBL(30)    LBL(22) LBL(23) LBL(28) LBL(30)
6076    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
6077  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 6043  switch (frame->Xwhere) Line 6081  switch (frame->Xwhere)
6081  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
6082    default:    default:
6083    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6084    
6085  printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);  printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
6086    
6087    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
# Line 6139  pcre_exec(const pcre *argument_re, const Line 6177  pcre_exec(const pcre *argument_re, const
6177    int offsetcount)    int offsetcount)
6178  #else  #else
6179  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6180  pcre16_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6181    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6182    int offsetcount)    int offsetcount)
6183  #endif  #endif
# Line 6167  PCRE_PUCHAR start_partial = NULL; Line 6205  PCRE_PUCHAR start_partial = NULL;
6205  PCRE_PUCHAR req_char_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6206    
6207  const pcre_study_data *study;  const pcre_study_data *study;
6208  const real_pcre *external_re = (const real_pcre *)argument_re;  const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6209  const real_pcre *re = external_re;  
6210    /* Check for the special magic call that measures the size of the stack used
6211    per recursive call of match(). */
6212    
6213    if (re == NULL && extra_data == NULL && subject == NULL && length == -1)
6214    #ifdef NO_RECURSE
6215      return -sizeof(heapframe);
6216    #else
6217      return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6218    #endif
6219    
6220  /* Plausibility checks */  /* Plausibility checks */
6221    
6222  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6223  if (re == NULL || subject == NULL ||  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6224     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;    return PCRE_ERROR_NULL;
6225  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6226  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6227    
6228    /* Check that the first field in the block is the magic number. If it is not,
6229    return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6230    REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6231    means that the pattern is likely compiled with different endianness. */
6232    
6233    if (re->magic_number != MAGIC_NUMBER)
6234      return re->magic_number == REVERSED_MAGIC_NUMBER?
6235        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6236    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6237    
6238  /* These two settings are used in the code for checking a UTF-8 string that  /* These two settings are used in the code for checking a UTF-8 string that
6239  follows immediately afterwards. Other values in the md block are used only  follows immediately afterwards. Other values in the md block are used only
6240  during "normal" pcre_exec() processing, not when the JIT support is in use,  during "normal" pcre_exec() processing, not when the JIT support is in use,
# Line 6209  if (utf && (options & PCRE_NO_UTF8_CHECK Line 6266  if (utf && (options & PCRE_NO_UTF8_CHECK
6266  #else  #else
6267      return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?      return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6268        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6269  #endif  #endif
6270      }      }
6271    
6272    /* Check that a start_offset points to the start of a UTF character. */    /* Check that a start_offset points to the start of a UTF character. */
# Line 6255  md->callout_data = NULL; Line 6312  md->callout_data = NULL;
6312    
6313  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
6314    
6315  tables = external_re->tables;  tables = re->tables;
6316    
6317  if (extra_data != NULL)  if (extra_data != NULL)
6318    {    {
# Line 6277  in other programs later. */ Line 6334  in other programs later. */
6334    
6335  if (tables == NULL) tables = PRIV(default_tables);  if (tables == NULL) tables = PRIV(default_tables);
6336    
 /* Check that the first field in the block is the magic number. If it is not,  
 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to  
 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which  
 means that the pattern is likely compiled with different endianness. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   return re->magic_number == REVERSED_MAGIC_NUMBER?  
     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;  
 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;  
   
6337  /* Set up other data */  /* Set up other data */
6338    
6339  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
# Line 6295  firstline = (re->options & PCRE_FIRSTLIN Line 6342  firstline = (re->options & PCRE_FIRSTLIN
6342    
6343  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
6344    
6345  md->start_code = (const pcre_uchar *)external_re + re->name_table_offset +  md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6346    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
6347    
6348  md->start_subject = (PCRE_PUCHAR)subject;  md->start_subject = (PCRE_PUCHAR)subject;
# Line 6765  for(;;) Line 6812  for(;;)
6812    
6813    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does
6814    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
6815    or ANY or ANYCRLF, advance the match position by one more character. In    or ANY or ANYCRLF, advance the match position by one more character. In
6816    normal matching start_match will always be greater than the first position at    normal matching start_match will always be greater than the first position at
6817    this stage, but a failed *SKIP can cause a return at the same point, which is    this stage, but a failed *SKIP can cause a return at the same point, which is
6818    why the first test exists. */    why the first test exists. */
6819    
6820    if (start_match > (PCRE_PUCHAR)subject + start_offset &&    if (start_match > (PCRE_PUCHAR)subject + start_offset &&
# Line 6858  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6905  if (rc == MATCH_MATCH || rc == MATCH_ACC
6905    /* Return MARK data if requested */    /* Return MARK data if requested */
6906    
6907    if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)    if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6908      *(extra_data->mark) = (unsigned char *)(md->mark);      *(extra_data->mark) = (pcre_uchar *)md->mark;
6909    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
6910    return rc;    return rc;
6911    }    }
# Line 6905  else Line 6952  else
6952  /* Return the MARK data if it has been requested. */  /* Return the MARK data if it has been requested. */
6953    
6954  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6955    *(extra_data->mark) = (unsigned char *)(md->nomatch_mark);    *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
6956  return rc;  return rc;
6957  }  }
6958    

Legend:
Removed from v.836  
changed lines
  Added in v.895

  ViewVC Help
Powered by ViewVC 1.1.5