/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 850 by zherczeg, Wed Jan 4 17:29:11 2012 UTC revision 904 by ph10, Mon Jan 23 17:30:49 2012 UTC
# Line 332  argument of match(), which never changes Line 332  argument of match(), which never changes
332    {\    {\
333    heapframe *oldframe = frame;\    heapframe *oldframe = frame;\
334    frame = oldframe->Xprevframe;\    frame = oldframe->Xprevframe;\
335    (PUBL(stack_free))(oldframe);\    if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\
336    if (frame != NULL)\    if (frame != NULL)\
337      {\      {\
338      rrc = ra;\      rrc = ra;\
# Line 485  BOOL caseless; Line 485  BOOL caseless;
485  int condcode;  int condcode;
486    
487  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
488  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame". We set up the top-level
489  heap storage. Set up the top-level frame here; others are obtained from the  frame on the stack here; subsequent instantiations are obtained from the heap
490  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  whenever RMATCH() does a "recursion". See the macro definitions above. Putting
491    the top-level on the stack rather than malloc-ing them all gives a performance
492    boost in many cases where there is not much "recursion". */
493    
494  #ifdef NO_RECURSE  #ifdef NO_RECURSE
495  heapframe *frame = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));  heapframe frame_zero;
496  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  heapframe *frame = &frame_zero;
497  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
498    
499  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 613  int save_offset1, save_offset2, save_off Line 615  int save_offset1, save_offset2, save_off
615  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
616    
617  eptrblock newptrb;  eptrblock newptrb;
618    
619    /* There is a special fudge for calling match() in a way that causes it to
620    measure the size of its basic stack frame when the stack is being used for
621    recursion. The second argument (ecode) being NULL triggers this behaviour. It
622    cannot normally ever be NULL. The return is the negated value of the frame
623    size. */
624    
625    if (ecode == NULL)
626      {
627      if (rdepth == 0)
628        return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
629      else
630        {
631        int len = (char *)&rdepth - (char *)eptr;
632        return (len > 0)? -len : len;
633        }
634      }
635  #endif     /* NO_RECURSE */  #endif     /* NO_RECURSE */
636    
637  /* To save space on the stack and in the heap frame, I have doubled up on some  /* To save space on the stack and in the heap frame, I have doubled up on some
# Line 626  the alternative names that are used. */ Line 645  the alternative names that are used. */
645  #define condassert    condition  #define condassert    condition
646  #define matched_once  prev_is_word  #define matched_once  prev_is_word
647  #define foc           number  #define foc           number
648    #define save_mark     data
649    
650  /* These statements are here to stop the compiler complaining about unitialized  /* These statements are here to stop the compiler complaining about unitialized
651  variables. */  variables. */
# Line 818  for (;;) Line 838  for (;;)
838      case OP_ONCE_NC:      case OP_ONCE_NC:
839      prev = ecode;      prev = ecode;
840      saved_eptr = eptr;      saved_eptr = eptr;
841        save_mark = md->mark;
842      do      do
843        {        {
844        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
# Line 836  for (;;) Line 857  for (;;)
857    
858        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
859        ecode += GET(ecode,1);        ecode += GET(ecode,1);
860          md->mark = save_mark;
861        }        }
862      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
863    
# Line 915  for (;;) Line 937  for (;;)
937        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
938        save_offset3 = md->offset_vector[md->offset_end - number];        save_offset3 = md->offset_vector[md->offset_end - number];
939        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
940          save_mark = md->mark;
941    
942        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
943        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
# Line 951  for (;;) Line 974  for (;;)
974          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
975          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
976          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
977            md->mark = save_mark;
978          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
979          }          }
980    
# Line 1016  for (;;) Line 1040  for (;;)
1040    
1041        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1042    
1043          save_mark = md->mark;
1044        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1045          RM2);          RM2);
1046    
# Line 1045  for (;;) Line 1070  for (;;)
1070          RRETURN(rrc);          RRETURN(rrc);
1071          }          }
1072        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1073          md->mark = save_mark;
1074        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1075        }        }
1076    
# Line 1227  for (;;) Line 1253  for (;;)
1253          cb.version          = 2;   /* Version 1 of the callout block */          cb.version          = 2;   /* Version 1 of the callout block */
1254          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
1255          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
1256    #ifdef COMPILE_PCRE8
1257          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
1258    #else
1259            cb.subject          = (PCRE_SPTR16)md->start_subject;
1260    #endif
1261          cb.subject_length   = (int)(md->end_subject - md->start_subject);          cb.subject_length   = (int)(md->end_subject - md->start_subject);
1262          cb.start_match      = (int)(mstart - md->start_subject);          cb.start_match      = (int)(mstart - md->start_subject);
1263          cb.current_position = (int)(eptr - md->start_subject);          cb.current_position = (int)(eptr - md->start_subject);
# Line 1519  for (;;) Line 1549  for (;;)
1549    
1550      case OP_ASSERT:      case OP_ASSERT:
1551      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1552        save_mark = md->mark;
1553      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1554        {        {
1555        condassert = TRUE;        condassert = TRUE;
# Line 1540  for (;;) Line 1571  for (;;)
1571    
1572        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1573        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1574          md->mark = save_mark;
1575        }        }
1576      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1577    
# Line 1563  for (;;) Line 1595  for (;;)
1595    
1596      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1597      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1598        save_mark = md->mark;
1599      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1600        {        {
1601        condassert = TRUE;        condassert = TRUE;
# Line 1573  for (;;) Line 1606  for (;;)
1606      do      do
1607        {        {
1608        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1609          md->mark = save_mark;
1610        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1611        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1612          {          {
# Line 1637  for (;;) Line 1671  for (;;)
1671        cb.version          = 2;   /* Version 1 of the callout block */        cb.version          = 2;   /* Version 1 of the callout block */
1672        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1673        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1674    #ifdef COMPILE_PCRE8
1675        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1676    #else
1677          cb.subject          = (PCRE_SPTR16)md->start_subject;
1678    #endif
1679        cb.subject_length   = (int)(md->end_subject - md->start_subject);        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1680        cb.start_match      = (int)(mstart - md->start_subject);        cb.start_match      = (int)(mstart - md->start_subject);
1681        cb.current_position = (int)(eptr - md->start_subject);        cb.current_position = (int)(eptr - md->start_subject);
# Line 3470  for (;;) Line 3508  for (;;)
3508      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3509      if (op == OP_NOTI)         /* The caseless case */      if (op == OP_NOTI)         /* The caseless case */
3510        {        {
3511        register int ch, och;        register unsigned int ch, och;
3512        ch = *ecode++;        ch = *ecode++;
3513  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
3514        /* ch must be < 128 if UTF is enabled. */        /* ch must be < 128 if UTF is enabled. */
# Line 3616  for (;;) Line 3654  for (;;)
3654              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3655              }              }
3656            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3657            if (fc == d || foc == d) RRETURN(MATCH_NOMATCH);            if (fc == d || (unsigned int) foc == d) RRETURN(MATCH_NOMATCH);
3658            }            }
3659          }          }
3660        else        else
# Line 3654  for (;;) Line 3692  for (;;)
3692                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3693                }                }
3694              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3695              if (fc == d || foc == d) RRETURN(MATCH_NOMATCH);              if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3696              }              }
3697            }            }
3698          else          else
# Line 3697  for (;;) Line 3735  for (;;)
3735                break;                break;
3736                }                }
3737              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3738              if (fc == d || foc == d) break;              if (fc == d || (unsigned int)foc == d) break;
3739              eptr += len;              eptr += len;
3740              }              }
3741          if (possessive) continue;            if (possessive) continue;
3742          for(;;)            for(;;)
3743              {              {
3744              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3745              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 6139  pcre_exec(const pcre *argument_re, const Line 6177  pcre_exec(const pcre *argument_re, const
6177    int offsetcount)    int offsetcount)
6178  #else  #else
6179  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6180  pcre16_exec(const pcre *argument_re, const pcre16_extra *extra_data,  pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6181    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6182    int offsetcount)    int offsetcount)
6183  #endif  #endif
# Line 6167  PCRE_PUCHAR start_partial = NULL; Line 6205  PCRE_PUCHAR start_partial = NULL;
6205  PCRE_PUCHAR req_char_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6206    
6207  const pcre_study_data *study;  const pcre_study_data *study;
6208  const real_pcre *external_re = (const real_pcre *)argument_re;  const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6209  const real_pcre *re = external_re;  
6210    /* Check for the special magic call that measures the size of the stack used
6211    per recursive call of match(). */
6212    
6213    if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6214        start_offset == -999)
6215    #ifdef NO_RECURSE
6216      return -sizeof(heapframe);
6217    #else
6218      return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6219    #endif
6220    
6221  /* Plausibility checks */  /* Plausibility checks */
6222    
6223  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6224  if (re == NULL || subject == NULL ||  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6225     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;    return PCRE_ERROR_NULL;
6226  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6227  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6228    
6229    /* Check that the first field in the block is the magic number. If it is not,
6230    return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6231    REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6232    means that the pattern is likely compiled with different endianness. */
6233    
6234    if (re->magic_number != MAGIC_NUMBER)
6235      return re->magic_number == REVERSED_MAGIC_NUMBER?
6236        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6237    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6238    
6239  /* These two settings are used in the code for checking a UTF-8 string that  /* These two settings are used in the code for checking a UTF-8 string that
6240  follows immediately afterwards. Other values in the md block are used only  follows immediately afterwards. Other values in the md block are used only
6241  during "normal" pcre_exec() processing, not when the JIT support is in use,  during "normal" pcre_exec() processing, not when the JIT support is in use,
# Line 6255  md->callout_data = NULL; Line 6313  md->callout_data = NULL;
6313    
6314  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
6315    
6316  tables = external_re->tables;  tables = re->tables;
6317    
6318  if (extra_data != NULL)  if (extra_data != NULL)
6319    {    {
# Line 6277  in other programs later. */ Line 6335  in other programs later. */
6335    
6336  if (tables == NULL) tables = PRIV(default_tables);  if (tables == NULL) tables = PRIV(default_tables);
6337    
 /* Check that the first field in the block is the magic number. If it is not,  
 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to  
 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which  
 means that the pattern is likely compiled with different endianness. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   return re->magic_number == REVERSED_MAGIC_NUMBER?  
     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;  
 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;  
   
6338  /* Set up other data */  /* Set up other data */
6339    
6340  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
# Line 6295  firstline = (re->options & PCRE_FIRSTLIN Line 6343  firstline = (re->options & PCRE_FIRSTLIN
6343    
6344  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
6345    
6346  md->start_code = (const pcre_uchar *)external_re + re->name_table_offset +  md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6347    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
6348    
6349  md->start_subject = (PCRE_PUCHAR)subject;  md->start_subject = (PCRE_PUCHAR)subject;
# Line 6448  if (!anchored) Line 6496  if (!anchored)
6496    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
6497      {      {
6498      has_first_char = TRUE;      has_first_char = TRUE;
6499      first_char = first_char2 = re->first_char;      first_char = first_char2 = (pcre_uchar)(re->first_char);
6500      if ((re->flags & PCRE_FCH_CASELESS) != 0)      if ((re->flags & PCRE_FCH_CASELESS) != 0)
6501        {        {
6502        first_char2 = TABLE_GET(first_char, md->fcc, first_char);        first_char2 = TABLE_GET(first_char, md->fcc, first_char);
# Line 6470  character" set. */ Line 6518  character" set. */
6518  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
6519    {    {
6520    has_req_char = TRUE;    has_req_char = TRUE;
6521    req_char = req_char2 = re->req_char;    req_char = req_char2 = (pcre_uchar)(re->req_char);
6522    if ((re->flags & PCRE_RCH_CASELESS) != 0)    if ((re->flags & PCRE_RCH_CASELESS) != 0)
6523      {      {
6524      req_char2 = TABLE_GET(req_char, md->fcc, req_char);      req_char2 = TABLE_GET(req_char, md->fcc, req_char);

Legend:
Removed from v.850  
changed lines
  Added in v.904

  ViewVC Help
Powered by ViewVC 1.1.5