# Diff of /code/trunk/pcre_exec.c

revision 667 by ph10, Mon Aug 22 14:57:32 2011 UTC revision 702 by ph10, Tue Sep 20 15:45:06 2011 UTC
# Line 870  for (;;) Line 870  for (;;)
870      /* VVVVVVVVVVVVVVVVVVVVVVVVV */      /* VVVVVVVVVVVVVVVVVVVVVVVVV */
871
872      /* Non-capturing or atomic group, except for possessive with unlimited      /* Non-capturing or atomic group, except for possessive with unlimited
873      repeat. Loop for all the alternatives. When we get to the final alternative      repeat. Loop for all the alternatives.
874      within the brackets, we used to return the result of a recursive call to
875      match() whatever happened so it was possible to reduce stack usage by      When we get to the final alternative within the brackets, we used to return
876      turning this into a tail recursion, except in the case of a possibly empty      the result of a recursive call to match() whatever happened so it was
877      group. However, now that there is the possibility of (*THEN) occurring in      possible to reduce stack usage by turning this into a tail recursion,
878      the final alternative, this optimization is no longer possible.      except in the case of a possibly empty group. However, now that there is
879        the possibility of (*THEN) occurring in the final alternative, this
880        optimization is no longer always possible.
881
882        We can optimize if we know there are no (*THEN)s in the pattern; at present
883        this is the best that can be done.
884
885      MATCH_ONCE is returned when the end of an atomic group is successfully      MATCH_ONCE is returned when the end of an atomic group is successfully
886      reached, but subsequent matching fails. It passes back up the tree (causing      reached, but subsequent matching fails. It passes back up the tree (causing
# Line 892  for (;;) Line 897  for (;;)
897      for (;;)      for (;;)
898        {        {
899        if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
900        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
901          /* If this is not a possibly empty group, and there are no (*THEN)s in
902          the pattern, and this is the final alternative, optimize as described
903          above. */
904
905          else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
906            {
907            ecode += _pcre_OP_lengths[*ecode];
908            goto TAIL_RECURSE;
909            }
910
911          /* In all other cases, we have to make another call to match(). */
912
913          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
914          RM2);          RM2);
915        if (rrc != MATCH_NOMATCH &&        if (rrc != MATCH_NOMATCH &&
916            (rrc != MATCH_THEN || md->start_match_ptr != ecode))            (rrc != MATCH_THEN || md->start_match_ptr != ecode))
# Line 1264  for (;;) Line 1282  for (;;)
1282        }        }
1283
1284      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1285      we used to use tail recursion to avoid using another stack frame, except      we used always to use tail recursion to avoid using another stack frame,
1286      when there was unlimited repeat of a possibly empty group. However, that      except when there was unlimited repeat of a possibly empty group. However,
1287      strategy no longer works because of the possibility of (*THEN) being      that strategy no longer works because of the possibility of (*THEN) being
1288      encountered in the branch. A recursive call to match() is always required,      encountered in the branch. However, we can still use tail recursion if
1289      unless the second alternative doesn't exist, in which case we can just      there are no (*THEN)s in the pattern. Otherwise, a recursive call to
1290      plough on. */      match() is always required, unless the second alternative doesn't exist, in
1291        which case we can just plough on. */
1292
1293      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1294        {        {
1295        if (op == OP_SCOND) md->match_function_type = MATCH_CBEGROUP;        if (op == OP_SCOND) md->match_function_type = MATCH_CBEGROUP;
1296          else if (!md->hasthen)
1297            {
1298            ecode += 1 + LINK_SIZE;
1299            goto TAIL_RECURSE;
1300            }
1301
1302          /* A call to match() is required. */
1303
1304        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1305        if (rrc == MATCH_THEN && md->start_match_ptr == ecode)
1306          rrc = MATCH_NOMATCH;        /* If the result is THEN from within the "true" branch of the condition,
1307          md->start_match_ptr will point to the original OP_COND, not to the start
1308          of the branch, so we have to do work to see if it matches. If THEN comes
1309          from the "false" branch, md->start_match_ptr does point to OP_ALT. */
1310
1311          if (rrc == MATCH_THEN)
1312            {
1313            if (*ecode != OP_ALT)
1314              {
1315              do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1316              ecode -= GET(ecode, 1);
1317              }
1318            if (md->start_match_ptr == ecode) rrc = MATCH_NOMATCH;
1319            }
1320        RRETURN(rrc);        RRETURN(rrc);
1321        }        }
1322      else                         /* Condition false & no alternative */
1323         /* Condition false & no alternative; continue after the group. */
1324
1325        else
1326        {        {
1328        }        }
# Line 1556  for (;;) Line 1599  for (;;)
1599            md, eptrb, RM6);            md, eptrb, RM6);
1600          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1601              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1602            md->recursive = new_recursive.prevrec;
1603          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1604            {            {
1605            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
md->recursive = new_recursive.prevrec;
1606            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1607              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1608
# Line 5844  if (extra_data != NULL Line 5887  if (extra_data != NULL
5887      && extra_data->executable_jit != NULL      && extra_data->executable_jit != NULL
5888      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
5889                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
5890    return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,    return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,
5891      start_offset, options, offsets, offsetcount);      start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
5892        ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
5893  #endif  #endif
5894
5895  /* Carry on with non-JIT matching. This information is for finding all the  /* Carry on with non-JIT matching. This information is for finding all the
# Line 5931  md->hitend = FALSE; Line 5975  md->hitend = FALSE;
5975  md->mark = NULL;                        /* In case never set */  md->mark = NULL;                        /* In case never set */
5976
5977  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
5978    md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
5979
5980  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
5981  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
# Line 6402  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6447  if (rc == MATCH_MATCH || rc == MATCH_ACC
6447
6448    /* Set the return code to the number of captured strings, or 0 if there were    /* Set the return code to the number of captured strings, or 0 if there were
6449    too many to fit into the vector. */    too many to fit into the vector. */
6450
6451    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
6452      0 : md->end_offset_top/2;      0 : md->end_offset_top/2;
6453

Legend:
 Removed from v.667 changed lines Added in v.702