Message destruction in a busy environment is not a very short operation, because
the CPU may get stuck in the tcmalloc library doing garbage collection.
In a rough benchmark (3k IOPS), this reduces send_message latency from 54us to
37.5us. In a real environment (higher IOPS), the improvement should be even greater.
Signed-off-by: Haomai Wang <haomai@xsky.com>
{
ldout(async_msgr->cct, 15) << __func__ << " got ack seq " << seq << dendl;
// trim sent list
- std::lock_guard<std::mutex> l(write_lock);
- while (!sent.empty() && sent.front()->get_seq() <= seq) {
+ static const int max_pending = 128;
+ int i = 0;
+ Message *pending[max_pending];
+ write_lock.lock();
+ while (!sent.empty() && sent.front()->get_seq() <= seq && i < max_pending) {
Message* m = sent.front();
sent.pop_front();
+ pending[i++] = m;
ldout(async_msgr->cct, 10) << __func__ << " got ack seq "
<< seq << " >= " << m->get_seq() << " on "
<< m << " " << *m << dendl;
- m->put();
}
+ write_lock.unlock();
+ for (int k = 0; k < i; k++)
+ pending[k]->put();
}
void AsyncConnection::DelayedDelivery::do_request(uint64_t id)