From: Yan, Zheng Date: Wed, 15 Aug 2018 07:14:03 +0000 (+0800) Subject: mds: hold slave request refernce when dumping MDRequestImpl X-Git-Tag: v13.2.2~13^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f052d85443812c1701b3b47389bb2a7352ddde97;p=ceph.git mds: hold slave request refernce when dumping MDRequestImpl dump_ops_in_flight asok command dumps MDRequestImpl without holding mds_lock. MDS may free MDRequestImpl::slave_request in the middle of dumping MDRequestImpl. Fixes: http://tracker.ceph.com/issues/26894 Signed-off-by: "Yan, Zheng" (cherry picked from commit 1013374863188172f4839fb90ce97bb2e61be2e1) Conflicts: src/mds/MDCache.cc src/mds/Mutation.cc src/mds/Mutation.h src/mds/Server.cc - implement MClientRequest and MMDSSlaveRequest using boost::intrusive_ptr because there is no MFoo::const_ref in mimic (modeled after luminous backport f91e914ac71af95a8790f0874a45711cc64cbb6e) --- diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 1e291b3a106d..352c473307bb 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -2996,10 +2996,7 @@ void MDCache::handle_mds_failure(mds_rank_t who) if (!mdr->more()->waiting_on_slave.empty()) { assert(mdr->more()->srcdn_auth_mds == mds->get_nodeid()); // will rollback, no need to wait - if (mdr->slave_request) { - mdr->slave_request->put(); - mdr->slave_request = 0; - } + mdr->reset_slave_request(); mdr->more()->waiting_on_slave.clear(); } } else if (!mdr->committing) { @@ -9350,8 +9347,7 @@ void MDCache::request_forward(MDRequestRef& mdr, mds_rank_t who, int port) if (mdr->client_request && mdr->client_request->get_source().is_client()) { dout(7) << "request_forward " << *mdr << " to mds." << who << " req " << *mdr->client_request << dendl; - mds->forward_message_mds(mdr->client_request, who); - mdr->client_request = 0; + mds->forward_message_mds(mdr->release_client_request(), who); if (mds->logger) mds->logger->inc(l_mds_forward); } else if (mdr->internal_op >= 0) { dout(10) << "request_forward on internal op; cancelling" << dendl; diff --git a/src/mds/Mutation.cc b/src/mds/Mutation.cc index ba0790ed42d6..2e83f8e7086a 100644 --- a/src/mds/Mutation.cc +++ b/src/mds/Mutation.cc @@ -318,6 +318,25 @@ bool MDRequestImpl::is_queued_for_replay() const return client_request ? client_request->is_queued_for_replay() : false; } +MClientRequest* MDRequestImpl::release_client_request() +{ + msg_lock.lock(); + auto req = client_request; + client_request = nullptr; + msg_lock.unlock(); + return req; +} + +void MDRequestImpl::reset_slave_request(MMDSSlaveRequest *req) +{ + msg_lock.lock(); + auto old = slave_request; + slave_request = req; + msg_lock.unlock(); + if (old) + old->put(); +} + void MDRequestImpl::print(ostream &out) const { out << "request(" << reqid; @@ -338,31 +357,36 @@ void MDRequestImpl::_dump(Formatter *f) const f->dump_string("flag_point", state_string()); f->dump_stream("reqid") << reqid; { - if (client_request) { + msg_lock.lock(); + boost::intrusive_ptr _client_request(client_request); + boost::intrusive_ptr _slave_request(slave_request); + msg_lock.unlock(); + + if (_client_request) { f->dump_string("op_type", "client_request"); f->open_object_section("client_info"); - f->dump_stream("client") << client_request->get_orig_source(); - f->dump_int("tid", client_request->get_tid()); + f->dump_stream("client") << _client_request->get_orig_source(); + f->dump_int("tid", _client_request->get_tid()); f->close_section(); // client_info - } else if (is_slave() && slave_request) { // replies go to an existing mdr + } else if (is_slave() && _slave_request) { // replies go to an existing mdr f->dump_string("op_type", "slave_request"); f->open_object_section("master_info"); - f->dump_stream("master") << slave_request->get_orig_source(); + f->dump_stream("master") << _slave_request->get_orig_source(); f->close_section(); // master_info f->open_object_section("request_info"); - f->dump_int("attempt", slave_request->get_attempt()); + f->dump_int("attempt", _slave_request->get_attempt()); f->dump_string("op_type", - slave_request->get_opname(slave_request->get_op())); - f->dump_int("lock_type", slave_request->get_lock_type()); - f->dump_stream("object_info") << slave_request->get_object_info(); - f->dump_stream("srcdnpath") << slave_request->srcdnpath; - f->dump_stream("destdnpath") << slave_request->destdnpath; - f->dump_stream("witnesses") << slave_request->witnesses; + MMDSSlaveRequest::get_opname(_slave_request->get_op())); + f->dump_int("lock_type", _slave_request->get_lock_type()); + f->dump_stream("object_info") << _slave_request->get_object_info(); + f->dump_stream("srcdnpath") << _slave_request->srcdnpath; + f->dump_stream("destdnpath") << _slave_request->destdnpath; + f->dump_stream("witnesses") << _slave_request->witnesses; f->dump_bool("has_inode_export", - slave_request->inode_export.length() != 0); - f->dump_int("inode_export_v", slave_request->inode_export_v); - f->dump_stream("op_stamp") << slave_request->op_stamp; + _slave_request->inode_export_v != 0); + f->dump_int("inode_export_v", _slave_request->inode_export_v); + f->dump_stream("op_stamp") << _slave_request->op_stamp; f->close_section(); // request_info } else if (internal_op != -1) { // internal request @@ -386,10 +410,15 @@ void MDRequestImpl::_dump(Formatter *f) const void MDRequestImpl::_dump_op_descriptor_unlocked(ostream& stream) const { - if (client_request) { - client_request->print(stream); - } else if (slave_request) { - slave_request->print(stream); + msg_lock.lock(); + boost::intrusive_ptr _client_request(client_request); + boost::intrusive_ptr _slave_request(slave_request); + msg_lock.unlock(); + + if (_client_request) { + _client_request->print(stream); + } else if (_slave_request) { + _slave_request->print(stream); } else if (internal_op >= 0) { stream << "internal op " << ceph_mds_op_name(internal_op) << ":" << reqid; } else { diff --git a/src/mds/Mutation.h b/src/mds/Mutation.h index 128867f09473..c1deb23197eb 100644 --- a/src/mds/Mutation.h +++ b/src/mds/Mutation.h @@ -347,11 +347,16 @@ struct MDRequestImpl : public MutationImpl { void print(ostream &out) const override; void dump(Formatter *f) const override; + MClientRequest* release_client_request(); + void reset_slave_request(MMDSSlaveRequest *req=nullptr); + // TrackedOp stuff typedef boost::intrusive_ptr Ref; protected: void _dump(Formatter *f) const override; void _dump_op_descriptor_unlocked(ostream& stream) const override; +private: + mutable ceph::spinlock msg_lock; }; typedef boost::intrusive_ptr MDRequestRef; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index ceabf8f9b44e..d52e85b5e758 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -2038,7 +2038,7 @@ void Server::handle_slave_request(MMDSSlaveRequest *m) return; } - mdr->slave_request = m; + mdr->reset_slave_request(m); dispatch_slave_request(mdr); } @@ -2203,8 +2203,7 @@ void Server::dispatch_slave_request(MDRequestRef& mdr) } // done. - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); } break; @@ -2227,15 +2226,13 @@ void Server::dispatch_slave_request(MDRequestRef& mdr) mds->locker->issue_caps(static_cast(lock->get_parent())); // done. no ack necessary. - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); } break; case MMDSSlaveRequest::OP_DROPLOCKS: mds->locker->drop_locks(mdr.get()); - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); break; case MMDSSlaveRequest::OP_AUTHPIN: @@ -2386,8 +2383,7 @@ void Server::handle_slave_auth_pin(MDRequestRef& mdr) mds->send_message_mds(reply, mdr->slave_to_mds); // clean up this request - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); return; } @@ -5862,8 +5858,7 @@ void Server::_logged_slave_link(MDRequestRef& mdr, CInode *targeti, bool adjust_ mds->balancer->hit_inode(now, targeti, META_POP_IWR); // done. - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); if (adjust_realm) { int op = CEPH_SNAP_OP_SPLIT; @@ -6578,8 +6573,7 @@ void Server::_logged_slave_rmdir(MDRequestRef& mdr, CDentry *dn, CDentry *strayd mdcache->do_realm_invalidate_and_update_notify(in, CEPH_SNAP_OP_SPLIT, false); // done. - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); mdr->straydn = 0; if (!mdr->aborted) { @@ -8025,8 +8019,7 @@ void Server::handle_slave_rename_prep(MDRequestRef& mdr) MMDSSlaveRequest *reply= new MMDSSlaveRequest(mdr->reqid, mdr->attempt, MMDSSlaveRequest::OP_RENAMEPREPACK); reply->mark_interrupted(); mds->send_message_mds(reply, mdr->slave_to_mds); - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); return; } @@ -8160,8 +8153,7 @@ void Server::handle_slave_rename_prep(MDRequestRef& mdr) MMDSSlaveRequest::OP_RENAMEPREPACK); reply->witnesses.swap(srcdnrep); mds->send_message_mds(reply, mdr->slave_to_mds); - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); return; } dout(10) << " witness list sufficient: includes all srcdn replicas" << dendl; @@ -8315,8 +8307,7 @@ void Server::_logged_slave_rename(MDRequestRef& mdr, mds->balancer->hit_inode(now, destdnl->get_inode(), META_POP_IWR); // done. - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); mdr->straydn = 0; if (reply) {