From: Yan, Zheng Date: Wed, 15 Aug 2018 07:14:03 +0000 (+0800) Subject: mds: hold slave request refernce when dumping MDRequestImpl X-Git-Tag: v12.2.9~103^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f91e914ac71af95a8790f0874a45711cc64cbb6e;p=ceph.git mds: hold slave request refernce when dumping MDRequestImpl dump_ops_in_flight asok command dumps MDRequestImpl without holding mds_lock. MDS may free MDRequestImpl::slave_request in the middle of dumping MDRequestImpl. Fixes: http://tracker.ceph.com/issues/26894 Signed-off-by: "Yan, Zheng" (cherry picked from commit 1013374863188172f4839fb90ce97bb2e61be2e1) Conflicts: src/mds/Mutation.h src/mds/Mutation.cc --- diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index a79f7aea250..3dee97ba910 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -2925,10 +2925,7 @@ void MDCache::handle_mds_failure(mds_rank_t who) if (!mdr->more()->waiting_on_slave.empty()) { assert(mdr->more()->srcdn_auth_mds == mds->get_nodeid()); // will rollback, no need to wait - if (mdr->slave_request) { - mdr->slave_request->put(); - mdr->slave_request = 0; - } + mdr->reset_slave_request(); mdr->more()->waiting_on_slave.clear(); } } else if (!mdr->committing) { @@ -9135,8 +9132,7 @@ void MDCache::request_forward(MDRequestRef& mdr, mds_rank_t who, int port) if (mdr->client_request && mdr->client_request->get_source().is_client()) { dout(7) << "request_forward " << *mdr << " to mds." << who << " req " << *mdr->client_request << dendl; - mds->forward_message_mds(mdr->client_request, who); - mdr->client_request = 0; + mds->forward_message_mds(mdr->release_client_request(), who); if (mds->logger) mds->logger->inc(l_mds_forward); } else if (mdr->internal_op >= 0) { dout(10) << "request_forward on internal op; cancelling" << dendl; diff --git a/src/mds/Mutation.cc b/src/mds/Mutation.cc index 52c0ca2339d..2e83f8e7086 100644 --- a/src/mds/Mutation.cc +++ b/src/mds/Mutation.cc @@ -318,6 +318,25 @@ bool MDRequestImpl::is_queued_for_replay() const return client_request ? client_request->is_queued_for_replay() : false; } +MClientRequest* MDRequestImpl::release_client_request() +{ + msg_lock.lock(); + auto req = client_request; + client_request = nullptr; + msg_lock.unlock(); + return req; +} + +void MDRequestImpl::reset_slave_request(MMDSSlaveRequest *req) +{ + msg_lock.lock(); + auto old = slave_request; + slave_request = req; + msg_lock.unlock(); + if (old) + old->put(); +} + void MDRequestImpl::print(ostream &out) const { out << "request(" << reqid; @@ -338,33 +357,36 @@ void MDRequestImpl::_dump(Formatter *f) const f->dump_string("flag_point", state_string()); f->dump_stream("reqid") << reqid; { - if (client_request) { + msg_lock.lock(); + boost::intrusive_ptr _client_request(client_request); + boost::intrusive_ptr _slave_request(slave_request); + msg_lock.unlock(); + + if (_client_request) { f->dump_string("op_type", "client_request"); f->open_object_section("client_info"); - f->dump_stream("client") << client_request->get_orig_source(); - f->dump_int("tid", client_request->get_tid()); + f->dump_stream("client") << _client_request->get_orig_source(); + f->dump_int("tid", _client_request->get_tid()); f->close_section(); // client_info - } else if (is_slave() && slave_request) { // replies go to an existing mdr + } else if (is_slave() && _slave_request) { // replies go to an existing mdr f->dump_string("op_type", "slave_request"); f->open_object_section("master_info"); - f->dump_stream("master") << slave_request->get_orig_source(); + f->dump_stream("master") << _slave_request->get_orig_source(); f->close_section(); // master_info f->open_object_section("request_info"); - f->dump_int("attempt", slave_request->get_attempt()); + f->dump_int("attempt", _slave_request->get_attempt()); f->dump_string("op_type", - slave_request->get_opname(slave_request->get_op())); - f->dump_int("lock_type", slave_request->get_lock_type()); - f->dump_stream("object_info") << slave_request->get_object_info(); - f->dump_stream("srcdnpath") << slave_request->srcdnpath; - f->dump_stream("destdnpath") << slave_request->destdnpath; - f->dump_stream("witnesses") << slave_request->witnesses; + MMDSSlaveRequest::get_opname(_slave_request->get_op())); + f->dump_int("lock_type", _slave_request->get_lock_type()); + f->dump_stream("object_info") << _slave_request->get_object_info(); + f->dump_stream("srcdnpath") << _slave_request->srcdnpath; + f->dump_stream("destdnpath") << _slave_request->destdnpath; + f->dump_stream("witnesses") << _slave_request->witnesses; f->dump_bool("has_inode_export", - slave_request->inode_export.length() != 0); - f->dump_int("inode_export_v", slave_request->inode_export_v); - f->dump_bool("has_srci_replica", - slave_request->srci_replica.length() != 0); - f->dump_stream("op_stamp") << slave_request->op_stamp; + _slave_request->inode_export_v != 0); + f->dump_int("inode_export_v", _slave_request->inode_export_v); + f->dump_stream("op_stamp") << _slave_request->op_stamp; f->close_section(); // request_info } else if (internal_op != -1) { // internal request @@ -388,10 +410,15 @@ void MDRequestImpl::_dump(Formatter *f) const void MDRequestImpl::_dump_op_descriptor_unlocked(ostream& stream) const { - if (client_request) { - client_request->print(stream); - } else if (slave_request) { - slave_request->print(stream); + msg_lock.lock(); + boost::intrusive_ptr _client_request(client_request); + boost::intrusive_ptr _slave_request(slave_request); + msg_lock.unlock(); + + if (_client_request) { + _client_request->print(stream); + } else if (_slave_request) { + _slave_request->print(stream); } else if (internal_op >= 0) { stream << "internal op " << ceph_mds_op_name(internal_op) << ":" << reqid; } else { diff --git a/src/mds/Mutation.h b/src/mds/Mutation.h index c9ffcfe9859..0bd4106e26e 100644 --- a/src/mds/Mutation.h +++ b/src/mds/Mutation.h @@ -335,11 +335,26 @@ struct MDRequestImpl : public MutationImpl { void print(ostream &out) const override; void dump(Formatter *f) const override; + MClientRequest* release_client_request(); + void reset_slave_request(MMDSSlaveRequest *req=nullptr); + // TrackedOp stuff typedef boost::intrusive_ptr Ref; protected: void _dump(Formatter *f) const override; void _dump_op_descriptor_unlocked(ostream& stream) const override; +private: + class { + std::atomic_flag _lock = ATOMIC_FLAG_INIT; + public: + void lock() { + while(_lock.test_and_set(std::memory_order_acquire)) + ; + } + void unlock() { + _lock.clear(std::memory_order_release); + } + } mutable msg_lock; }; typedef boost::intrusive_ptr MDRequestRef; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index fd48bea1918..e04803ff661 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -2012,7 +2012,7 @@ void Server::handle_slave_request(MMDSSlaveRequest *m) return; } - mdr->slave_request = m; + mdr->reset_slave_request(m); dispatch_slave_request(mdr); } @@ -2174,8 +2174,7 @@ void Server::dispatch_slave_request(MDRequestRef& mdr) } // done. - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); } break; @@ -2198,15 +2197,13 @@ void Server::dispatch_slave_request(MDRequestRef& mdr) mds->locker->issue_caps(static_cast(lock->get_parent())); // done. no ack necessary. - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); } break; case MMDSSlaveRequest::OP_DROPLOCKS: mds->locker->drop_locks(mdr.get()); - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); break; case MMDSSlaveRequest::OP_AUTHPIN: @@ -2357,8 +2354,7 @@ void Server::handle_slave_auth_pin(MDRequestRef& mdr) mds->send_message_mds(reply, mdr->slave_to_mds); // clean up this request - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); return; } @@ -5663,8 +5659,7 @@ void Server::_logged_slave_link(MDRequestRef& mdr, CInode *targeti) mds->balancer->hit_inode(now, targeti, META_POP_IWR); // done. - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); // ack if (!mdr->aborted) { @@ -6291,8 +6286,7 @@ void Server::_logged_slave_rmdir(MDRequestRef& mdr, CDentry *dn, CDentry *strayd mdcache->adjust_subtree_after_rename(in, dn->get_dir(), true); // done. - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); mdr->straydn = 0; if (!mdr->aborted) { @@ -7589,8 +7583,7 @@ void Server::handle_slave_rename_prep(MDRequestRef& mdr) MMDSSlaveRequest *reply= new MMDSSlaveRequest(mdr->reqid, mdr->attempt, MMDSSlaveRequest::OP_RENAMEPREPACK); reply->mark_interrupted(); mds->send_message_mds(reply, mdr->slave_to_mds); - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); return; } @@ -7724,8 +7717,7 @@ void Server::handle_slave_rename_prep(MDRequestRef& mdr) MMDSSlaveRequest::OP_RENAMEPREPACK); reply->witnesses.swap(srcdnrep); mds->send_message_mds(reply, mdr->slave_to_mds); - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); return; } dout(10) << " witness list sufficient: includes all srcdn replicas" << dendl; @@ -7862,8 +7854,7 @@ void Server::_logged_slave_rename(MDRequestRef& mdr, mds->balancer->hit_inode(now, destdnl->get_inode(), META_POP_IWR); // done. - mdr->slave_request->put(); - mdr->slave_request = 0; + mdr->reset_slave_request(); mdr->straydn = 0; if (reply) {