From 39dcbc5d15f99c84ba982d0962fb384e3460191e Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Fri, 26 Jan 2024 16:36:53 -0500 Subject: [PATCH] mds: add vxattr to block quiesce on an inode Signed-off-by: Patrick Donnelly (cherry picked from commit e2529fc74808b102b1ca85a3ee6644160da32e60) --- src/include/cephfs/types.h | 8 ++++++ src/mds/CInode.cc | 3 +++ src/mds/MDCache.cc | 11 ++++++++ src/mds/MDCache.h | 8 ++++++ src/mds/Server.cc | 53 ++++++++++++++++++++++++++++++++++++++ src/mds/Server.h | 3 +++ 6 files changed, 86 insertions(+) diff --git a/src/include/cephfs/types.h b/src/include/cephfs/types.h index 5ab5c229a06..108878794f7 100644 --- a/src/include/cephfs/types.h +++ b/src/include/cephfs/types.h @@ -395,6 +395,7 @@ struct inode_t { using client_range_map = std::map<client_t,client_writeable_range_t,std::less<client_t>,Allocator<std::pair<const client_t,client_writeable_range_t>>>; static const uint8_t F_EPHEMERAL_DISTRIBUTED_PIN = (1<<0); + static const uint8_t F_QUIESCE_BLOCK = (1<<1); inode_t() { @@ -500,6 +501,12 @@ struct inode_t { bool get_ephemeral_distributed_pin() const { return get_flag(F_EPHEMERAL_DISTRIBUTED_PIN); } + void set_quiesce_block(bool v) { + set_flag(v, F_QUIESCE_BLOCK); + } + bool get_quiesce_block() const { + return get_flag(F_QUIESCE_BLOCK); + } void encode(ceph::buffer::list &bl, uint64_t features) const; void decode(ceph::buffer::list::const_iterator& bl); @@ -828,6 +835,7 @@ void inode_t<Allocator>::dump(ceph::Formatter *f) const f->dump_int("export_pin", export_pin); f->dump_int("export_ephemeral_random_pin", export_ephemeral_random_pin); f->dump_bool("export_ephemeral_distributed_pin", get_ephemeral_distributed_pin()); + f->dump_bool("quiesce_block", get_quiesce_block()); f->open_array_section("client_ranges"); for (const auto &p : client_ranges) { diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 8225e4fbb5a..ed9a74f4435 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -310,6 +310,9 @@ ostream& operator<<(ostream& out, const CInode& in) if (in.state_test(CInode::STATE_RANDEPHEMERALPIN)) { out << " randepin"; } + if
(in.get_inode()->get_quiesce_block()) { + out << " qblock"; + } out << " " << &in; out << "]"; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 8a3dd21d57b..a6df7351069 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -13614,6 +13614,17 @@ void MDCache::dispatch_quiesce_inode(const MDRequestRef& mdr) return; } + if (in->get_projected_inode()->get_quiesce_block()) { + dout(10) << __func__ << " quiesce is blocked for this inode; dropping locks!" << dendl; + mdr->mark_event("quiesce blocked"); + mds->locker->drop_locks(mdr.get()); + /* keep authpins! */ + qs.inc_inodes_blocked(); + mdr->internal_op_finish->complete(0); + mdr->internal_op_finish = nullptr; + return; + } + if (in->is_dir()) { for (auto& dir : in->get_dirfrags()) { if (!dir->is_auth() && !splitauth) { diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index ab34d238eaf..92a6be6d2a0 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -540,12 +540,18 @@ public: uint64_t inc_heartbeat_count() { return ++heartbeat_count; } + void inc_inodes_blocked() { + inodes_blocked++; + } uint64_t get_inodes() const { return inodes; } uint64_t get_inodes_quiesced() const { return inodes_quiesced; } + uint64_t get_inodes_blocked() const { + return inodes_blocked; + } void add_failed(const MDRequestRef& mdr, int rc) { failed[mdr] = rc; } @@ -559,6 +565,7 @@ public: void dump(Formatter* f) const { f->dump_unsigned("inodes", inodes); f->dump_unsigned("inodes_quiesced", inodes_quiesced); + f->dump_unsigned("inodes_blocked", inodes_blocked); f->open_array_section("failed"); for (auto& [mdr, rc] : failed) { f->open_object_section("failure"); @@ -572,6 +579,7 @@ private: uint64_t heartbeat_count = 0; uint64_t inodes = 0; uint64_t inodes_quiesced = 0; + uint64_t inodes_blocked = 0; std::map<MDRequestRef, int> failed; }; class C_MDS_QuiescePath : public MDSInternalContext { diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 3bc4f9196b6..04330cd308d 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -6175,6
+6175,57 @@ void Server::handle_set_vxattr(const MDRequestRef& mdr, CInode *cur) client_t exclude_ct = mdr->get_client(); mdcache->broadcast_quota_to_client(cur, exclude_ct, true); + } else if (name == "ceph.quiesce.block"sv) { + bool val; + try { + val = boost::lexical_cast<bool>(value); + } catch (boost::bad_lexical_cast const&) { + dout(10) << "bad vxattr value, unable to parse bool for " << name << dendl; + respond_to_request(mdr, -CEPHFS_EINVAL); + return; + } + + /* Verify it's not already marked with lighter weight + * rdlock. + */ + if (!mdr->more()->rdonly_checks) { + if (!(mdr->locking_state & MutationImpl::ALL_LOCKED)) { + lov.add_rdlock(&cur->policylock); + if (!mds->locker->acquire_locks(mdr, lov)) + return; + mdr->locking_state |= MutationImpl::ALL_LOCKED; + } + bool is_blocked = cur->get_projected_inode()->get_quiesce_block(); + if (is_blocked == val) { + dout(20) << "already F_QUIESCE_BLOCK set" << dendl; + respond_to_request(mdr, 0); + return; + } + mdr->more()->rdonly_checks = true; + } + + if ((mdr->locking_state & MutationImpl::ALL_LOCKED) && !mdr->is_xlocked(&cur->policylock)) { + /* drop the rdlock and acquire xlocks */ + dout(20) << "dropping rdlocks" << dendl; + mds->locker->drop_locks(mdr.get()); + if (!xlock_policylock(mdr, cur, false, true)) + return; + } + + /* repeat rdonly checks in case changed between rdlock -> xlock */ + bool is_blocked = cur->get_projected_inode()->get_quiesce_block(); + if (is_blocked == val) { + dout(20) << "already F_QUIESCE_BLOCK set" << dendl; + respond_to_request(mdr, 0); + return; + } + + auto pi = cur->project_inode(mdr); + pi.inode->set_quiesce_block(val); + dout(20) << (val ? 
"setting" : "unsetting") << " F_QUIESCE_BLOCK on ino: " << cur->ino() << dendl; + + mdr->no_early_reply = true; + pip = pi.inode.get(); } else if (name == "ceph.dir.subvolume"sv) { if (!cur->is_dir()) { respond_to_request(mdr, -CEPHFS_EINVAL); @@ -6879,6 +6930,8 @@ void Server::handle_client_getvxattr(const MDRequestRef& mdr) } else { r = -CEPHFS_ENODATA; // no such attribute } + } else if (xattr_name == "ceph.quiesce.block"sv) { + *css << cur->get_projected_inode()->get_quiesce_block(); } else if (xattr_name.substr(0, 12) == "ceph.dir.pin"sv) { if (xattr_name == "ceph.dir.pin"sv) { *css << cur->get_projected_inode()->export_pin; diff --git a/src/mds/Server.h b/src/mds/Server.h index 63d05ea32ab..152ab1b7019 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -17,6 +17,8 @@ #include +using namespace std::literals::string_view_literals; + #include #include "include/common_fwd.h" @@ -441,6 +443,7 @@ private: return xattr_name.rfind("ceph.dir.layout", 0) == 0 || xattr_name.rfind("ceph.file.layout", 0) == 0 || xattr_name.rfind("ceph.quota", 0) == 0 || + xattr_name == "ceph.quiesce.block"sv || xattr_name == "ceph.dir.subvolume" || xattr_name == "ceph.dir.pin" || xattr_name == "ceph.dir.pin.random" || -- 2.39.5