From 24484742564d957b17c0069995bd1e1e812948e9 Mon Sep 17 00:00:00 2001 From: myoungwon oh Date: Thu, 24 Feb 2022 11:29:17 +0900 Subject: [PATCH] osd: wait recovery of all snapshots when reference_chunk is called If a user sends reference_chunk() to the original object (not a manifest object) whose snapshots have not yet been recovered, the OSD triggers an assert() because reference_chunk() tries to find adjacent clones that are still unrecovered. This happens because the original object does not wait for the recovery of its snapshots. To avoid this, this commit adds a condition that checks whether the base snapshot is readable, regardless of whether the object is a manifest or not. If the base snapshot is valid and the snapshot is a manifest, the OSD tries to calculate the reference count. fixes: https://tracker.ceph.com/issues/54509 Signed-off-by: Myoungwon Oh --- src/osd/PrimaryLogPG.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index e91213bdee921..b06581041670b 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -3450,6 +3450,12 @@ int PrimaryLogPG::get_manifest_ref_count(ObjectContextRef obc, std::string& fp_o if (osdmap->in_removed_snaps_queue(info.pgid.pgid.pool(), *p)) { return -EBUSY; } + if (is_unreadable_object(clone_oid)) { + dout(10) << __func__ << ": " << clone_oid + << " is unreadable. Need to wait for recovery" << dendl; + wait_for_unreadable_object(clone_oid, op); + return -EAGAIN; + } ObjectContextRef clone_obc = get_object_context(clone_oid, false); if (!clone_obc) { break; -- 2.39.5