From: Sage Weil Date: Tue, 7 Sep 2021 18:53:06 +0000 (-0500) Subject: os/bluestore: fix startup vs device write pointers X-Git-Tag: v17.1.0~535^2~39 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7f74551b7bdf13afb0f0d31b18bdafff91df3d82;p=ceph.git os/bluestore: fix startup vs device write pointers Compare freelist write pointers to the device on startup. Signed-off-by: Sage Weil --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 2004f7286457..459f7070b1bc 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -5592,7 +5592,36 @@ int BlueStore::_init_alloc() ceph_assert(a); auto f = dynamic_cast<ZonedFreelistManager*>(fm); ceph_assert(f); - a->init_from_zone_pointers(f->get_zone_states(db), + vector<uint64_t> wp = bdev->get_zones(); + vector<zone_state_t> zones = f->get_zone_states(db); + ceph_assert(wp.size() == zones.size()); + + // reconcile zone state + auto num_zones = bdev->get_size() / zone_size; + for (unsigned i = first_sequential_zone; i < num_zones; ++i) { + ceph_assert(wp[i] >= i * zone_size); + ceph_assert(wp[i] <= (i + 1) * zone_size); // pos might be at start of next zone + uint64_t p = wp[i] - i * zone_size; + if (zones[i].write_pointer > p) { + derr << __func__ << " zone 0x" << std::hex << i + << " bluestore write pointer 0x" << zones[i].write_pointer + << " > device write pointer 0x" << p + << std::dec << dendl; + ceph_abort("bad write pointer"); + } else if (zones[i].write_pointer < p) { + // this is "normal" in that it can happen after any crash (if we have a + // write in flight but did not manage to commit the transaction) + auto delta = p - zones[i].write_pointer; + dout(1) << __func__ << " zone 0x" << std::hex << i + << " device write pointer 0x" << p + << " > bluestore pointer 0x" << zones[i].write_pointer + << ", advancing 0x" << delta << std::dec << dendl; + zones[i].num_dead_bytes += delta; + zones[i].write_pointer = p; + } + } + + a->init_from_zone_pointers(zones, &zoned_cleaner_lock, 
&zoned_cleaner_cond); dout(1) << __func__ diff --git a/src/os/bluestore/ZonedAllocator.cc b/src/os/bluestore/ZonedAllocator.cc index 0ec8000e17b6..03326d2e5fc5 100644 --- a/src/os/bluestore/ZonedAllocator.cc +++ b/src/os/bluestore/ZonedAllocator.cc @@ -151,7 +151,7 @@ void ZonedAllocator::dump(std::function<void(uint64_t offset, } void ZonedAllocator::init_from_zone_pointers( - std::vector<zone_state_t> &&_zone_states, + std::vector<zone_state_t> _zone_states, ceph::mutex *_cleaner_lock, ceph::condition_variable *_cleaner_cond) { diff --git a/src/os/bluestore/ZonedAllocator.h b/src/os/bluestore/ZonedAllocator.h index 73697e4cfd5f..903b62fd1a09 100644 --- a/src/os/bluestore/ZonedAllocator.h +++ b/src/os/bluestore/ZonedAllocator.h @@ -96,7 +96,7 @@ public: void mark_zones_to_clean_free(void); void init_from_zone_pointers( - std::vector<zone_state_t> &&_zone_states, + std::vector<zone_state_t> _zone_states, ceph::mutex *_cleaner_lock, ceph::condition_variable *_cleaner_cond); void init_add_free(uint64_t offset, uint64_t length) override {} diff --git a/src/os/bluestore/zoned_types.h b/src/os/bluestore/zoned_types.h index c92f2ada0996..93d3138399a0 100644 --- a/src/os/bluestore/zoned_types.h +++ b/src/os/bluestore/zoned_types.h @@ -14,8 +14,8 @@ // We use the same struct for an on-disk and in-memory representation of the // state. struct zone_state_t { - uint64_t num_dead_bytes = 0; - uint64_t write_pointer = 0; + uint64_t num_dead_bytes = 0; ///< dead bytes deallocated (behind the write pointer) + uint64_t write_pointer = 0; ///< relative offset within the zone void encode(ceph::buffer::list &bl) const { using ceph::encode;