ceph_assert(a);
auto f = dynamic_cast<ZonedFreelistManager*>(fm);
ceph_assert(f);
- a->init_from_zone_pointers(f->get_zone_states(db),
+ vector<uint64_t> wp = bdev->get_zones();
+ vector<zone_state_t> zones = f->get_zone_states(db);
+ ceph_assert(wp.size() == zones.size());
+
+ // reconcile zone state
+ auto num_zones = bdev->get_size() / zone_size;
+ for (unsigned i = first_sequential_zone; i < num_zones; ++i) {
+ ceph_assert(wp[i] >= i * zone_size);
+ ceph_assert(wp[i] <= (i + 1) * zone_size); // pos might be at start of next zone
+ uint64_t p = wp[i] - i * zone_size;
+ if (zones[i].write_pointer > p) {
+ derr << __func__ << " zone 0x" << std::hex << i
+ << " bluestore write pointer 0x" << zones[i].write_pointer
+ << " > device write pointer 0x" << p
+ << std::dec << dendl;
+ ceph_abort("bad write pointer");
+ } else if (zones[i].write_pointer < p) {
+ // this is "normal" in that it can happen after any crash (if we have a
+ // write in flight but did not manage to commit the transaction)
+ auto delta = p - zones[i].write_pointer;
+ dout(1) << __func__ << " zone 0x" << std::hex << i
+ << " device write pointer 0x" << p
+ << " > bluestore pointer 0x" << zones[i].write_pointer
+ << ", advancing 0x" << delta << std::dec << dendl;
+ zones[i].num_dead_bytes += delta;
+ zones[i].write_pointer = p;
+ }
+ }
+
+ a->init_from_zone_pointers(zones,
&zoned_cleaner_lock,
&zoned_cleaner_cond);
dout(1) << __func__
}
void ZonedAllocator::init_from_zone_pointers(
- std::vector<zone_state_t> &&_zone_states,
+ std::vector<zone_state_t> _zone_states,
ceph::mutex *_cleaner_lock,
ceph::condition_variable *_cleaner_cond)
{
void mark_zones_to_clean_free(void);
void init_from_zone_pointers(
- std::vector<zone_state_t> &&_zone_states,
+ std::vector<zone_state_t> _zone_states,
ceph::mutex *_cleaner_lock,
ceph::condition_variable *_cleaner_cond);
void init_add_free(uint64_t offset, uint64_t length) override {} // no-op: empty body, offset and length are unused
// We use the same struct for both the on-disk and the in-memory
// representations of the state.
struct zone_state_t {
- uint64_t num_dead_bytes = 0;
- uint64_t write_pointer = 0;
+ uint64_t num_dead_bytes = 0; ///< dead bytes deallocated (behind the write pointer)
+ uint64_t write_pointer = 0; ///< relative offset within the zone
void encode(ceph::buffer::list &bl) const {
using ceph::encode;