virtual void dump() = 0;
virtual void dump(std::function<void(uint64_t offset, uint64_t length)> notify) = 0;
- virtual void set_zone_states(std::vector<zone_state_t> &&_zone_states) {}
+ virtual void zoned_set_zone_states(std::vector<zone_state_t> &&_zone_states) {}
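+ // Zoned allocators override this to report zones that should be cleaned;
+ // conventional allocators have nothing to clean, hence the default of false.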
+ virtual bool zoned_get_zones_to_clean(std::deque<uint64_t> *zones_to_clean) {
+ return false;
+ }
+
virtual void init_add_free(uint64_t offset, uint64_t length) = 0;
virtual void init_rm_free(uint64_t offset, uint64_t length) = 0;
finisher(cct, "commit_finisher", "cfin"),
kv_sync_thread(this),
kv_finalize_thread(this),
+ zoned_cleaner_thread(this),
min_alloc_size(_min_alloc_size),
min_alloc_size_order(ctz(_min_alloc_size)),
mempool_thread(this)
ceph_assert(shared_alloc.a != NULL);
if (bdev->is_smr()) {
- shared_alloc.a->set_zone_states(fm->get_zone_states(db));
+ shared_alloc.a->zoned_set_zone_states(fm->get_zone_states(db));
}
uint64_t num = 0, bytes = 0;
_kv_start();
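+ // The zone cleaner thread is only needed for zoned (SMR) devices.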
+ if (bdev->is_smr()) {
+ _zoned_cleaner_start();
+ }
+
r = _deferred_replay();
if (r < 0)
goto out_stop;
return 0;
out_stop:
+ if (bdev->is_smr()) {
+ _zoned_cleaner_stop();
+ }
_kv_stop();
out_coll:
_shutdown_cache();
mounted = false;
if (!_kv_only) {
mempool_thread.shutdown();
+ if (bdev->is_smr()) {
+ dout(20) << __func__ << " stopping zone cleaner thread" << dendl;
+ _zoned_cleaner_stop();
+ }
dout(20) << __func__ << " stopping kv thread" << dendl;
_kv_stop();
_shutdown_cache();
kv_finalize_started = false;
}
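+// The zone cleaner thread mirrors the kv threads' lifecycle: it is started
+// after _kv_start() during mount and stopped before _kv_stop() during umount.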
+void BlueStore::_zoned_cleaner_start() {
+ dout(10) << __func__ << dendl;
+
+ zoned_cleaner_thread.create("bstore_zcleaner");
+}
+
+void BlueStore::_zoned_cleaner_stop() {
+ dout(10) << __func__ << dendl;
+ {
+ std::unique_lock l{zoned_cleaner_lock};
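+ // Wait for the cleaner thread to mark itself started before signalling
+ // stop, so that a stop racing with startup is not lost.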
+ while (!zoned_cleaner_started) {
+ zoned_cleaner_cond.wait(l);
+ }
+ zoned_cleaner_stop = true;
+ zoned_cleaner_cond.notify_all();
+ }
+ zoned_cleaner_thread.join();
+ {
+ std::lock_guard l{zoned_cleaner_lock};
+ zoned_cleaner_stop = false;
+ }
+ dout(10) << __func__ << " done" << dendl;
+}
+
+void BlueStore::_zoned_cleaner_thread() {
+ dout(10) << __func__ << " start" << dendl;
+ std::unique_lock l{zoned_cleaner_lock};
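+ // Announce that the thread has started so _zoned_cleaner_stop() can
+ // synchronize with it.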
+ ceph_assert(!zoned_cleaner_started);
+ zoned_cleaner_started = true;
+ zoned_cleaner_cond.notify_all();
+ std::deque<uint64_t> zones_to_clean;
+ while (true) {
+ if (zoned_cleaner_queue.empty()) {
+ if (zoned_cleaner_stop) {
+ break;
+ }
+ dout(20) << __func__ << " sleep" << dendl;
+ zoned_cleaner_cond.wait(l);
+ dout(20) << __func__ << " wake" << dendl;
+ } else {
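+ // Take ownership of the queued zones and clean them outside the lock so
+ // that allocations can keep enqueuing work.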
+ zones_to_clean.swap(zoned_cleaner_queue);
+ l.unlock();
+ while (!zones_to_clean.empty()) {
+ _zoned_clean_zone(zones_to_clean.front());
+ zones_to_clean.pop_front();
+ }
+ l.lock();
+ }
+ }
+ dout(10) << __func__ << " finish" << dendl;
+ zoned_cleaner_started = false;
+}
+
+void BlueStore::_zoned_clean_zone(uint64_t zone_num) {
+ dout(10) << __func__ << " cleaning zone " << zone_num << dendl;
+}
+
bluestore_deferred_op_t *BlueStore::_get_deferred_op(
TransContext *txc)
{
}
_collect_allocation_stats(need, min_alloc_size, prealloc.size());
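+ // On SMR devices, ask the allocator whether any zones now need cleaning
+ // and, if so, hand them off to the cleaner thread.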
+ if (bdev->is_smr()) {
+ std::deque<uint64_t> zones_to_clean;
+ if (shared_alloc.a->zoned_get_zones_to_clean(&zones_to_clean)) {
+ std::lock_guard l{zoned_cleaner_lock};
+ zoned_cleaner_queue.swap(zones_to_clean);
+ zoned_cleaner_cond.notify_one();
+ }
+ }
+
dout(20) << __func__ << " prealloc " << prealloc << dendl;
auto prealloc_pos = prealloc.begin();
struct KVFinalizeThread : public Thread {
BlueStore *store;
explicit KVFinalizeThread(BlueStore *s) : store(s) {}
- void *entry() {
+ void *entry() override {
store->_kv_finalize_thread();
return NULL;
}
};
+ struct ZonedCleanerThread : public Thread {
+ BlueStore *store;
+ explicit ZonedCleanerThread(BlueStore *s) : store(s) {}
+ void *entry() override {
+ store->_zoned_cleaner_thread();
+ return nullptr;
+ }
+ };
struct DBHistogram {
struct value_dist {
std::deque<DeferredBatch*> deferred_stable_to_finalize; ///< pending finalization
bool kv_finalize_in_progress = false;
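+ // State for the zone cleaner thread used on zoned (SMR) devices.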
+ ZonedCleanerThread zoned_cleaner_thread;
+ ceph::mutex zoned_cleaner_lock = ceph::make_mutex("BlueStore::zoned_cleaner_lock");
+ ceph::condition_variable zoned_cleaner_cond;
+ bool zoned_cleaner_started = false;
+ bool zoned_cleaner_stop = false;
+ std::deque<uint64_t> zoned_cleaner_queue;
+
PerfCounters *logger = nullptr;
std::list<CollectionRef> removed_collections;
void _kv_sync_thread();
void _kv_finalize_thread();
+ void _zoned_cleaner_start();
+ void _zoned_cleaner_stop();
+ void _zoned_cleaner_thread();
+ void _zoned_clean_zone(uint64_t zone_num);
+
bluestore_deferred_op_t *_get_deferred_op(TransContext *txc);
void _deferred_queue(TransContext *txc);
public:
}
uint64_t ZonedAllocator::get_free() {
- std::lock_guard l(lock);
return num_free;
}
// This just increments |num_free|. The actual free space is added by
-// set_zone_states, as it updates the write pointer for each zone.
+// zoned_set_zone_states, as it updates the write pointer for each zone.
void ZonedAllocator::init_add_free(uint64_t offset, uint64_t length) {
- std::lock_guard l(lock);
- ldout(cct, 10) << __func__ << " " << std::hex
+ ldout(cct, 40) << __func__ << " 0x" << std::hex
<< offset << "~" << length << dendl;
num_free += length;
void ZonedAllocator::init_rm_free(uint64_t offset, uint64_t length) {
std::lock_guard l(lock);
- ldout(cct, 10) << __func__ << " 0x" << std::hex
+ ldout(cct, 40) << __func__ << " 0x" << std::hex
<< offset << "~" << length << dendl;
num_free -= length;
ceph_assert(remaining_space <= length);
advance_write_pointer(zone_num, remaining_space);
- ldout(cct, 10) << __func__ << " set zone 0x" << std::hex
+ ldout(cct, 40) << __func__ << " set zone 0x" << std::hex
<< zone_num << " write pointer to 0x" << zone_size << dendl;
length -= remaining_space;
for ( ; length; length -= zone_size) {
advance_write_pointer(++zone_num, zone_size);
- ldout(cct, 10) << __func__ << " set zone 0x" << std::hex
+ ldout(cct, 40) << __func__ << " set zone 0x" << std::hex
<< zone_num << " write pointer to 0x" << zone_size << dendl;
}
}
-void ZonedAllocator::set_zone_states(std::vector<zone_state_t> &&_zone_states) {
+bool ZonedAllocator::zoned_get_zones_to_clean(std::deque<uint64_t> *zones_to_clean) {
+ // TODO: make 0.25 tunable
+ if (static_cast<double>(num_free) / size > 0.25) {
+ return false;
+ }
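+ // Free space has dropped below the threshold, so cleaning is needed.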
+ {
+ std::lock_guard l(lock);
+ // TODO: populate |zones_to_clean| with the numbers of zones that should be
+ // cleaned.
+ }
+ return true;
+}
+
+void ZonedAllocator::zoned_set_zone_states(std::vector<zone_state_t> &&_zone_states) {
std::lock_guard l(lock);
ldout(cct, 10) << __func__ << dendl;
zone_states = std::move(_zone_states);
// atomic_alloc_and_submit_lock will be removed.
ceph::mutex lock = ceph::make_mutex("ZonedAllocator::lock");
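+ // |num_free| is atomic so that get_free() and init_add_free() can run
+ // without taking |lock|.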
- int64_t num_free; ///< total bytes in freelist
+ std::atomic<int64_t> num_free; ///< total bytes in freelist
uint64_t size;
uint64_t block_size;
uint64_t zone_size;
void dump(std::function<void(uint64_t offset,
uint64_t length)> notify) override;
- void set_zone_states(std::vector<zone_state_t> &&_zone_states) override;
+ void zoned_set_zone_states(std::vector<zone_state_t> &&_zone_states) override;
+ bool zoned_get_zones_to_clean(std::deque<uint64_t> *zones_to_clean) override;
+
void init_add_free(uint64_t offset, uint64_t length) override;
void init_rm_free(uint64_t offset, uint64_t length) override;