flags:
- runtime
with_legacy: true
+- name: bluefs_check_volume_selector_on_umount
+ type: bool
+ level: dev
+ desc: Check validity of volume selector on umount
+ long_desc: Checks if volume selector did not diverge from the state it should be in.
+ Reference is constructed from bluefs inode table. Asserts on inconsistency.
+ default: false
+ flags:
+ - runtime
+ with_legacy: true
+- name: bluefs_check_volume_selector_often
+ type: bool
+ level: dev
+ desc: Periodically check validity of volume selector
+ long_desc: Periodically checks if current volume selector does not diverge from the valid state.
+ Reference is constructed from bluefs inode table. Asserts on inconsistency. This is a debug feature.
+ default: false
+ see_also:
+ - bluefs_check_volume_selector_on_umount
+ flags:
+ - startup
+ with_legacy: true
- name: bluestore_bluefs
type: bool
level: dev
discard_cb[BDEV_DB] = db_discard_cb;
discard_cb[BDEV_SLOW] = slow_discard_cb;
asok_hook = SocketHook::create(this);
-
}
BlueFS::~BlueFS()
dout(1) << __func__ << dendl;
sync_metadata(avoid_compact);
-
+ if (cct->_conf->bluefs_check_volume_selector_on_umount) {
+ _check_vselector_LNF();
+ }
_close_writer(log.writer);
log.writer = NULL;
log.t.clear();
void BlueFS::flush_range(FileWriter *h, uint64_t offset, uint64_t length)/*_WF*/
{
+ _maybe_check_vselector_LNF(); // optional vselector sanity check (config-gated); runs before h->lock is taken
std::unique_lock hl(h->lock);
_flush_range_F(h, offset, length);
}
}
ceph_assert(h->file->fnode.size >= offset);
_flush_bdev(h);
+
+ std::lock_guard ll(log.lock);
vselector->sub_usage(h->file->vselector_hint, h->file->fnode.size);
h->file->fnode.size = offset;
vselector->add_usage(h->file->vselector_hint, h->file->fnode.size);
-
- std::lock_guard ll(log.lock);
log.t.op_file_update_inc(h->file->fnode);
return 0;
}
int BlueFS::fsync(FileWriter *h)/*_WF_WD_WLD_WLNF_WNF*/
{
+ _maybe_check_vselector_LNF();
std::unique_lock hl(h->lock);
uint64_t old_dirty_seq = 0;
{
_flush_and_sync_log_LD(old_dirty_seq);
}
_maybe_compact_log_LNF_NF_LD_D();
+
return 0;
}
FileWriter **h,
bool overwrite)/*_N_LD*/
{
+ _maybe_check_vselector_LNF();
FileRef file;
bool create = false;
bool truncate = false;
FileReader **h,
bool random)/*_N*/
{
+ _maybe_check_vselector_LNF();
std::lock_guard nl(nodes.lock);
dout(10) << __func__ << " " << dirname << "/" << filename
<< (random ? " (random)":" (sequential)") << dendl;
return 0;
}
+/**
+ * Sanity-check the live volume selector against the inode table.
+ *
+ * An empty clone of the current selector is fed the fnode of every file in
+ * nodes.file_map and then compared with the live selector. On a mismatch both
+ * states are dumped at log level 0 and ceph_assert fires. Nothing is done
+ * when the selector's clone_empty() returns null.
+ *
+ * Acquires log.lock, nodes.lock and the lock of every file in the map.
+ */
+void BlueFS::_check_vselector_LNF() {
+  BlueFSVolumeSelector* reference = vselector->clone_empty();
+  if (reference == nullptr) {
+    // this selector offers nothing to compare against
+    return;
+  }
+  std::lock_guard ll(log.lock);
+  std::lock_guard nl(nodes.lock);
+  // The check runs with the log, nodes and all file locks held, so every
+  // modification of vselector has to happen under at least one of them.
+  for (auto& entry : nodes.file_map) {
+    auto& file = entry.second;
+    file->lock.lock();
+    reference->add_usage(file->vselector_hint, file->fnode);
+  }
+  const bool consistent = vselector->compare(reference);
+  if (!consistent) {
+    dout(0) << "Current:";
+    vselector->dump(*_dout);
+    *_dout << dendl;
+    dout(0) << "Expected:";
+    reference->dump(*_dout);
+    *_dout << dendl;
+  }
+  ceph_assert(consistent);
+  // release the per-file locks taken while building the reference
+  for (auto& entry : nodes.file_map) {
+    entry.second->lock.unlock();
+  }
+  delete reference;
+}
+
size_t BlueFS::probe_alloc_avail(int dev, uint64_t alloc_size)
{
size_t total = 0;
virtual uint8_t select_prefer_bdev(void* hint) = 0;
virtual void get_paths(const std::string& base, paths& res) const = 0;
virtual void dump(std::ostream& sout) = 0;
+
+ /* used for sanity checking of vselector */
+ // Hook for selector sanity checking: the base implementation returns
+ // nullptr; overrides may return a newly created selector instead.
+ virtual BlueFSVolumeSelector* clone_empty() const {
+   return nullptr;
+ }
+ // Hook for selector sanity checking: the base implementation ignores
+ // `other` and always reports equality.
+ virtual bool compare(BlueFSVolumeSelector* other) {
+   return true;
+ }
};
struct bluefs_shared_alloc_context_t {
unsigned get_super_length() {
return 4096;
}
-
+ // Runs _check_vselector_LNF() only when the
+ // bluefs_check_volume_selector_often option is enabled.
+ void _maybe_check_vselector_LNF() {
+   if (!cct->_conf->bluefs_check_volume_selector_often) {
+     return;
+   }
+   _check_vselector_LNF();
+ }
public:
BlueFS(CephContext* cct);
~BlueFS();
size_t read_offset,
size_t read_len,
bufferlist* bl);
+ void _check_vselector_LNF(); // vselector consistency check; invoked via _maybe_check_vselector_LNF()
};
class OriginalVolumeSelector : public BlueFSVolumeSelector {
}
}
+// Creates a new RocksDBBlueFSVolumeSelector with every numeric constructor
+// argument set to 0 and the trailing flag set to false. The object is
+// allocated with new and handed to the caller as a raw pointer.
+BlueFSVolumeSelector* RocksDBBlueFSVolumeSelector::clone_empty() const {
+  return new RocksDBBlueFSVolumeSelector(
+    0, 0, 0,
+    0, 0, 0,
+    0, 0, false);
+}
+
+// Compares the usage accounting of this selector with `other`.
+// `other` must be a RocksDBBlueFSVolumeSelector (asserted). Returns true only
+// when every per_level_per_dev_usage cell and every per_level_files counter
+// is equal in both selectors.
+bool RocksDBBlueFSVolumeSelector::compare(BlueFSVolumeSelector* other) {
+  auto* rhs = dynamic_cast<RocksDBBlueFSVolumeSelector*>(other);
+  ceph_assert(rhs);
+  const size_t dev_count = BlueFS::MAX_BDEV + 1;
+  const size_t level_count = LEVEL_MAX - LEVEL_FIRST + 1;
+  bool same = true;
+  // walk the whole usage matrix; no early exit, every cell is visited
+  for (size_t dev = 0; dev < dev_count; ++dev) {
+    for (size_t level = 0; level < level_count; ++level) {
+      if (!(per_level_per_dev_usage.at(dev, level) ==
+            rhs->per_level_per_dev_usage.at(dev, level))) {
+        same = false;
+      }
+    }
+  }
+  // per-level file counters
+  for (size_t level = 0; level < level_count; ++level) {
+    if (!(per_level_files[level] == rhs->per_level_files[level])) {
+      same = false;
+    }
+  }
+  return same;
+}
+
// =======================================================
//================================================================================================================