From dca32d4126f81092253a3681c2c4a505969e358d Mon Sep 17 00:00:00 2001
From: Adam Kupczyk
Date: Tue, 30 Jan 2024 06:54:38 +0000
Subject: [PATCH] os/bluestore: Create read_bdev_main_label function

Duplicate read_bdev_label into dedicated read_bdev_main_label.
New function reads multiple labels.
Also created check_or_set_main_bdev_label in a similar way.

Signed-off-by: Adam Kupczyk
(cherry picked from commit 75b1a22b95963a8f722ecff80756718cbaf01fce)
---
 src/common/options/global.yaml.in |  24 ++++++
 src/os/bluestore/BlueStore.cc     | 140 +++++++++++++++++++++++++++++-
 src/os/bluestore/BlueStore.h      |  17 +++-
 3 files changed, 178 insertions(+), 3 deletions(-)

diff --git a/src/common/options/global.yaml.in b/src/common/options/global.yaml.in
index a66f544a57935..2955ba96cb968 100644
--- a/src/common/options/global.yaml.in
+++ b/src/common/options/global.yaml.in
@@ -4330,6 +4330,30 @@ options:
   flags:
   - create
   with_legacy: true
+- name: bluestore_bdev_label_multi
+  type: bool
+  level: advanced
+  desc: Keep multiple copies of block device label.
+  long_desc: Having multiple labels is only useful in error conditions.
+    The label located at offset 0 has been known to be sometimes overwritten by unknown causes,
+    but without it OSD cannot run.
+  default: true
+  flags:
+  - create
+  with_legacy: false
+- name: bluestore_bdev_label_require_all
+  type: bool
+  level: advanced
+  desc: Require all copies to match.
+  long_desc: Under normal conditions, all copies should be the same.
+    Clearing this flag allows to run OSD if at least one of labels
+    could be properly read.
+  default: true
+  see_also:
+  - bluestore_bdev_label_multi
+  flags:
+  - runtime
+  with_legacy: false
 # whether preallocate space if block/db_path/wal_path is file rather that block device.
 - name: bluestore_block_preallocate_file
   type: bool
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index 1b942e9b515c4..d998b6ae19ef7 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -6592,6 +6592,99 @@ int BlueStore::_read_bdev_label(
   return 0;
 }
 
+/**
+  Reads main device label, trying every location in bdev_label_positions.
+  cct - CephContext, as usual
+  path - Path to block device, as conf.bluestore_block_path defines.
+  *out_label - Filled if reading of label is considered successful.
+  *out_valid_positions - Locations that hold the returned label; for a multi
+                         label only locations with the highest epoch are listed.
+  *out_is_multi - Whether the label is regular or multi label with epoch.
+  *out_epoch - Epoch of label; written only for a multi label.
+
+  Returns:
+  0  When all labels are read
+  1  When some, but not all labels are read
+  -ENOENT Otherwise
+
+*/
+int BlueStore::_read_main_bdev_label(
+  CephContext* cct,
+  const string &path,
+  bluestore_bdev_label_t *out_label,
+  std::vector<uint64_t>* out_valid_positions,
+  bool* out_is_multi,
+  int64_t* out_epoch)
+{
+  dout(10) << __func__ << dendl;
+  ceph_assert(out_label);
+  if (out_valid_positions) {
+    // callers may pass a reused vector; never report stale locations
+    out_valid_positions->clear();
+  }
+  // go and try read all possible bdev labels.
+  // if only first bdev label is correct, it must not have "multi=yes" key.
+  int64_t epoch = -1;
+  bool all_labels_valid = true;
+  for (uint64_t position : bdev_label_positions) {
+    bluestore_bdev_label_t label;
+    int r = _read_bdev_label(cct, path, &label, position);
+    if (r == 0) {
+      auto i = label.meta.find("multi");
+      bool is_multi = i != label.meta.end() && i->second == "yes";
+      if (position == BDEV_LABEL_POSITION && !is_multi) {
+        // we have a single-label case
+        *out_label = label;
+        if (out_is_multi) {
+          *out_is_multi = false;
+        }
+        if (out_valid_positions) {
+          out_valid_positions->push_back(position);
+        }
+        goto done;
+      }
+      if (!is_multi) {
+        // for not base bdev position, it has to be cloned to be considered
+        continue;
+      }
+      i = label.meta.find("epoch");
+      if (i != label.meta.end()) {
+        int64_t v = atoll(i->second.c_str());
+        if (v > epoch) {
+          // newer label found; anything collected so far is outdated
+          epoch = v;
+          *out_label = label;
+          if (out_epoch) {
+            *out_epoch = epoch;
+          }
+          if (out_is_multi) {
+            *out_is_multi = true;
+          }
+          if (out_valid_positions) {
+            out_valid_positions->clear();
+          }
+        }
+        if (v >= 0 && v == epoch && out_valid_positions) {
+          // every location carrying the newest epoch is a valid copy
+          out_valid_positions->push_back(position);
+        }
+      }
+    } else if (r == 1) {
+      // tried to read but no disk
+    } else {
+      all_labels_valid = false;
+    }
+  }
+  if (epoch == -1) {
+    // not even one label read properly
+    derr << "No valid bdev label found" << dendl;
+    return -ENOENT;
+  }
+ done:
+  dout(10) << __func__ << " got " << *out_label << dendl;
+  return all_labels_valid ? 0 : 1;
+}
+
 int BlueStore::_check_or_set_bdev_label(
   string path, uint64_t size, string desc, bool create)
 {
@@ -6620,6 +6713,51 @@ int BlueStore::_check_or_set_bdev_label(
   return 0;
 }
 
+/**
+  On create: writes a fresh "main" label to every location in bdev_label_positions.
+  Otherwise: reads the label into the bdev_label cache and checks it against fsid.
+  Returns 0 on success, negative error code otherwise.
+*/
+int BlueStore::_check_or_set_main_bdev_label(
+  string path, uint64_t size, bool create)
+{
+  bluestore_bdev_label_t label;
+  if (create) {
+    label.osd_uuid = fsid;
+    label.size = size;
+    label.btime = ceph_clock_now();
+    label.description = "main";
+    if (cct->_conf.get_val<bool>("bluestore_bdev_label_multi")) {
+      label.meta["multi"] = "yes";
+      label.meta["epoch"] = "1";
+    }
+    int r = _write_bdev_label(cct, path, label, bdev_label_positions);
+    if (r < 0)
+      return r;
+  } else {
+    int r = _read_main_bdev_label(cct, path, &bdev_label, &bdev_label_valid_locations,
+                                  &bdev_label_multi, &bdev_label_epoch);
+    if (r < 0)
+      return r;
+    // validate the label that was just read into bdev_label
+    if (cct->_conf->bluestore_debug_permit_any_bdev_label) {
+      dout(20) << __func__ << " bdev " << path << " fsid " << bdev_label.osd_uuid
+               << " and fsid " << fsid << " check bypassed" << dendl;
+    } else if (bdev_label.osd_uuid != fsid) {
+      derr << __func__ << " bdev " << path << " fsid " << bdev_label.osd_uuid
+           << " does not match our fsid " << fsid << dendl;
+      return -EIO;
+    }
+    if (cct->_conf.get_val<bool>("bluestore_bdev_label_require_all")) {
+      if (r != 0) {
+        derr << __func__ << " not all labels read properly" << dendl;
+        return -EIO;
+      }
+    }
+  }
+  return 0;
+}
+
 void BlueStore::_set_alloc_sizes(void)
 {
   max_alloc_size = cct->_conf->bluestore_max_alloc_size;
@@ -6669,7 +6807,7 @@ int BlueStore::_open_bdev(bool create)
   }
 
   if (bdev->supported_bdev_label()) {
-    r = _check_or_set_bdev_label(p, bdev->get_size(), "main", create);
+    r = _check_or_set_main_bdev_label(p, bdev->get_size(), create);
     if (r < 0)
       goto fail_close;
   }
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index 23f25a12b3b15..06721f6958c0b 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -2440,8 +2440,10 @@ private:
   std::atomic config_changed = {0}; ///< Counter to determine if there is a configuration change.
 
   // caching of bdev_label
-  bool bdev_label_valid = false;             // indicator if
-  bluestore_bdev_label_t bdev_label;         // this value is valid
+  bluestore_bdev_label_t bdev_label;         // this value is valid if
+  std::vector<uint64_t> bdev_label_valid_locations; // this has any elements
+  bool bdev_label_multi = false;
+  int64_t bdev_label_epoch = -1;
 
   typedef std::map osd_pools_map;
 
@@ -2774,6 +2776,17 @@ public:
 private:
   int _check_or_set_bdev_label(std::string path, uint64_t size, std::string desc,
                                bool create);
+  int _check_or_set_main_bdev_label(
+    std::string path,
+    uint64_t size,
+    bool create);
+  static int _read_main_bdev_label(
+    CephContext* cct,
+    const std::string &path,
+    bluestore_bdev_label_t *out_label,
+    std::vector<uint64_t>* out_valid_positions = nullptr,
+    bool* out_is_multi = nullptr,
+    int64_t* out_epoch = nullptr);
   int _set_bdev_label_size(const std::string& path, uint64_t size);
 
   int _open_super_meta();
-- 
2.39.5