From b3b35dba971e74a39522ec1b242750e5eeb16736 Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Mon, 25 Oct 2021 12:30:39 +0000 Subject: [PATCH] crimson/osd: write object store's meta sequentially. We're violating the contract of `ObjectStore::write_meta()`, which requires that the method be called one at a time. For instance, the implementation in `BlueStore` does read-modify-write without any locking. Having multiple requests in flight at the same time (which can happen because of the thread pool in `AlienStore`) may lead to corruption like the following: ``` [2021-10-25 13:38:38,725][ceph_volume.process][INFO ] Running command: /home/rzarz/dev/ceph1/build/bin/ceph-bluestore-tool show-label --dev /dev/nvme0n1p3 [2021-10-25 13:38:38,737][ceph_volume.process][INFO ] stdout { [2021-10-25 13:38:38,738][ceph_volume.process][INFO ] stdout "/dev/nvme0n1p3": { [2021-10-25 13:38:38,738][ceph_volume.process][INFO ] stdout "osd_uuid": "a11030f3-f41f-482c-916c-98476feaf25f", [2021-10-25 13:38:38,738][ceph_volume.process][INFO ] stdout "size": 1022903713792, [2021-10-25 13:38:38,738][ceph_volume.process][INFO ] stdout "btime": "2021-10-25T12:41:11.938439+0200", [2021-10-25 13:38:38,738][ceph_volume.process][INFO ] stdout "description": "main", [2021-10-25 13:38:38,738][ceph_volume.process][INFO ] stdout "bfm_blocks": "249732352", [2021-10-25 13:38:38,738][ceph_volume.process][INFO ] stdout "bfm_blocks_per_key": "128", [2021-10-25 13:38:38,738][ceph_volume.process][INFO ] stdout "bfm_bytes_per_block": "4096", [2021-10-25 13:38:38,738][ceph_volume.process][INFO ] stdout "bfm_size": "1022903713792", [2021-10-25 13:38:38,738][ceph_volume.process][INFO ] stdout "bluefs": "1", [2021-10-25 13:38:38,738][ceph_volume.process][INFO ] stdout "ceph_fsid": "f884fe47-b307-46f9-b021-320d7c5a427b", [2021-10-25 13:38:38,738][ceph_volume.process][INFO ] stdout "kv_backend": "rocksdb", [2021-10-25 13:38:38,738][ceph_volume.process][INFO ] stdout "mkfs_done": "yes" [2021-10-25 
13:38:38,738][ceph_volume.process][INFO ] stdout } [2021-10-25 13:38:38,738][ceph_volume.process][INFO ] stdout } [2021-10-25 13:38:38,738][ceph_volume.devices.raw.list][ERROR ] device /dev/nvme0n1p3 does not have all BlueStore data needed to be a valid OSD: ['{', ' "/dev/nvme0n1p3": {', ' "osd_uuid": "a11030f3-f41f-482c-916c-98476feaf25f",', ' "size": 1022903713792,', ' "btime": "2021-10-25T12:41:11.938439+0200",', ' "description": "main",', ' "bfm_blocks": "249732352",', ' "bfm_blocks_per_key": "128",', ' "bfm_bytes_per_block": "4096",', ' "bfm_size": "1022903713792",', ' "bluefs": "1",', ' "ceph_fsid": "f884fe47-b307-46f9-b021-320d7c5a427b",', ' "kv_backend": "rocksdb",', ' "mkfs_done": "yes"', ' }', '}'] 'whoami' [2021-10-25 13:38:38,738][ceph_volume.devices.raw.list][INFO ] device /dev/nvme0n1p3 does not have BlueStore information ``` Signed-off-by: Radoslaw Zarzynski (cherry picked from commit 92b2fe955f9cafdf6a9c6adb8eadc5f6a9fb9b8b) --- src/crimson/osd/osd.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index f2b4cde54bcc2..28ac67298fe0a 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -169,12 +169,14 @@ seastar::future<> OSD::mkfs(uuid_d osd_uuid, uuid_d cluster_fsid) superblock.compat_features = get_osd_initial_compat_set(); return _write_superblock(); }).then([cluster_fsid, this] { - return when_all_succeed( - store.write_meta("ceph_fsid", cluster_fsid.to_string()), - store.write_meta("whoami", std::to_string(whoami)), - _write_key_meta(), - store.write_meta("ready", "ready")); - }).then_unpack([cluster_fsid, this] { + return store.write_meta("ceph_fsid", cluster_fsid.to_string()); + }).then([this] { + return store.write_meta("whoami", std::to_string(whoami)); + }).then([this] { + return _write_key_meta(); + }).then([this] { + return store.write_meta("ready", "ready"); + }).then([cluster_fsid, this] { fmt::print("created object store {} for osd.{} fsid 
{}\n", local_conf().get_val("osd_data"), whoami, cluster_fsid); -- 2.39.5