git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd: write object store's meta sequentially. 43652/head
author: Radoslaw Zarzynski <rzarzyns@redhat.com>
Mon, 25 Oct 2021 12:30:39 +0000 (12:30 +0000)
committer: Radoslaw Zarzynski <rzarzyns@redhat.com>
Mon, 25 Oct 2021 13:18:15 +0000 (13:18 +0000)
We're violating the contract of `ObjectStore::write_meta()`, which
requires that the method be called in a one-by-one manner.
For instance, the implementation in `BlueStore` does read-modify-
write without any locking. Having multiple in-flight requests at
the same time (which can happen because of the thread pool in
`AlienStore`) may lead to corruption like the following:

```
[2021-10-25 13:38:38,725][ceph_volume.process][INFO  ] Running command: /home/rzarz/dev/ceph1/build/bin/ceph-bluestore-tool show-label --dev /dev/nvme0n1p3
[2021-10-25 13:38:38,737][ceph_volume.process][INFO  ] stdout {
[2021-10-25 13:38:38,738][ceph_volume.process][INFO  ] stdout "/dev/nvme0n1p3": {
[2021-10-25 13:38:38,738][ceph_volume.process][INFO  ] stdout "osd_uuid": "a11030f3-f41f-482c-916c-98476feaf25f",
[2021-10-25 13:38:38,738][ceph_volume.process][INFO  ] stdout "size": 1022903713792,
[2021-10-25 13:38:38,738][ceph_volume.process][INFO  ] stdout "btime": "2021-10-25T12:41:11.938439+0200",
[2021-10-25 13:38:38,738][ceph_volume.process][INFO  ] stdout "description": "main",
[2021-10-25 13:38:38,738][ceph_volume.process][INFO  ] stdout "bfm_blocks": "249732352",
[2021-10-25 13:38:38,738][ceph_volume.process][INFO  ] stdout "bfm_blocks_per_key": "128",
[2021-10-25 13:38:38,738][ceph_volume.process][INFO  ] stdout "bfm_bytes_per_block": "4096",
[2021-10-25 13:38:38,738][ceph_volume.process][INFO  ] stdout "bfm_size": "1022903713792",
[2021-10-25 13:38:38,738][ceph_volume.process][INFO  ] stdout "bluefs": "1",
[2021-10-25 13:38:38,738][ceph_volume.process][INFO  ] stdout "ceph_fsid": "f884fe47-b307-46f9-b021-320d7c5a427b",
[2021-10-25 13:38:38,738][ceph_volume.process][INFO  ] stdout "kv_backend": "rocksdb",
[2021-10-25 13:38:38,738][ceph_volume.process][INFO  ] stdout "mkfs_done": "yes"
[2021-10-25 13:38:38,738][ceph_volume.process][INFO  ] stdout }
[2021-10-25 13:38:38,738][ceph_volume.process][INFO  ] stdout }
[2021-10-25 13:38:38,738][ceph_volume.devices.raw.list][ERROR ] device /dev/nvme0n1p3 does not have all BlueStore data needed to be a valid OSD: ['{', '    "/dev/nvme0n1p3": {', '        "osd_uuid": "a11030f3-f41f-482c-916c-98476feaf25f",', '        "size": 1022903713792,', '        "btime": "2021-10-25T12:41:11.938439+0200",', '        "description": "main",', '        "bfm_blocks": "249732352",', '        "bfm_blocks_per_key": "128",', '        "bfm_bytes_per_block": "4096",', '        "bfm_size": "1022903713792",', '        "bluefs": "1",', '        "ceph_fsid": "f884fe47-b307-46f9-b021-320d7c5a427b",', '        "kv_backend": "rocksdb",', '        "mkfs_done": "yes"', '    }', '}']
'whoami'
[2021-10-25 13:38:38,738][ceph_volume.devices.raw.list][INFO  ] device /dev/nvme0n1p3 does not have BlueStore information
```

Signed-off-by: Radoslaw Zarzynski <rzarzyns@redhat.com>
(cherry picked from commit 92b2fe955f9cafdf6a9c6adb8eadc5f6a9fb9b8b)

src/crimson/osd/osd.cc

index f2b4cde54bcc291daf3d8473813bdfa3121f6396..28ac67298fe0a7e0dbec4b73b04267c02dc69257 100644 (file)
@@ -169,12 +169,14 @@ seastar::future<> OSD::mkfs(uuid_d osd_uuid, uuid_d cluster_fsid)
     superblock.compat_features = get_osd_initial_compat_set();
     return _write_superblock();
   }).then([cluster_fsid, this] {
-    return when_all_succeed(
-      store.write_meta("ceph_fsid", cluster_fsid.to_string()),
-      store.write_meta("whoami", std::to_string(whoami)),
-      _write_key_meta(),
-      store.write_meta("ready", "ready"));
-  }).then_unpack([cluster_fsid, this] {
+    return store.write_meta("ceph_fsid", cluster_fsid.to_string());
+  }).then([this] {
+    return store.write_meta("whoami", std::to_string(whoami));
+  }).then([this] {
+    return _write_key_meta();
+  }).then([this] {
+    return store.write_meta("ready", "ready");
+  }).then([cluster_fsid, this] {
     fmt::print("created object store {} for osd.{} fsid {}\n",
                local_conf().get_val<std::string>("osd_data"),
                whoami, cluster_fsid);