} // namespace ceph::osd_cmds
+void OSD::write_superblock(CephContext* cct, OSDSuperblock& sb, ObjectStore::Transaction& t)
+{
+  // Queues two copies of the encoded superblock `sb` into transaction `t`:
+  // one as object data at offset 0 of OSD_SUPERBLOCK_GOBJECT and one as the
+  // OSD_SUPERBLOCK_OMAP_KEY omap value of that same object. Nothing is
+  // submitted here; the caller queues `t`. Note that `sb` may be modified
+  // (see the baseline-feature fixup below).
+  dout(10) << "write_superblock " << sb << dendl;
+
+  // hack: at minimum it's using the baseline feature set
+  auto& incompat = sb.compat_features.incompat;
+  if (!incompat.contains(CEPH_OSD_FEATURE_INCOMPAT_BASE)) {
+    incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE);
+  }
+
+  // Encode once; the same buffer feeds both the data write and the omap key.
+  bufferlist encoded;
+  encode(sb, encoded);
+
+  t.write(coll_t::meta(), OSD_SUPERBLOCK_GOBJECT, 0, encoded.length(), encoded);
+
+  std::map<std::string, ceph::buffer::list> omap_entries;
+  omap_entries.emplace(OSD_SUPERBLOCK_OMAP_KEY, encoded);
+  t.omap_setkeys(coll_t::meta(), OSD_SUPERBLOCK_GOBJECT, omap_entries);
+}
+
int OSD::mkfs(CephContext *cct,
std::unique_ptr<ObjectStore> store,
uuid_d fsid,
sb.osd_fsid = store->get_fsid();
sb.whoami = whoami;
sb.compat_features = get_osd_initial_compat_set();
-
- bufferlist bl;
- encode(sb, bl);
-
ObjectStore::CollectionHandle ch = store->create_new_collection(
coll_t::meta());
ObjectStore::Transaction t;
t.create_collection(coll_t::meta(), 0);
- t.write(coll_t::meta(), OSD_SUPERBLOCK_GOBJECT, 0, bl.length(), bl);
+ write_superblock(cct, sb, t);
ret = store->queue_transaction(ch, std::move(t));
if (ret) {
derr << "OSD::mkfs: error while writing OSD_SUPERBLOCK_GOBJECT: "
superblock.purged_snaps_last = 0;
ObjectStore::Transaction t;
dout(10) << __func__ << " updating superblock" << dendl;
- write_superblock(t);
+ write_superblock(cct, superblock, t);
ret = store->queue_transaction(service.meta_ch, std::move(t), nullptr);
if (ret < 0) {
ss << "Error writing superblock: " << cpp_strerror(ret);
}
ObjectStore::Transaction t;
- write_superblock(t);
+ write_superblock(cct, superblock, t);
r = store->queue_transaction(service.meta_ch, std::move(t));
if (r < 0)
goto out;
superblock.mounted = service.get_boot_epoch();
superblock.clean_thru = get_osdmap_epoch();
ObjectStore::Transaction t;
- write_superblock(t);
+ write_superblock(cct, superblock, t);
int r = store->queue_transaction(service.meta_ch, std::move(t));
if (r) {
derr << "OSD::shutdown: error writing superblock: "
}
}
-void OSD::write_superblock(ObjectStore::Transaction& t)
-{
- dout(10) << "write_superblock " << superblock << dendl;
-
- //hack: at minimum it's using the baseline feature set
- if (!superblock.compat_features.incompat.contains(CEPH_OSD_FEATURE_INCOMPAT_BASE))
- superblock.compat_features.incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE);
-
- bufferlist bl;
- encode(superblock, bl);
- t.write(coll_t::meta(), OSD_SUPERBLOCK_GOBJECT, 0, bl.length(), bl);
-}
int OSD::read_superblock()
{
bufferlist bl;
- int r = store->read(service.meta_ch, OSD_SUPERBLOCK_GOBJECT, 0, 0, bl);
- if (r < 0)
- return r;
+
+ set<string> keys;
+ keys.insert(OSD_SUPERBLOCK_OMAP_KEY);
+ map<string, bufferlist> vals;
+ // Let's read from OMAP first to be able to better handle the
+ // "recover-after-an-error" case when main OSD volume data
+ // is partially corrupted (csums don't match for a bunch of onodes).
+ // As a result we might want to set the bluestore_ignore_csum_error option,
+ // which will silence disk read errors.
+ // Clearly, reading a corrupted superblock from disk in that mode would miss
+ // the error as well and would never attempt to use the still-valid OMAP replica.
+ // Hence we prefer the OMAP read over the disk one.
+ int r = store->omap_get_values(
+ service.meta_ch, OSD_SUPERBLOCK_GOBJECT, keys, &vals);
+ if (r < 0 || vals.size() == 0) {
+ dout(10) << __func__ << " attempt reading from disk replica" << dendl;
+
+ r = store->read(service.meta_ch, OSD_SUPERBLOCK_GOBJECT, 0, 0, bl);
+ if (r < 0) {
+ return -ENOENT;
+ }
+ dout(10) << __func__ << " got disk replica" << dendl;
+ } else {
+ std::swap(bl, vals.begin()->second);
+ }
auto p = bl.cbegin();
decode(superblock, p);
make_purged_snaps_oid(), &t,
m->purged_snaps);
superblock.purged_snaps_last = m->last;
- write_superblock(t);
+ write_superblock(cct, superblock, t);
store->queue_transaction(
service.meta_ch,
std::move(t));
dout(10) << __func__ << " done queueing pgs, updating superblock" << dendl;
ObjectStore::Transaction t;
superblock.last_purged_snaps_scrub = ceph_clock_now();
- write_superblock(t);
+ write_superblock(cct, superblock, t);
int tr = store->queue_transaction(service.meta_ch, std::move(t), nullptr);
ceph_assert(tr == 0);
if (is_active()) {
num++;
if (num >= cct->_conf->osd_target_transaction_size && num >= nreceived) {
service.publish_superblock(superblock);
- write_superblock(t);
+ write_superblock(cct, superblock, t);
int tr = store->queue_transaction(service.meta_ch, std::move(t), nullptr);
ceph_assert(tr == 0);
num = 0;
}
if (num > 0) {
service.publish_superblock(superblock);
- write_superblock(t);
+ write_superblock(cct, superblock, t);
int tr = store->queue_transaction(service.meta_ch, std::move(t), nullptr);
ceph_assert(tr == 0);
}
{
bufferlist bl;
::encode(pg_num_history, bl);
- t.write(coll_t::meta(), make_pg_num_history_oid(), 0, bl.length(), bl);
+ auto oid = make_pg_num_history_oid();
+ t.truncate(coll_t::meta(), oid, 0); // we don't need bytes left if new data
+ // block is shorter than the previous
+ // one. And better to trim them, e.g.
+ // this allows to avoid csum errors
+ // when issuing overwrite
+ // (which happens to be partial)
+ // and original data is corrupted.
+ // Another side effect is that the
+ // superblock is not permanently
+ // anchored to a fixed disk location
+ // any more.
+ t.write(coll_t::meta(), oid, 0, bl.length(), bl);
dout(20) << __func__ << " pg_num_history " << pg_num_history << dendl;
}
}
// superblock and commit
- write_superblock(t);
+ write_superblock(cct, superblock, t);
t.register_on_commit(new C_OnMapCommit(this, start, last, m));
store->queue_transaction(
service.meta_ch,
dout(0) << __func__ << " enabling on-disk ERASURE CODES compat feature" << dendl;
superblock.compat_features.incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
ObjectStore::Transaction t;
- write_superblock(t);
+ write_superblock(cct, superblock, t);
int err = store->queue_transaction(service.meta_ch, std::move(t), NULL);
ceph_assert(err == 0);
}