From 89d0c357964ff28b83ac541c4d946f712a7bee47 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Jan 2018 15:05:25 -0600 Subject: [PATCH] osd: record final pg_pool_t when a pool is deleted Also, prevent OSD start if we have a PG whose pool is deleted and no stored pool info. (User should downgrade, let PG deletion complete, then upgrade.) Signed-off-by: Sage Weil --- src/osd/OSD.cc | 107 ++++++++++++++++++++++++++++++++++++------------- src/osd/OSD.h | 11 ++++- src/osd/PG.h | 16 ++++---- 3 files changed, 95 insertions(+), 39 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 064e5d98de160..13898b2b31545 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -2581,6 +2581,36 @@ int OSD::init() goto out; } + // load up "current" osdmap + assert_warn(!osdmap); + if (osdmap) { + derr << "OSD::init: unable to read current osdmap" << dendl; + r = -EINVAL; + goto out; + } + osdmap = get_map(superblock.current_epoch); + + // make sure we don't have legacy pgs deleting + { + vector ls; + int r = store->list_collections(ls); + ceph_assert(r >= 0); + for (auto c : ls) { + spg_t pgid; + if (c.is_pg(&pgid) && + !osdmap->have_pg_pool(pgid.pool())) { + ghobject_t oid = make_final_pool_info_oid(pgid.pool()); + if (!store->exists(coll_t::meta(), oid)) { + derr << __func__ << " missing pg_pool_t for deleted pool " + << pgid.pool() << " for pg " << pgid + << "; please downgrade to luminous and allow " + << "pg deletion to complete before upgrading" << dendl; + ceph_abort(); + } + } + } + } + initial = get_osd_initial_compat_set(); diff = superblock.compat_features.unsupported(initial); if (superblock.compat_features.merge(initial)) { @@ -2613,14 +2643,6 @@ int OSD::init() dout(1) << "warning: got an error loading one or more classes: " << cpp_strerror(r) << dendl; } - // load up "current" osdmap - assert_warn(!osdmap); - if (osdmap) { - derr << "OSD::init: unable to read current osdmap" << dendl; - r = -EINVAL; - goto out; - } - osdmap = 
get_map(superblock.current_epoch); check_osdmap_features(); create_recoverystate_perf(); @@ -3751,13 +3773,6 @@ void OSD::recursive_remove_collection(CephContext* cct, // ====================================================== // PG's -PGPool OSD::_get_pool(int id, OSDMapRef createmap) -{ - PGPool p = PGPool(cct, createmap, id); - dout(10) << "_get_pool " << p.id << dendl; - return p; -} - PG *OSD::_open_lock_pg( OSDMapRef createmap, spg_t pgid, bool no_lockdep_check) @@ -3780,16 +3795,28 @@ PG* OSD::_make_pg( spg_t pgid) { dout(10) << "_open_lock_pg " << pgid << dendl; - PGPool pool = _get_pool(pgid.pool(), createmap); - - // create + pg_pool_t pi; + string name; + if (createmap->have_pg_pool(pgid.pool())) { + pi = *createmap->get_pg_pool(pgid.pool()); + name = createmap->get_pool_name(pgid.pool()); + } else { + // pool was deleted; grab final pg_pool_t off disk. + ghobject_t oid = make_final_pool_info_oid(pgid.pool()); + bufferlist bl; + int r = store->read(coll_t::meta(), oid, 0, 0, bl); + ceph_assert(r >= 0); + auto p = bl.begin(); + decode(pi, p); + decode(name, p); + } + PGPool pool(cct, createmap, pgid.pool(), pi, name); PG *pg; - if (createmap->get_pg_type(pgid.pgid) == pg_pool_t::TYPE_REPLICATED || - createmap->get_pg_type(pgid.pgid) == pg_pool_t::TYPE_ERASURE) + if (pi.type == pg_pool_t::TYPE_REPLICATED || + pi.type == pg_pool_t::TYPE_ERASURE) pg = new PrimaryLogPG(&service, createmap, pool, pgid); else ceph_abort(); - return pg; } @@ -7015,12 +7042,12 @@ struct C_OnMapCommit : public Context { struct C_OnMapApply : public Context { OSDService *service; - list pinned_maps; + map pinned_maps; epoch_t e; C_OnMapApply(OSDService *service, - const list &pinned_maps, + map &&pinned_maps, epoch_t e) - : service(service), pinned_maps(pinned_maps), e(e) {} + : service(service), pinned_maps(std::move(pinned_maps)), e(e) {} void finish(int r) override { service->clear_map_bl_cache_pins(e); } @@ -7089,7 +7116,7 @@ void OSD::handle_osd_map(MOSDMap *m) // off of disk. 
Otherwise these maps will probably not stay in the cache, // and reading those OSDMaps before they are actually written can result // in a crash. - list pinned_maps; + map pinned_maps; if (m->fsid != monc->get_fsid()) { dout(0) << "handle_osd_map fsid " << m->fsid << " != " << monc->get_fsid() << dendl; @@ -7211,7 +7238,7 @@ void OSD::handle_osd_map(MOSDMap *m) ghobject_t fulloid = get_osdmap_pobject_name(e); t.write(coll_t::meta(), fulloid, 0, bl.length(), bl); pin_map_bl(e, bl); - pinned_maps.push_back(add_map(o)); + pinned_maps[e] = add_map(o); got_full_map(e); continue; @@ -7269,7 +7296,7 @@ void OSD::handle_osd_map(MOSDMap *m) ghobject_t fulloid = get_osdmap_pobject_name(e); t.write(coll_t::meta(), fulloid, 0, fbl.length(), fbl); pin_map_bl(e, fbl); - pinned_maps.push_back(add_map(o)); + pinned_maps[e] = add_map(o); continue; } @@ -7302,12 +7329,36 @@ void OSD::handle_osd_map(MOSDMap *m) superblock.clean_thru = last; } + // check for deleted pools + OSDMapRef lastmap; + for (auto& i : pinned_maps) { + if (!lastmap) { + lastmap = get_map(i.first - 1); + } + assert(lastmap->get_epoch() + 1 == i.second->get_epoch()); + for (auto& j : lastmap->get_pools()) { + if (!i.second->have_pg_pool(j.first)) { + dout(10) << __func__ << " recording final pg_pool_t for pool " + << j.first << dendl; + // this information is needed by _make_pg() if we have to restart before + // the pool is deleted and need to instantiate a new (zombie) PG[Pool].
+ ghobject_t obj = make_final_pool_info_oid(j.first); + bufferlist bl; + encode(j.second, bl, CEPH_FEATURES_ALL); + string name = lastmap->get_pool_name(j.first); + encode(name, bl); + t.write(coll_t::meta(), obj, 0, bl.length(), bl); + } + } + lastmap = i.second; + } + // superblock and commit write_superblock(t); store->queue_transaction( service.meta_osr.get(), std::move(t), - new C_OnMapApply(&service, pinned_maps, last), + new C_OnMapApply(&service, std::move(pinned_maps), last), new C_OnMapCommit(this, start, last, m), 0); service.publish_superblock(superblock); } diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 9be91933884ea..1efb051b50e65 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -1183,6 +1183,15 @@ public: hobject_t oid(sobject_t("infos", CEPH_NOSNAP)); return ghobject_t(oid); } + + static ghobject_t make_final_pool_info_oid(int64_t pool) { + return ghobject_t( + hobject_t( + sobject_t( + object_t(string("final_pool_") + stringify(pool)), + CEPH_NOSNAP))); + } + static void recursive_remove_collection(CephContext* cct, ObjectStore *store, spg_t pgid, @@ -1825,8 +1834,6 @@ protected: map > peering_wait_for_split; PGRecoveryStats pg_recovery_stats; - PGPool _get_pool(int id, OSDMapRef createmap); - PG *_lookup_lock_pg_with_map_lock_held(spg_t pgid); PG *_lookup_lock_pg(spg_t pgid); diff --git a/src/osd/PG.h b/src/osd/PG.h index 48096dfcc225e..14ff1d3799b01 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -225,18 +225,16 @@ struct PGPool { interval_set cached_removed_snaps; // current removed_snaps set interval_set newly_removed_snaps; // newly removed in the last epoch - PGPool(CephContext* cct, OSDMapRef map, int64_t i) + PGPool(CephContext* cct, OSDMapRef map, int64_t i, const pg_pool_t& info, + const string& name) : cct(cct), cached_epoch(map->get_epoch()), id(i), - name(map->get_pool_name(id)) { - const pg_pool_t *pi = map->get_pg_pool(id); - if (pi) { - info = *pi; - snapc = pi->get_snap_context(); - if (map->require_osd_release < 
CEPH_RELEASE_MIMIC) { - pi->build_removed_snaps(cached_removed_snaps); - } + name(name), + info(info) { + snapc = info.get_snap_context(); + if (map->require_osd_release < CEPH_RELEASE_MIMIC) { + info.build_removed_snaps(cached_removed_snaps); } } -- 2.39.5