From 1cf2b906400dc4a821a5ca50ee8db317b2a82577 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 9 Dec 2019 09:44:11 -0600 Subject: [PATCH] osd: use new and improved calc_pg_role() An acting set might be [0,1,2,1], and we want pgs 1.0s2 and 1.0s4 on osd.1 to get roles 1 and 3, respectively (instead of 1 and -1). This allows the second EC PG on the OSD to have role >= 0. This probably was unnoticed before, but now it leads to hangs in the rados/thrash-erasure-code collection because proc_lease() bails out when is_nonprimary() fails (due to role < 0). Fixes: https://tracker.ceph.com/issues/43189 Signed-off-by: Sage Weil --- src/crimson/osd/osd.cc | 7 ++----- src/crimson/osd/pg.cc | 7 ++----- src/osd/OSD.cc | 14 ++++++-------- src/osd/PG.cc | 6 +----- src/osd/PeeringState.cc | 9 +++------ 5 files changed, 14 insertions(+), 29 deletions(-) diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index f173ac87ea6..1d7abd6eb4d 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -713,11 +713,8 @@ seastar::future> OSD::handle_pg_create_info( startmap->pg_to_up_acting_osds( info->pgid.pgid, &up, &up_primary, &acting, &acting_primary); - int role = startmap->calc_pg_role(whoami, acting, acting.size()); - if (!pp->is_replicated() && role != info->pgid.shard) { - role = -1; - } - + int role = startmap->calc_pg_role(pg_shard_t(whoami, info->pgid.shard), + acting); create_pg_collection( rctx.transaction, diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index 01cad4a2d2e..f66b7cacdb4 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -316,11 +316,8 @@ seastar::future<> PG::read_state(crimson::os::FuturizedStore* store) acting, up_primary, primary); - int rr = OSDMap::calc_pg_role(pg_whoami.osd, acting); - if (peering_state.get_pool().info.is_replicated() || rr == pg_whoami.shard) - peering_state.set_role(rr); - else - peering_state.set_role(-1); + int rr = OSDMap::calc_pg_role(pg_whoami, acting); + peering_state.set_role(rr); epoch_t epoch = get_osdmap_epoch(); shard_services.start_operation( diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 5b4c8b3c26b..9f332d3b609 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -4809,10 +4809,7 @@ PGRef OSD::handle_pg_create_info(const OSDMapRef& osdmap, rctx.transaction, pgid, pgid.get_split_bits(pp->get_pg_num())); init_pg_ondisk(rctx.transaction, pgid, pp); - int role = startmap->calc_pg_role(whoami, acting, acting.size()); - if (!pp->is_replicated() && role != pgid.shard) { - role = -1; - } + int role = startmap->calc_pg_role(pg_shard_t(whoami, pgid.shard), acting); PGRef pg = _make_pg(startmap, pgid); pg->ch = store->create_new_collection(pg->coll); @@ -9001,12 +8998,16 @@ void OSD::handle_pg_create(OpRequestRef op) dout(20) << "mkpg " << on << " e" << created << "@" << ci->second << dendl; + spg_t pgid; + bool mapped = osdmap->get_primary_shard(on, &pgid); + ceph_assert(mapped); + // is it still ours? vector up, acting; int up_primary = -1; int acting_primary = -1; osdmap->pg_to_up_acting_osds(on, &up, &up_primary, &acting, &acting_primary); - int role = osdmap->calc_pg_role(whoami, acting, acting.size()); + int role = osdmap->calc_pg_role(pg_shard_t(whoami, pgid.shard), acting); if (acting_primary != whoami) { dout(10) << "mkpg " << on << " not acting_primary (" << acting_primary @@ -9014,9 +9015,6 @@ void OSD::handle_pg_create(OpRequestRef op) continue; } - spg_t pgid; - bool mapped = osdmap->get_primary_shard(on, &pgid); - ceph_assert(mapped); PastIntervals pi; pg_history_t history; diff --git a/src/osd/PG.cc b/src/osd/PG.cc index a82d9e24cd2..c9a7a1bda01 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1153,11 +1153,7 @@ void PG::read_state(ObjectStore *store) acting, up_primary, primary); - int rr = OSDMap::calc_pg_role(osd->whoami, acting); - if (pool.info.is_replicated() || rr == pg_whoami.shard) - recovery_state.set_role(rr); - else - recovery_state.set_role(-1); + recovery_state.set_role(OSDMap::calc_pg_role(pg_whoami, acting)); } // init pool options diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index 17360b6f868..99534bcc620 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -580,11 +580,8 @@ void PeeringState::start_peering_interval( else state_clear(PG_STATE_REMAPPED); - int role = osdmap->calc_pg_role(pg_whoami.osd, acting, acting.size()); - if (pool.info.is_replicated() || role == pg_whoami.shard) - set_role(role); - else - set_role(-1); + int role = osdmap->calc_pg_role(pg_whoami, acting); + set_role(role); // did acting, up, primary|acker change? if (!lastmap) { @@ -2931,7 +2928,7 @@ void PeeringState::split_into( newacting, up_primary, primary); - child->role = OSDMap::calc_pg_role(pg_whoami.osd, child->acting); + child->role = OSDMap::calc_pg_role(pg_whoami, child->acting); // this comparison includes primary rank via pg_shard_t if (get_primary() != child->get_primary()) -- 2.39.5