]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: use new and improved calc_pg_role()
authorSage Weil <sage@redhat.com>
Mon, 9 Dec 2019 15:44:11 +0000 (09:44 -0600)
committerSage Weil <sage@redhat.com>
Mon, 9 Dec 2019 15:51:12 +0000 (09:51 -0600)
An acting set might be [0,1,2,1], and we want pgs 1.0s2 and 1.0s4 on
osd.1 to get roles 1 and 3, respectively (instead of 1 and -1).  This
allows the second EC PG on the OSD to have role >= 0.

This probably was unnoticed before, but now it leads to hangs in the
rados/thrash-erasure-code collection because proc_lease() bails out
when is_nonprimary() fails (due to role < 0).

Fixes: https://tracker.ceph.com/issues/43189
Signed-off-by: Sage Weil <sage@redhat.com>
src/crimson/osd/osd.cc
src/crimson/osd/pg.cc
src/osd/OSD.cc
src/osd/PG.cc
src/osd/PeeringState.cc

index f173ac87ea634604d5f54e6a80d7e30e000091b7..1d7abd6eb4dc3ef67d9a1926343cfbd8ee564db1 100644 (file)
@@ -713,11 +713,8 @@ seastar::future<Ref<PG>> OSD::handle_pg_create_info(
          startmap->pg_to_up_acting_osds(
            info->pgid.pgid, &up, &up_primary, &acting, &acting_primary);
 
-         int role = startmap->calc_pg_role(whoami, acting, acting.size());
-         if (!pp->is_replicated() && role != info->pgid.shard) {
-           role = -1;
-         }
-
+         int role = startmap->calc_pg_role(pg_shard_t(whoami, info->pgid.shard),
+                                           acting);
 
          create_pg_collection(
            rctx.transaction,
index 01cad4a2d2e9a97e432dd91e00bfce6cfb02343f..f66b7cacdb472da02f78803b8907f375e87cff03 100644 (file)
@@ -316,11 +316,8 @@ seastar::future<> PG::read_state(crimson::os::FuturizedStore* store)
        acting,
        up_primary,
        primary);
-    int rr = OSDMap::calc_pg_role(pg_whoami.osd, acting);
-    if (peering_state.get_pool().info.is_replicated() || rr == pg_whoami.shard)
-       peering_state.set_role(rr);
-    else
-       peering_state.set_role(-1);
+    int rr = OSDMap::calc_pg_role(pg_whoami, acting);
+    peering_state.set_role(rr);
 
     epoch_t epoch = get_osdmap_epoch();
     shard_services.start_operation<LocalPeeringEvent>(
index 5b4c8b3c26b6dc5898d5ccf4b12ec3e90eb017f3..9f332d3b609a89628e9beb722ebb4588799e6ffa 100644 (file)
@@ -4809,10 +4809,7 @@ PGRef OSD::handle_pg_create_info(const OSDMapRef& osdmap,
     rctx.transaction, pgid, pgid.get_split_bits(pp->get_pg_num()));
   init_pg_ondisk(rctx.transaction, pgid, pp);
 
-  int role = startmap->calc_pg_role(whoami, acting, acting.size());
-  if (!pp->is_replicated() && role != pgid.shard) {
-    role = -1;
-  }
+  int role = startmap->calc_pg_role(pg_shard_t(whoami, pgid.shard), acting);
 
   PGRef pg = _make_pg(startmap, pgid);
   pg->ch = store->create_new_collection(pg->coll);
@@ -9001,12 +8998,16 @@ void OSD::handle_pg_create(OpRequestRef op)
 
     dout(20) << "mkpg " << on << " e" << created << "@" << ci->second << dendl;
 
+    spg_t pgid;
+    bool mapped = osdmap->get_primary_shard(on, &pgid);
+    ceph_assert(mapped);
+
     // is it still ours?
     vector<int> up, acting;
     int up_primary = -1;
     int acting_primary = -1;
     osdmap->pg_to_up_acting_osds(on, &up, &up_primary, &acting, &acting_primary);
-    int role = osdmap->calc_pg_role(whoami, acting, acting.size());
+    int role = osdmap->calc_pg_role(pg_shard_t(whoami, pgid.shard), acting);
 
     if (acting_primary != whoami) {
       dout(10) << "mkpg " << on << "  not acting_primary (" << acting_primary
@@ -9014,9 +9015,6 @@ void OSD::handle_pg_create(OpRequestRef op)
       continue;
     }
 
-    spg_t pgid;
-    bool mapped = osdmap->get_primary_shard(on, &pgid);
-    ceph_assert(mapped);
 
     PastIntervals pi;
     pg_history_t history;
index a82d9e24cd2eb5f46a877850f3829fa0768c1c8e..c9a7a1bda01b72ea39da4ac68c5fe279b71b8c7e 100644 (file)
@@ -1153,11 +1153,7 @@ void PG::read_state(ObjectStore *store)
       acting,
       up_primary,
       primary);
-    int rr = OSDMap::calc_pg_role(osd->whoami, acting);
-    if (pool.info.is_replicated() || rr == pg_whoami.shard)
-      recovery_state.set_role(rr);
-    else
-      recovery_state.set_role(-1);
+    recovery_state.set_role(OSDMap::calc_pg_role(pg_whoami, acting));
   }
 
   // init pool options
index 17360b6f868c6189c5e004f19b8870436efff863..99534bcc62018a42d468e4c5106b6f88b60fcd4a 100644 (file)
@@ -580,11 +580,8 @@ void PeeringState::start_peering_interval(
   else
     state_clear(PG_STATE_REMAPPED);
 
-  int role = osdmap->calc_pg_role(pg_whoami.osd, acting, acting.size());
-  if (pool.info.is_replicated() || role == pg_whoami.shard)
-    set_role(role);
-  else
-    set_role(-1);
+  int role = osdmap->calc_pg_role(pg_whoami, acting);
+  set_role(role);
 
   // did acting, up, primary|acker change?
   if (!lastmap) {
@@ -2931,7 +2928,7 @@ void PeeringState::split_into(
     newacting,
     up_primary,
     primary);
-  child->role = OSDMap::calc_pg_role(pg_whoami.osd, child->acting);
+  child->role = OSDMap::calc_pg_role(pg_whoami, child->acting);
 
   // this comparison includes primary rank via pg_shard_t
   if (get_primary() != child->get_primary())