From: Zengran Zhang Date: Sat, 9 Mar 2019 06:48:28 +0000 (+0800) Subject: OSD: OSDMapRef access by multiple threads is unsafe X-Git-Tag: v15.1.0~3019^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F26874%2Fhead;p=ceph.git OSD: OSDMapRef access by multiple threads is unsafe We update OSD::osdmap in OSD::_committed_osd_maps(), which is executed by the objectstore's finisher thread, while PG::sched_scrub() is called by a worker thread of the OSD's sharded work queue. We push the osdmap updates down to the PGs in OSD::consume_map(), which is in turn called by OSD::_committed_osd_maps(), where osdmap is updated. So it is not a big deal if we are checking a stale CEPH_OSDMAP_NODEEP_SCRUB flag; this flag will also be updated with the latest osdmap very soon. Signed-off-by: Kefu Chai Signed-off-by: Zengran Zhang --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 0e4e92dd65fb..16f85379e37d 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -469,7 +469,7 @@ void OSDService::shutdown() f->stop(); } - osdmap = OSDMapRef(); + publish_map(OSDMapRef()); next_osdmap = OSDMapRef(); } @@ -3954,7 +3954,10 @@ int OSD::shutdown() monc->shutdown(); osd_lock.Unlock(); + map_lock.get_write(); osdmap = OSDMapRef(); + map_lock.put_write(); + for (auto s : shards) { std::lock_guard l(s->osdmap_lock); s->shard_osdmap = OSDMapRef(); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 20f48ecee8ff..ffdce25ab225 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -4298,7 +4298,7 @@ bool PG::sched_scrub() time_for_deep = (time_for_deep || deep_coin_flip); //NODEEP_SCRUB so ignore time initiated deep-scrub - if (osd->osd->get_osdmap()->test_flag(CEPH_OSDMAP_NODEEP_SCRUB) || + if (get_osdmap()->test_flag(CEPH_OSDMAP_NODEEP_SCRUB) || pool.info.has_flag(pg_pool_t::FLAG_NODEEP_SCRUB)) { time_for_deep = false; nodeep_scrub = true; @@ -4308,7 +4308,7 @@ bool PG::sched_scrub() ceph_assert(!scrubber.must_deep_scrub); //NOSCRUB so skip regular scrubs - if
((osd->osd->get_osdmap()->test_flag(CEPH_OSDMAP_NOSCRUB) || + if ((get_osdmap()->test_flag(CEPH_OSDMAP_NOSCRUB) || pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB)) && !time_for_deep) { if (scrubber.reserved) { // cancel scrub if it is still in scheduling,