From e43546dee9246773ffd6877b4f9495f1ec61cd55 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 9 Mar 2012 13:34:55 -0800 Subject: [PATCH] osd: fix watch_lock vs map_lock ordering watch_lock is inside map_lock (and pg->lock), which means we need to drop it to take pg->lock here. That means verifying in handle_watch_timeout that we haven't raced with another thread canceling the timeout event, which would be indicated by - the entity not appearing in unconnected_watchers - the entity having a different (presumably newer) expire time Fixes: #2103 Signed-off-by: Sage Weil Reviewed-by: Samuel Just --- src/osd/OSD.cc | 4 ++++ src/osd/ReplicatedPG.cc | 22 +++++++++++++++------- src/osd/ReplicatedPG.h | 2 +- src/osd/Watch.h | 2 +- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index b64b25dc0fc8..2126010f03c3 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -2023,7 +2023,11 @@ void OSD::handle_watch_timeout(void *obc, entity_name_t entity, utime_t expire) { + // watch_lock is inside pg->lock; handle_watch_timeout checks for the race. + watch_lock.Unlock(); pg->lock(); + watch_lock.Lock(); + pg->handle_watch_timeout(obc, entity, expire); pg->unlock(); pg->put(); diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index f39a708b6113..bee16773911a 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -1535,10 +1535,10 @@ void ReplicatedPG::remove_watchers_and_notifies() for (map::iterator witer = obc->watchers.begin(); witer != obc->watchers.end(); remove_watcher(obc, (witer++)->first)) ; - for (map::iterator iter = obc->unconnected_watchers.begin(); + for (map::iterator iter = obc->unconnected_watchers.begin(); iter != obc->unconnected_watchers.end(); ) { - map::iterator i = iter++; + map::iterator i = iter++; unregister_unconnected_watcher(obc, i->first); } for (map::iterator niter = obc->notifs.begin(); @@ -2980,7 +2980,7 @@ void ReplicatedPG::do_osd_op_effects(OpContext *ctx) session->get(); session->watches[obc] = get_osdmap()->object_locator_to_pg(soid.oid, obc->obs.oi.oloc); } - map::iterator un_iter = + map::iterator un_iter = obc->unconnected_watchers.find(entity); if (un_iter != obc->unconnected_watchers.end()) { unregister_unconnected_watcher(obc, un_iter->first); @@ -3659,10 +3659,10 @@ void ReplicatedPG::register_unconnected_watcher(void *_obc, pgid.set_ps(obc->obs.oi.soid.hash); get(); obc->ref++; - Context *cb = new Watch::C_WatchTimeout(osd, - static_cast(obc), - this, - entity, expire); + Watch::C_WatchTimeout *cb = new Watch::C_WatchTimeout(osd, + static_cast(obc), + this, + entity, expire); osd->watch_timer.add_event_at(expire, cb); obc->unconnected_watchers[entity] = cb; } @@ -3672,6 +3672,14 @@ void ReplicatedPG::handle_watch_timeout(void *_obc, utime_t expire) { ObjectContext *obc = static_cast(_obc); + + if (obc->unconnected_watchers.count(entity) == 0 || + obc->unconnected_watchers[entity]->expire != expire) { + dout(10) << "handle_watch_timeout must have raced, no/wrong unconnected_watcher " << entity << dendl; + put_object_context(obc); + return; + } + obc->unconnected_watchers.erase(entity); obc->obs.oi.watchers.erase(entity); diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index c1a376d4f4b5..4623aa805132 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -278,7 +278,7 @@ public: // any entity in obs.oi.watchers MUST be in either watchers or unconnected_watchers. map watchers; - map unconnected_watchers; + map unconnected_watchers; map notifs; ObjectContext(const object_info_t &oi_, bool exists_, SnapSetContext *ssc_) diff --git a/src/osd/Watch.h b/src/osd/Watch.h index fa1bb99e6bff..8fdfbd710a8e 100644 --- a/src/osd/Watch.h +++ b/src/osd/Watch.h @@ -65,8 +65,8 @@ public: void *obc; void *pg; entity_name_t entity; - utime_t expire; public: + utime_t expire; C_WatchTimeout(OSD *_osd, void *_obc, void *_pg, entity_name_t _entity, utime_t _expire) : osd(_osd), obc(_obc), pg(_pg), entity(_entity), expire(_expire) {} -- 2.47.3