From: Samuel Just Date: Tue, 12 Jun 2012 23:01:05 +0000 (-0700) Subject: osd/: Make pg osdmap be independent of osd, other pg maps X-Git-Tag: v0.50~109^2~2^2~68 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=bbd8af028784c1dcdc1066b45129d5c5c00d691e;p=ceph.git osd/: Make pg osdmap be independent of osd, other pg maps This will allow handle_osd_map to not stop other work queues. Signed-off-by: Samuel Just --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 5d2cdb2f6000..12c3d94972a3 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1192,7 +1192,7 @@ PG *OSD::_open_lock_pg(pg_t pgid, bool no_lockdep_check, bool hold_map_lock) hobject_t logoid = make_pg_log_oid(pgid); hobject_t infooid = make_pg_biginfo_oid(pgid); if (osdmap->get_pg_type(pgid) == pg_pool_t::TYPE_REP) - pg = new ReplicatedPG(this, pool, pgid, logoid, infooid); + pg = new ReplicatedPG(this, osdmap, pool, pgid, logoid, infooid); else assert(0); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index f5b9329a6059..0f7b2a6fe93b 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -40,17 +40,9 @@ static ostream& _prefix(std::ostream *_dout, const PG *pg) { return *_dout << pg->gen_prefix(); } -/* - * take osd->map_lock to get a valid osdmap reference - */ void PG::lock(bool no_lockdep) { - osd->map_lock.get_read(); - OSDMapRef map = osd->osdmap; - osd->map_lock.put_read(); _lock.Lock(no_lockdep); - osdmap_ref.swap(map); - // if we have unrecorded dirty state with the lock dropped, there is a bug assert(!dirty_info); assert(!dirty_log); @@ -58,15 +50,9 @@ void PG::lock(bool no_lockdep) dout(30) << "lock" << dendl; } -/* - * caller holds osd->map_lock, no need to take it to get a valid - * osdmap reference. 
- */ void PG::lock_with_map_lock_held(bool no_lockdep) { _lock.Lock(no_lockdep); - osdmap_ref = osd->osdmap; - // if we have unrecorded dirty state with the lock dropped, there is a bug assert(!dirty_info); assert(!dirty_log); @@ -82,15 +68,6 @@ void PG::reassert_lock_with_map_lock_held() dout(30) << "reassert_lock_with_map_lock_held" << dendl; } -void PG::unlock() -{ - dout(30) << "unlock" << dendl; - assert(!dirty_info); - assert(!dirty_log); - osdmap_ref.reset(); - _lock.Unlock(); -} - std::string PG::gen_prefix() const { stringstream out; @@ -4023,6 +4000,10 @@ void PG::take_waiters() void PG::handle_peering_event(CephPeeringEvtRef evt, RecoveryCtx *rctx) { + if (!require_same_or_newer_map(evt->get_epoch_sent())) { + peering_waiters.push_back(evt); + return; + } if (old_peering_evt(evt)) return; assert(!deleting); @@ -4083,6 +4064,7 @@ void PG::handle_advance_map(OSDMapRef osdmap, OSDMapRef lastmap, RecoveryCtx *rctx) { dout(10) << "handle_advance_map " << newup << "/" << newacting << dendl; + osdmap_ref = osdmap; AdvMap evt(osdmap, lastmap, newup, newacting); recovery_state.handle_event(evt, rctx); } diff --git a/src/osd/PG.h b/src/osd/PG.h index 58ecd5aff399..9ccbe7a21c9c 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -337,9 +337,9 @@ public: /*** PG ****/ protected: OSD *osd; + OSDMapRef osdmap_ref; PGPool *pool; - OSDMapRef osdmap_ref; OSDMapRef get_osdmap() const { assert(is_locked()); assert(osdmap_ref); @@ -361,7 +361,10 @@ public: bool deleting; // true while RemoveWQ should be chewing on us void lock(bool no_lockdep = false); - void unlock(); + void unlock() { + //generic_dout(0) << this << " " << info.pgid << " unlock" << dendl; + _lock.Unlock(); + } /* During handle_osd_map, the osd holds a write lock to the osdmap. 
* *_with_map_lock_held assume that the map_lock is already held */ @@ -397,6 +400,7 @@ public: } + list<OpRequestRef> op_waiters; list<OpRequestRef> op_queue; // op queue bool dirty_info, dirty_log; @@ -799,6 +803,7 @@ public: }; typedef std::tr1::shared_ptr<CephPeeringEvt> CephPeeringEvtRef; list<CephPeeringEvtRef> peering_queue; // op queue + list<CephPeeringEvtRef> peering_waiters; struct QueryState : boost::statechart::event< QueryState > { Formatter *f; @@ -1243,8 +1248,9 @@ public: public: - PG(OSD *o, PGPool *_pool, pg_t p, const hobject_t& loid, const hobject_t& ioid) : - osd(o), pool(_pool), + PG(OSD *o, OSDMapRef curmap, + PGPool *_pool, pg_t p, const hobject_t& loid, const hobject_t& ioid) : + osd(o), osdmap_ref(curmap), pool(_pool), _lock("PG::_lock"), ref(0), deleting(false), dirty_info(false), dirty_log(false), info(p), coll(p), log_oid(loid), biginfo_oid(ioid), @@ -1378,6 +1384,9 @@ public: bool old_peering_evt(CephPeeringEvtRef evt) { return old_peering_msg(evt->get_epoch_sent(), evt->get_epoch_requested()); } + bool require_same_or_newer_map(epoch_t e) { + return e <= get_osdmap()->get_epoch(); + } // recovery bits void take_waiters(); diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index ffff3a4d31e6..9c8217833be5 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -582,8 +582,10 @@ void ReplicatedPG::calc_trim_to() } } -ReplicatedPG::ReplicatedPG(OSD *o, PGPool *_pool, pg_t p, const hobject_t& oid, const hobject_t& ioid) : - PG(o, _pool, p, oid, ioid), temp_created(false), +ReplicatedPG::ReplicatedPG(OSD *o, OSDMapRef curmap, + PGPool *_pool, pg_t p, const hobject_t& oid, + const hobject_t& ioid) : + PG(o, curmap, _pool, p, oid, ioid), temp_created(false), temp_coll(coll_t::make_temp_coll(p)), snap_trimmer_machine(this) { snap_trimmer_machine.initiate(); @@ -1053,6 +1055,10 @@ void ReplicatedPG::log_subop_stats(OpRequestRef op, int tag_inb, int tag_lat) void ReplicatedPG::do_sub_op(OpRequestRef op) { MOSDSubOp *m = (MOSDSubOp*)op->request; + if (!require_same_or_newer_map(m->map_epoch)) { +
op_waiters.push_back(op); + return; + } assert(m->get_header().type == MSG_OSD_SUBOP); dout(15) << "do_sub_op " << *op->request << dendl; diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index a715e9e3c852..2192d0c187c6 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -788,7 +788,9 @@ protected: int get_pgls_filter(bufferlist::iterator& iter, PGLSFilter **pfilter); public: - ReplicatedPG(OSD *o, PGPool *_pool, pg_t p, const hobject_t& oid, const hobject_t& ioid); + ReplicatedPG(OSD *o, OSDMapRef curmap, + PGPool *_pool, pg_t p, const hobject_t& oid, + const hobject_t& ioid); ~ReplicatedPG() {} int do_command(vector<string>& cmd, ostream& ss, bufferlist& idata, bufferlist& odata);