From: Sage Weil
Date: Sun, 20 Jul 2014 21:10:41 +0000 (-0700)
Subject: mds: use lock-safe OSDMap accessors; adjust Objecter wait_for_map call
X-Git-Tag: v0.86~213^2~69
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=fad36411c7fa49b2319c227993ff37feee579d42;p=ceph.git

mds: use lock-safe OSDMap accessors; adjust Objecter wait_for_map call

We need to handle a race between when we inspect the OSDMap and when we
try to wait for the next map.

Signed-off-by: Sage Weil
---

diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc
index 85b0a27739d..d7b07304db7 100644
--- a/src/mds/MDS.cc
+++ b/src/mds/MDS.cc
@@ -885,10 +885,6 @@ void MDS::handle_mds_beacon(MMDSBeacon *m)
   m->put();
 }
 
-void MDS::request_osdmap(Context *c) {
-  objecter->wait_for_new_map(c, osdmap->get_epoch());
-}
-
 /* This function DOES put the passed message before returning*/
 void MDS::handle_command(MMonCommand *m)
 {
@@ -1506,18 +1502,21 @@ void MDS::replay_start()
 
   calc_recovery_set();
 
+  const OSDMap *osdmap = objecter->get_osdmap_read();
+  epoch_t e = osdmap->get_epoch();
+  objecter->put_osdmap_read();
+
   dout(1) << " need osdmap epoch " << mdsmap->get_last_failure_osd_epoch()
-          <<", have " << osdmap->get_epoch()
-          << dendl;
+          << ", have " << e << dendl;
 
   // start?
-  if (osdmap->get_epoch() >= mdsmap->get_last_failure_osd_epoch()) {
+  if (e >= mdsmap->get_last_failure_osd_epoch()) {
     boot_start();
   } else {
     dout(1) << " waiting for osdmap " << mdsmap->get_last_failure_osd_epoch()
             << " (which blacklists prior instance)" << dendl;
-    objecter->wait_for_new_map(new C_MDS_BootStart(this, MDS_BOOT_INITIAL),
-                               mdsmap->get_last_failure_osd_epoch());
+    objecter->wait_for_map(mdsmap->get_last_failure_osd_epoch(),
+                           new C_MDS_BootStart(this, MDS_BOOT_INITIAL));
   }
 }
 
@@ -1549,11 +1548,15 @@ inline void MDS::standby_replay_restart()
 {
   dout(1) << "standby_replay_restart" << (standby_replaying ? " (as standby)":" (final takeover pass)") << dendl;
 
-  if (!standby_replaying && osdmap->get_epoch() < mdsmap->get_last_failure_osd_epoch()) {
+  const OSDMap *osdmap = objecter->get_osdmap_read();
+  epoch_t e = osdmap->get_epoch();
+  objecter->put_osdmap_read();
+
+  if (!standby_replaying && e < mdsmap->get_last_failure_osd_epoch()) {
     dout(1) << " waiting for osdmap " << mdsmap->get_last_failure_osd_epoch()
             << " (which blacklists prior instance)" << dendl;
-    objecter->wait_for_new_map(new C_MDS_BootStart(this, MDS_BOOT_PREPARE_LOG),
-                               mdsmap->get_last_failure_osd_epoch());
+    objecter->wait_for_map(mdsmap->get_last_failure_osd_epoch(),
+                           new C_MDS_BootStart(this, MDS_BOOT_PREPARE_LOG));
   } else {
     mdlog->get_journaler()->reread_head_and_probe(
       new C_MDS_StandbyReplayRestartFinish(this, mdlog->get_journaler()->get_read_pos()));
diff --git a/src/mds/MDS.h b/src/mds/MDS.h
index fe3eab12738..a5a3aaf797b 100644
--- a/src/mds/MDS.h
+++ b/src/mds/MDS.h
@@ -434,8 +434,6 @@ class MDS : public Dispatcher, public md_config_obs_t {
   void beacon_send();
   void handle_mds_beacon(MMDSBeacon *m);
 
-  void request_osdmap(Context *c);
-
   void inc_dispatch_depth() { ++dispatch_depth; }
   void dec_dispatch_depth() { --dispatch_depth; }
 
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index ec77f81abaa..48d06919007 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -32,6 +32,8 @@
 
 #include "msg/Messenger.h"
 
+#include "osdc/Objecter.h"
+
 #include "messages/MClientSession.h"
 #include "messages/MClientRequest.h"
 #include "messages/MClientReply.h"
@@ -3582,7 +3584,8 @@ struct keys_and_values
   qi::rule key, value;
 };
 
-int Server::parse_layout_vxattr(string name, string value, ceph_file_layout *layout)
+int Server::parse_layout_vxattr(string name, string value, const OSDMap *osdmap,
+                                ceph_file_layout *layout)
 {
   dout(20) << "parse_layout_vxattr name " << name << " value '" << value << "'" << dendl;
   try {
@@ -3599,7 +3602,7 @@ int Server::parse_layout_vxattr(string name, string value, ceph_file_layout *lay
       if (begin != end)
         return -EINVAL;
       for (map::iterator q = m.begin(); q != m.end(); ++q) {
-        int r = parse_layout_vxattr(string("layout.") + q->first, q->second, layout);
+        int r = parse_layout_vxattr(string("layout.") + q->first, q->second, osdmap, layout);
         if (r < 0)
           return r;
       }
@@ -3613,7 +3616,7 @@ int Server::parse_layout_vxattr(string name, string value, ceph_file_layout *lay
       try {
         layout->fl_pg_pool = boost::lexical_cast(value);
       } catch (boost::bad_lexical_cast const&) {
-        int64_t pool = mds->osdmap->lookup_pg_pool_name(value);
+        int64_t pool = osdmap->lookup_pg_pool_name(value);
         if (pool < 0) {
           dout(10) << " unknown pool " << value << dendl;
           return -ENOENT;
@@ -3673,15 +3676,18 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur,
       layout = mds->mdcache->default_file_layout;
 
     rest = name.substr(name.find("layout"));
-    int r = parse_layout_vxattr(rest, value, &layout);
+    const OSDMap *osdmap = mds->objecter->get_osdmap_read();
+    int r = parse_layout_vxattr(rest, value, osdmap, &layout);
+    mds->objecter->put_osdmap_read();
     if (r < 0) {
       if (r == -ENOENT) {
         if (!mdr->waited_for_osdmap) {
-          // send request to get latest map, but don't wait if
-          // we don't get anything newer than what we have
+          // make sure we have the latest map.
+          // FIXME: we should get the client's osdmap epoch and just
+          // make sure we have *that*.
           mdr->waited_for_osdmap = true;
-          mds->request_osdmap(
-            new C_MDS_RetryRequest(mdcache, mdr));
+          mds->objecter->wait_for_latest_osdmap(
+            new C_MDS_RetryRequest(mdcache, mdr));
           return;
         }
         r = -EINVAL;
@@ -3703,15 +3709,18 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur,
     }
     ceph_file_layout layout = cur->get_projected_inode()->layout;
     rest = name.substr(name.find("layout"));
-    int r = parse_layout_vxattr(rest, value, &layout);
+    const OSDMap *osdmap = mds->objecter->get_osdmap_read();
+    int r = parse_layout_vxattr(rest, value, osdmap, &layout);
+    mds->objecter->put_osdmap_read();
     if (r < 0) {
       if (r == -ENOENT) {
         if (!mdr->waited_for_osdmap) {
-          // send request to get latest map, but don't wait if
-          // we don't get anything newer than what we have
+          // make sure we have the latest map.
+          // FIXME: we should get the client's osdmap epoch and just
+          // make sure we have *that*.
           mdr->waited_for_osdmap = true;
-          mds->request_osdmap(
-            new C_MDS_RetryRequest(mdcache, mdr));
+          mds->objecter->wait_for_latest_osdmap(
+            new C_MDS_RetryRequest(mdcache, mdr));
           return;
         }
         r = -EINVAL;
diff --git a/src/mds/Server.h b/src/mds/Server.h
index 42308918958..5a12bd505c7 100644
--- a/src/mds/Server.h
+++ b/src/mds/Server.h
@@ -17,6 +17,7 @@
 
 #include "MDS.h"
 
+class OSDMap;
 class PerfCounters;
 class LogEvent;
 class EMetaBlob;
@@ -162,7 +163,8 @@ public:
   void handle_client_setlayout(MDRequestRef& mdr);
   void handle_client_setdirlayout(MDRequestRef& mdr);
 
-  int parse_layout_vxattr(string name, string value, ceph_file_layout *layout);
+  int parse_layout_vxattr(string name, string value, const OSDMap *osdmap,
+                          ceph_file_layout *layout);
   void handle_set_vxattr(MDRequestRef& mdr, CInode *cur,
                          ceph_file_layout *dir_layout,
                          set rdlocks,
diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc
index 170a2e25836..a19f5748b01 100644
--- a/src/osdc/Objecter.cc
+++ b/src/osdc/Objecter.cc
@@ -1298,10 +1298,14 @@ void Objecter::_wait_for_new_map(Context *c, epoch_t epoch, int err)
   assert(r == 0);
 }
 
-void Objecter::wait_for_new_map(Context *c, epoch_t epoch, int err)
+bool Objecter::wait_for_map(epoch_t epoch, Context *c, int err)
 {
   RWLock::WLocker wl(rwlock);
+  if (osdmap->get_epoch() >= epoch) {
+    return true;
+  }
   _wait_for_new_map(c, epoch, err);
+  return false;
 }
 
 void Objecter::kick_requests(OSDSession *session)
diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h
index 67ed9f9bc11..73e818c47b0 100644
--- a/src/osdc/Objecter.h
+++ b/src/osdc/Objecter.h
@@ -1704,7 +1704,8 @@ public:
   int get_client_incarnation() const { return client_inc.read(); }
   void set_client_incarnation(int inc) { client_inc.set(inc); }
 
-  void wait_for_new_map(Context *c, epoch_t epoch, int err=0);
+  /// wait for epoch; true if we already have it
+  bool wait_for_map(epoch_t epoch, Context *c, int err=0);
   void _wait_for_new_map(Context *c, epoch_t epoch, int err=0);
   void wait_for_latest_osdmap(Context *fin);
   void get_latest_version(epoch_t oldest, epoch_t neweset, Context *fin);