From 66b6dd107c8fdfea8db761fc9b09a628e8c37669 Mon Sep 17 00:00:00 2001 From: sageweil Date: Thu, 13 Sep 2007 04:15:48 +0000 Subject: [PATCH] mds boot cleanup; mds profiling stuff (commented out); mds server bugfixes (unlink); mdslog append git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1843 29311d96-e01e-0410-9327-a35deaab8ce9 --- trunk/ceph/mds/CDir.cc | 17 +++-- trunk/ceph/mds/CDir.h | 8 ++- trunk/ceph/mds/CInode.h | 7 +- trunk/ceph/mds/MDCache.cc | 41 +++++------ trunk/ceph/mds/MDLog.cc | 7 ++ trunk/ceph/mds/MDLog.h | 1 + trunk/ceph/mds/MDS.cc | 142 +++++++++++++++++++------------------- trunk/ceph/mds/MDS.h | 7 +- trunk/ceph/mds/Server.cc | 43 +++++++++++- 9 files changed, 165 insertions(+), 108 deletions(-) diff --git a/trunk/ceph/mds/CDir.cc b/trunk/ceph/mds/CDir.cc index 7254c05a3343f..2c3c55b189aab 100644 --- a/trunk/ceph/mds/CDir.cc +++ b/trunk/ceph/mds/CDir.cc @@ -145,6 +145,8 @@ CDir::CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth) auth_pins = 0; nested_auth_pins = 0; request_pins = 0; + + //hack_num_accessed = -1; dir_rep = REP_NONE; //dir_rep = REP_ALL; // hack: to wring out some bugs! FIXME FIXME @@ -319,6 +321,7 @@ void CDir::link_primary_inode(CDentry *dn, CInode *in) void CDir::link_inode_work( CDentry *dn, CInode *in) { + assert(dn->inode == 0); dn->inode = in; in->set_primary_parent(dn); @@ -834,6 +837,8 @@ void CDir::_fetched(bufferlist &bl) int32_t n; ::_decode(n, bl, off); + //int num_new_inodes_loaded = 0; + for (int i=0; ihack_accessed = false; + //in->hack_load_stamp = g_clock.now(); + //num_new_inodes_loaded++; } } } else { @@ -969,6 +978,9 @@ void CDir::_fetched(bufferlist &bl) projected_version = version = committing_version = committed_version = got_version; } + //cache->mds->logger->inc("newin", num_new_inodes_loaded); + //hack_num_accessed = 0; + // mark complete, !fetching state_set(STATE_COMPLETE); state_clear(STATE_FETCHING); @@ -976,11 +988,6 @@ void CDir::_fetched(bufferlist &bl) // kick waiters finish_waiting(WAIT_COMPLETE, 0); - /* - list waiters; - take_waiting(WAIT_COMPLETE, waiters); - cache->mds->queue_finished(waiters); - */ } diff --git a/trunk/ceph/mds/CDir.h b/trunk/ceph/mds/CDir.h index f607d47d506bb..b8bf4e6e3920a 100644 --- a/trunk/ceph/mds/CDir.h +++ b/trunk/ceph/mds/CDir.h @@ -160,9 +160,11 @@ class CDir : public MDSCacheObject { return dirfrag() < ((const CDir*)r)->dirfrag(); } + //int hack_num_accessed; + public: - //typedef hash_map map_t; - typedef map map_t; + typedef hash_map map_t; // there is a bug somewhere, valgrind me. + //typedef map map_t; protected: // contents map_t items; // non-null AND null @@ -171,6 +173,8 @@ protected: int num_dirty; + + // state version_t version; version_t committing_version; diff --git a/trunk/ceph/mds/CInode.h b/trunk/ceph/mds/CInode.h index 924aca7c01add..569db3a0a9fe9 100644 --- a/trunk/ceph/mds/CInode.h +++ b/trunk/ceph/mds/CInode.h @@ -83,7 +83,6 @@ class CInode : public MDSCacheObject { } // -- state -- - static const int STATE_ROOT = (1<<1); static const int STATE_EXPORTING = (1<<2); // on nonauth bystander. static const int STATE_ANCHORING = (1<<3); static const int STATE_UNANCHORING = (1<<4); @@ -123,6 +122,9 @@ class CInode : public MDSCacheObject { off_t last_journaled; // log offset for the last time i was journaled off_t last_open_journaled; // log offset for the last journaled EOpen + //bool hack_accessed; + //utime_t hack_load_stamp; + // projected values (only defined while dirty) list projected_inode; list projected_dirfragtree; @@ -204,6 +206,7 @@ protected: CInode(MDCache *c, bool auth=true) : mdcache(c), last_journaled(0), last_open_journaled(0), + //hack_accessed(true), stickydir_ref(0), parent(0), force_auth(CDIR_AUTH_DEFAULT), replica_caps_wanted(0), @@ -232,7 +235,7 @@ protected: bool is_anchoring() { return state_test(STATE_ANCHORING); } bool is_unanchoring() { return state_test(STATE_UNANCHORING); } - bool is_root() { return state & STATE_ROOT; } + bool is_root() { return inode.ino == MDS_INO_ROOT; } bool is_stray() { return MDS_INO_IS_STRAY(inode.ino); } diff --git a/trunk/ceph/mds/MDCache.cc b/trunk/ceph/mds/MDCache.cc index 91089aa3e06a6..d58fe3db47373 100644 --- a/trunk/ceph/mds/MDCache.cc +++ b/trunk/ceph/mds/MDCache.cc @@ -308,6 +308,9 @@ CDentry *MDCache::get_or_create_stray_dentry(CInode *in) { string straydname; in->name_stray_dentry(straydname); + + if (!stray) create_stray_inode(mds->get_nodeid()); + frag_t fg = stray->pick_dirfrag(straydname); CDir *straydir = stray->get_or_open_dirfrag(this, fg); @@ -2719,29 +2722,11 @@ void MDCache::rejoin_send_acks() // =============================================================================== -/* -void MDCache::rename_file(CDentry *srcdn, - CDentry *destdn) -{ - CInode *in = srcdn->inode; - - // unlink src - srcdn->dir->unlink_inode(srcdn); - - // unlink old inode? - if (destdn->inode) destdn->dir->unlink_inode(destdn); - - // link inode w/ dentry - destdn->dir->link_inode( destdn, in ); -} -*/ - void MDCache::set_root(CInode *in) { assert(root == 0); root = in; - root->state_set(CInode::STATE_ROOT); } @@ -3074,6 +3059,17 @@ void MDCache::trim_inode(CDentry *dn, CInode *in, CDir *con, mapadd_inode(df, in->ino(), in->get_replica_nonce()); } } + + /* + if (in->is_auth()) { + if (in->hack_accessed) + mds->logger->inc("outt"); + else { + mds->logger->inc("outut"); + mds->logger->favg("oututl", g_clock.now() - in->hack_load_stamp); + } + } + */ // unlink if (dn) @@ -4670,14 +4666,13 @@ void MDCache::eval_stray(CDentry *dn) CInode *in = dn->inode; assert(in); - return; // BROKEN, FIXME - + return; // FIXME or test me rather, there is a bug here somewhere! // purge? if (in->inode.nlink == 0) { - if (!dn->is_replicated() && !in->is_any_caps()) - _purge_stray(dn); - return; + if (dn->is_replicated() || in->is_any_caps()) return; // wait + if (!in->dirfrags.empty()) return; // wait for dirs to close/trim + _purge_stray(dn); } else if (in->inode.nlink == 1) { // trivial reintegrate? diff --git a/trunk/ceph/mds/MDLog.cc b/trunk/ceph/mds/MDLog.cc index a803433682506..e47f65c3f3c6a 100644 --- a/trunk/ceph/mds/MDLog.cc +++ b/trunk/ceph/mds/MDLog.cc @@ -94,6 +94,13 @@ void MDLog::open(Context *c) journaler->recover(c); } +void MDLog::append() +{ + dout(5) << "append positioning at end" << dendl; + journaler->set_read_pos(journaler->get_write_pos()); + journaler->set_expire_pos(journaler->get_write_pos()); +} + void MDLog::write_head(Context *c) { journaler->write_head(c); diff --git a/trunk/ceph/mds/MDLog.h b/trunk/ceph/mds/MDLog.h index 24a965fd89b60..73f24fa9bb28b 100644 --- a/trunk/ceph/mds/MDLog.h +++ b/trunk/ceph/mds/MDLog.h @@ -173,6 +173,7 @@ public: void reset(); // fresh, empty log! void open(Context *onopen); + void append(); void write_head(Context *onfinish); void replay(Context *onfinish); diff --git a/trunk/ceph/mds/MDS.cc b/trunk/ceph/mds/MDS.cc index 0c120d8cc4cd3..30a2ce2b0ba5b 100644 --- a/trunk/ceph/mds/MDS.cc +++ b/trunk/ceph/mds/MDS.cc @@ -155,8 +155,22 @@ void MDS::reopen_logger(utime_t start) mds_logtype.add_inc("dir_f"); mds_logtype.add_inc("dir_c"); - mds_logtype.add_inc("mkdir"); - + //mds_logtype.add_inc("mkdir"); + + /* + mds_logtype.add_inc("newin"); // new inodes (pre)loaded + mds_logtype.add_inc("newt"); // inodes first touched/used + mds_logtype.add_inc("outt"); // trimmed touched + mds_logtype.add_inc("outut"); // trimmed untouched (wasted effort) + mds_logtype.add_avg("oututl"); // avg trim latency for untouched + + mds_logtype.add_inc("dirt1"); + mds_logtype.add_inc("dirt2"); + mds_logtype.add_inc("dirt3"); + mds_logtype.add_inc("dirt4"); + mds_logtype.add_inc("dirt5"); + */ + mds_logtype.add_set("c"); mds_logtype.add_set("ctop"); mds_logtype.add_set("cbot"); @@ -510,7 +524,7 @@ void MDS::handle_mds_map(MMDSMap *m) return; } - if (oldwhoami != whoami || !logger) + if (oldwhoami != whoami || !logger) // fakesyn/newsyn starts knowing who they are reopen_logger(mdsmap->get_create()); if (oldwhoami != whoami) { @@ -700,27 +714,25 @@ void MDS::boot() { if (is_creating()) boot_create(); // new tables, journal - else if (is_starting()) - boot_start(); // old tables, empty journal - else if (is_replay()) - boot_replay(); // replay, join + else if (is_starting() || is_replay()) + boot_start(); // start|replay, join else assert(is_standby()); } -class C_MDS_BootFinish : public Context { +class C_MDS_CreateFinish : public Context { MDS *mds; public: - C_MDS_BootFinish(MDS *m) : mds(m) {} - void finish(int r) { mds->boot_finish(); } + C_MDS_CreateFinish(MDS *m) : mds(m) {} + void finish(int r) { mds->creating_done(); } }; void MDS::boot_create() { dout(3) << "boot_create" << dendl; - C_Gather *fin = new C_Gather(new C_MDS_BootFinish(this)); + C_Gather *fin = new C_Gather(new C_MDS_CreateFinish(this)); if (whoami == 0) { dout(3) << "boot_create since i am also mds0, creating root inode and dir" << dendl; @@ -774,59 +786,22 @@ void MDS::boot_create() } } -void MDS::boot_start() -{ - dout(2) << "boot_start" << dendl; - - C_Gather *fin = new C_Gather(new C_MDS_BootFinish(this)); - - dout(2) << "boot_start opening idalloc" << dendl; - idalloc->load(fin->new_sub()); - - dout(2) << "boot_start opening clientmap" << dendl; - clientmap.load(fin->new_sub()); - - if (mdsmap->get_anchortable() == whoami) { - dout(2) << "boot_start opening anchor table" << dendl; - anchortable->load(fin->new_sub()); - } else { - dout(2) << "boot_start i have no anchor table" << dendl; - } - - dout(2) << "boot_start opening mds log" << dendl; - mdlog->open(fin->new_sub()); - - if (mdsmap->get_root() == whoami) { - dout(2) << "boot_start opening root directory" << dendl; - mdcache->open_root(fin->new_sub()); - } - - dout(2) << "boot_start opening local stray directory" << dendl; - mdcache->open_local_stray(); -} - -void MDS::boot_finish() +void MDS::creating_done() { - dout(3) << "boot_finish" << dendl; - - if (is_starting()) { - // make sure mdslog is empty - assert(mdlog->get_read_pos() == mdlog->get_write_pos()); - } - + dout(1)<< "creating_done" << dendl; set_want_state(MDSMap::STATE_ACTIVE); } -class C_MDS_BootRecover : public Context { +class C_MDS_BootStart : public Context { MDS *mds; int nextstep; public: - C_MDS_BootRecover(MDS *m, int n) : mds(m), nextstep(n) {} - void finish(int r) { mds->boot_replay(nextstep); } + C_MDS_BootStart(MDS *m, int n) : mds(m), nextstep(n) {} + void finish(int r) { mds->boot_start(nextstep); } }; -void MDS::boot_replay(int step) +void MDS::boot_start(int step) { switch (step) { case 0: @@ -834,37 +809,65 @@ void MDS::boot_replay(int step) case 1: { - C_Gather *gather = new C_Gather(new C_MDS_BootRecover(this, 2)); - dout(2) << "boot_replay " << step << ": opening idalloc" << dendl; + C_Gather *gather = new C_Gather(new C_MDS_BootStart(this, 2)); + dout(2) << "boot_start " << step << ": opening idalloc" << dendl; idalloc->load(gather->new_sub()); - dout(2) << "boot_replay " << step << ": opening clientmap" << dendl; + dout(2) << "boot_start " << step << ": opening clientmap" << dendl; clientmap.load(gather->new_sub()); if (mdsmap->get_anchortable() == whoami) { - dout(2) << "boot_replay " << step << ": opening anchor table" << dendl; + dout(2) << "boot_start " << step << ": opening anchor table" << dendl; anchortable->load(gather->new_sub()); } + + dout(2) << "boot_start " << step << ": opening mds log" << dendl; + mdlog->open(gather->new_sub()); } break; case 2: - dout(2) << "boot_replay " << step << ": opening mds log" << dendl; - mdlog->open(new C_MDS_BootRecover(this, 3)); + if (is_replay()) { + dout(2) << "boot_start " << step << ": replaying mds log" << dendl; + mdlog->replay(new C_MDS_BootStart(this, 3)); + } else { + dout(2) << "boot_start " << step << ": positioning at end of old mds log" << dendl; + mdlog->append(); + mdcache->log_subtree_map(new C_MDS_BootStart(this, 3)); + } break; - + case 3: - dout(2) << "boot_replay " << step << ": replaying mds log" << dendl; - mdlog->replay(new C_MDS_BootRecover(this, 4)); - break; + if (is_replay()) { + replay_done(); + break; + } + // starting only + assert(is_starting()); + if (mdsmap->get_root() == whoami) { + dout(2) << "boot_start " << step << ": opening root directory" << dendl; + mdcache->open_root(new C_MDS_BootStart(this, 4)); + break; + } + step++; + case 4: - replay_done(); - break; + dout(2) << "boot_start " << step << ": opening local stray directory" << dendl; + mdcache->open_local_stray(); + starting_done(); + break; } } +void MDS::starting_done() +{ + dout(3) << "starting_done" << dendl; + assert(is_starting()); + set_want_state(MDSMap::STATE_ACTIVE); +} + void MDS::replay_start() { @@ -877,11 +880,10 @@ void MDS::replay_start() dout(1) << "now replay. my recovery peers are " << rs << dendl; mdcache->set_recovery_set(rs); - // note: don't actually start yet. boot() will get called once we have - // an mdsmap AND osdmap. + // start? if (osdmap->get_epoch() > 0 && mdsmap->get_epoch() > 0) - boot_replay(); + boot_start(); } void MDS::replay_done() @@ -992,7 +994,7 @@ void MDS::handle_mds_recovery(int who) void MDS::stopping_start() { dout(2) << "stopping_start" << dendl; - + // start cache shutdown mdcache->shutdown_start(); diff --git a/trunk/ceph/mds/MDS.h b/trunk/ceph/mds/MDS.h index 7ce32f301b58f..4dcd73662dbe8 100644 --- a/trunk/ceph/mds/MDS.h +++ b/trunk/ceph/mds/MDS.h @@ -234,12 +234,13 @@ class MDS : public Dispatcher { void boot(); void boot_create(); // i am new mds. - void boot_start(); // i am old but empty (was down:out) mds. - void boot_replay(int step=0); // i am recovering existing (down:failed) mds. - void boot_finish(); + void boot_start(int step=0); // starting|replay void replay_start(); + void creating_done(); + void starting_done(); void replay_done(); + void resolve_start(); void resolve_done(); void reconnect_start(); diff --git a/trunk/ceph/mds/Server.cc b/trunk/ceph/mds/Server.cc index f29564043c5d7..4a5163d6ccc81 100644 --- a/trunk/ceph/mds/Server.cc +++ b/trunk/ceph/mds/Server.cc @@ -397,6 +397,27 @@ void Server::reply_request(MDRequest *mdr, MClientReply *reply, CInode *tracei) if (!req->is_idempotent()) mds->clientmap.add_completed_request(mdr->reqid); + /* + if (tracei && !tracei->hack_accessed) { + tracei->hack_accessed = true; + mds->logger->inc("newt"); + if (tracei->parent && + tracei->parent->dir->hack_num_accessed >= 0) { + tracei->parent->dir->hack_num_accessed++; + if (tracei->parent->dir->hack_num_accessed == 1) + mds->logger->inc("dirt1"); + if (tracei->parent->dir->hack_num_accessed == 2) + mds->logger->inc("dirt2"); + if (tracei->parent->dir->hack_num_accessed == 3) + mds->logger->inc("dirt3"); + if (tracei->parent->dir->hack_num_accessed == 4) + mds->logger->inc("dirt4"); + if (tracei->parent->dir->hack_num_accessed == 5) + mds->logger->inc("dirt5"); + } + } + */ + // include trace if (tracei) { reply->set_trace_dist( tracei, mds->get_nodeid() ); @@ -1381,6 +1402,11 @@ void Server::handle_client_utime(MDRequest *mdr) CInode *cur = rdlock_path_pin_ref(mdr, true); if (!cur) return; + if (cur->is_root()) { + reply_request(mdr, -EINVAL); // for now + return; + } + // xlock inode set rdlocks = mdr->rdlocks; set wrlocks = mdr->wrlocks; @@ -1415,6 +1441,11 @@ void Server::handle_client_chmod(MDRequest *mdr) CInode *cur = rdlock_path_pin_ref(mdr, true); if (!cur) return; + if (cur->is_root()) { + reply_request(mdr, -EINVAL); // for now + return; + } + // write set rdlocks = mdr->rdlocks; set wrlocks = mdr->wrlocks; @@ -1450,6 +1481,11 @@ void Server::handle_client_chown(MDRequest *mdr) CInode *cur = rdlock_path_pin_ref(mdr, true); if (!cur) return; + if (cur->is_root()) { + reply_request(mdr, -EINVAL); // for now + return; + } + // write set rdlocks = mdr->rdlocks; set wrlocks = mdr->wrlocks; @@ -1703,7 +1739,7 @@ void Server::handle_client_mkdir(MDRequest *mdr) newdir->mark_complete(); newdir->mark_dirty(newdir->pre_dirty()); - if (mds->logger) mds->logger->inc("mkdir"); + //if (mds->logger) mds->logger->inc("mkdir"); // prepare finisher EUpdate *le = new EUpdate(mdlog, "mkdir"); @@ -3118,14 +3154,14 @@ void Server::_rename_apply(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDen oldin->inode.nlink--; oldin->inode.ctime = mdr->now; if (oldin->is_auth()) - oldin->mark_dirty(mdr->pvmap[straydn]); + oldin->pop_and_dirty_projected_inode(); } else if (oldin) { // nlink-- remote. destdn was remote. oldin->inode.nlink--; oldin->inode.ctime = mdr->now; if (oldin->is_auth()) - oldin->mark_dirty(mdr->pvmap[oldin]); + oldin->pop_and_dirty_projected_inode(); } CInode *in = srcdn->inode; @@ -3134,6 +3170,7 @@ void Server::_rename_apply(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDen // srcdn was remote. srcdn->dir->unlink_inode(srcdn); destdn->dir->link_remote_inode(destdn, in->ino(), MODE_TO_DT(in->inode.mode)); + destdn->link_remote(in); if (destdn->is_auth()) destdn->mark_dirty(mdr->pvmap[destdn]); } else { -- 2.39.5