From: Sage Weil Date: Mon, 5 Apr 2010 22:38:45 +0000 (-0700) Subject: mds: journal oldest client tid X-Git-Tag: v0.20~116 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=07e39846411e3d9fecbdc57329bd3a0d142e2b8f;p=ceph.git mds: journal oldest client tid Journal the client's safe tid with new requests. This keeps the client completed_requests list trimmed, so that we don't build up a ginormous list of all requests over the entire journal. Forward rolling mds format change. --- diff --git a/src/TODO b/src/TODO index c1e4f480eab2..9969094c6772 100644 --- a/src/TODO +++ b/src/TODO @@ -390,7 +390,7 @@ uclient - hadoop: clean up assert usage mds -- should we occasionally journal session completed requests?? +- put inode dirty fields into dirty_bits_t to reduce per-inode memory footprint - don't sync log on every clientreplay request? - pass issued, wanted into eval(lock) when eval() already has it? (and otherwise optimize eval paths..) - add an up:shadow mode? 
diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 8d05c7abc85e..fd36335b09d7 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -2276,7 +2276,7 @@ void Server::handle_client_openc(MDRequest *mdr) mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "openc"); mdlog->start_entry(le); - le->metablob.add_client_req(req->get_reqid()); + le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); journal_allocated_inos(mdr, &le->metablob); mdcache->predirty_journal_parents(mdr, &le->metablob, in, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); le->metablob.add_primary_dentry(dn, true, in); @@ -2608,7 +2608,7 @@ void Server::handle_client_setattr(MDRequest *mdr) pi->ctime = now; // log + wait - le->metablob.add_client_req(req->get_reqid()); + le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY, false); mdcache->journal_dirty_inode(mdr, &le->metablob, cur); @@ -2649,7 +2649,7 @@ void Server::handle_client_opent(MDRequest *mdr, int cmode) EUpdate *le = new EUpdate(mdlog, "open_truncate"); mdlog->start_entry(le); le->metablob.add_truncate_start(in->ino()); - le->metablob.add_client_req(mdr->reqid); + le->metablob.add_client_req(mdr->reqid, mdr->client_request->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, in, 0, PREDIRTY_PRIMARY, false); mdcache->journal_dirty_inode(mdr, &le->metablob, in); @@ -2720,7 +2720,7 @@ void Server::handle_client_setlayout(MDRequest *mdr) mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "setlayout"); mdlog->start_entry(le); - le->metablob.add_client_req(req->get_reqid()); + le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY, false); mdcache->journal_dirty_inode(mdr, &le->metablob, cur); @@ -2802,7 +2802,7 @@ void 
Server::handle_client_setxattr(MDRequest *mdr) mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "setxattr"); mdlog->start_entry(le); - le->metablob.add_client_req(req->get_reqid()); + le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY, false); mdcache->journal_cow_inode(mdr, &le->metablob, cur); le->metablob.add_primary_dentry(cur->get_projected_parent_dn(), true, cur, 0, 0, px); @@ -2847,7 +2847,7 @@ void Server::handle_client_removexattr(MDRequest *mdr) mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "removexattr"); mdlog->start_entry(le); - le->metablob.add_client_req(req->get_reqid()); + le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY, false); mdcache->journal_cow_inode(mdr, &le->metablob, cur); le->metablob.add_primary_dentry(cur->get_projected_parent_dn(), true, cur, 0, 0, px); @@ -2944,7 +2944,7 @@ void Server::handle_client_mknod(MDRequest *mdr) mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "mknod"); mdlog->start_entry(le); - le->metablob.add_client_req(req->get_reqid()); + le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); journal_allocated_inos(mdr, &le->metablob); mdcache->predirty_journal_parents(mdr, &le->metablob, newi, dn->get_dir(), @@ -2997,7 +2997,7 @@ void Server::handle_client_mkdir(MDRequest *mdr) mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "mkdir"); mdlog->start_entry(le); - le->metablob.add_client_req(req->get_reqid()); + le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); journal_allocated_inos(mdr, &le->metablob); mdcache->predirty_journal_parents(mdr, &le->metablob, newi, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); le->metablob.add_primary_dentry(dn, true, newi); @@ 
-3061,7 +3061,7 @@ void Server::handle_client_symlink(MDRequest *mdr) mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "symlink"); mdlog->start_entry(le); - le->metablob.add_client_req(req->get_reqid()); + le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); journal_allocated_inos(mdr, &le->metablob); mdcache->predirty_journal_parents(mdr, &le->metablob, newi, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); le->metablob.add_primary_dentry(dn, true, newi); @@ -3172,7 +3172,7 @@ void Server::_link_local(MDRequest *mdr, CDentry *dn, CInode *targeti) // log + wait EUpdate *le = new EUpdate(mdlog, "link_local"); mdlog->start_entry(le); - le->metablob.add_client_req(mdr->reqid); + le->metablob.add_client_req(mdr->reqid, mdr->client_request->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, targeti, dn->get_dir(), PREDIRTY_DIR, 1); // new dn mdcache->predirty_journal_parents(mdr, &le->metablob, targeti, 0, PREDIRTY_PRIMARY); // targeti le->metablob.add_remote_dentry(dn, true, targeti->ino(), targeti->d_type()); // new remote @@ -3262,7 +3262,7 @@ void Server::_link_remote(MDRequest *mdr, bool inc, CDentry *dn, CInode *targeti mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, inc ? 
"link_remote":"unlink_remote"); mdlog->start_entry(le); - le->metablob.add_client_req(mdr->reqid); + le->metablob.add_client_req(mdr->reqid, mdr->client_request->get_oldest_client_tid()); if (!mdr->more()->slaves.empty()) { dout(20) << " noting uncommitted_slaves " << mdr->more()->slaves << dendl; le->reqid = mdr->reqid; @@ -3765,7 +3765,7 @@ void Server::_unlink_local(MDRequest *mdr, CDentry *dn, CDentry *straydn) // prepare log entry EUpdate *le = new EUpdate(mdlog, "unlink_local"); mdlog->start_entry(le); - le->metablob.add_client_req(mdr->reqid); + le->metablob.add_client_req(mdr->reqid, mdr->client_request->get_oldest_client_tid()); if (straydn) { assert(dnl->is_primary()); @@ -4253,7 +4253,7 @@ void Server::handle_client_rename(MDRequest *mdr) mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "rename"); mdlog->start_entry(le); - le->metablob.add_client_req(mdr->reqid); + le->metablob.add_client_req(mdr->reqid, mdr->client_request->get_oldest_client_tid()); if (!mdr->more()->slaves.empty()) { dout(20) << " noting uncommitted_slaves " << mdr->more()->slaves << dendl; @@ -5438,7 +5438,7 @@ void Server::handle_client_mksnap(MDRequest *mdr) EUpdate *le = new EUpdate(mdlog, "mksnap"); mdlog->start_entry(le); - le->metablob.add_client_req(req->get_reqid()); + le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); le->metablob.add_table_transaction(TABLE_SNAP, stid); mdcache->predirty_journal_parents(mdr, &le->metablob, diri, 0, PREDIRTY_PRIMARY, false); mdcache->journal_cow_inode(mdr, &le->metablob, diri); @@ -5584,7 +5584,7 @@ void Server::handle_client_rmsnap(MDRequest *mdr) EUpdate *le = new EUpdate(mdlog, "rmsnap"); mdlog->start_entry(le); - le->metablob.add_client_req(req->get_reqid()); + le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); le->metablob.add_table_transaction(TABLE_SNAP, stid); mdcache->predirty_journal_parents(mdr, &le->metablob, diri, 0, PREDIRTY_PRIMARY, false); 
mdcache->journal_cow_inode(mdr, &le->metablob, diri); diff --git a/src/mds/SessionMap.h b/src/mds/SessionMap.h index 515050316344..05f4b69a194a 100644 --- a/src/mds/SessionMap.h +++ b/src/mds/SessionMap.h @@ -330,10 +330,12 @@ public: Session *session = get_session(rid.name); return session && session->have_completed_request(rid.tid); } - void add_completed_request(metareqid_t rid) { + void add_completed_request(metareqid_t rid, tid_t tid=0) { Session *session = get_session(rid.name); assert(session); session->add_completed_request(rid.tid); + if (tid) + session->trim_completed_requests(tid); } void trim_completed_requests(entity_name_t c, tid_t tid) { Session *session = get_session(c); diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index 7dbedab4f2aa..24a8d7b87d13 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -356,11 +356,11 @@ private: vector<inodeno_t> destroyed_inodes; // idempotent op(s) - list<metareqid_t> client_reqs; + list<pair<metareqid_t,__u64> > client_reqs; public: void encode(bufferlist& bl) const { - __u8 struct_v = 1; + __u8 struct_v = 2; ::encode(struct_v, bl); ::encode(lump_order, bl); ::encode(lump_map, bl); @@ -403,7 +403,16 @@ private: ::decode(truncate_start, bl); ::decode(truncate_finish, bl); ::decode(destroyed_inodes, bl); - ::decode(client_reqs, bl); + if (struct_v >= 2) + ::decode(client_reqs, bl); + else { + list<metareqid_t> r; + ::decode(r, bl); + while (!r.empty()) { + client_reqs.push_back(pair<metareqid_t,__u64>(r.front(), 0)); + r.pop_front(); + } + } } @@ -427,8 +436,8 @@ private: } } - void add_client_req(metareqid_t r) { - client_reqs.push_back(r); + void add_client_req(metareqid_t r, __u64 tid=0) { + client_reqs.push_back(pair<metareqid_t,__u64>(r, tid)); } void add_table_transaction(int table, version_t tid) { diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 9c2bb4c7adb2..6e6cc26689f4 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -673,13 +673,13 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg) } // client requests - for (list<metareqid_t>::iterator p = 
client_reqs.begin(); + for (list<pair<metareqid_t,__u64> >::iterator p = client_reqs.begin(); p != client_reqs.end(); ++p) - if (p->name.is_client()) { - dout(10) << "EMetaBlob.replay request " << *p << dendl; - if (mds->sessionmap.have_session(p->name)) - mds->sessionmap.add_completed_request(*p); + if (p->first.name.is_client()) { + dout(10) << "EMetaBlob.replay request " << p->first << " " << p->second << dendl; + if (mds->sessionmap.have_session(p->first.name)) + mds->sessionmap.add_completed_request(p->first, p->second); }