From: Sage Weil Date: Tue, 10 Jun 2008 13:44:43 +0000 (-0700) Subject: mds: wait for journal safe for import/export X-Git-Tag: v0.3~141 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b2e1d2757aaa1cb0b3933be7f4af5cbec5b0009f;p=ceph.git mds: wait for journal safe for import/export --- diff --git a/src/mds/LogSegment.h b/src/mds/LogSegment.h index 8ce8c974000..106973efe8e 100644 --- a/src/mds/LogSegment.h +++ b/src/mds/LogSegment.h @@ -31,8 +31,9 @@ class MDSlaveUpdate; class LogSegment { public: - off_t offset, end; + loff_t offset, end; int num_events; + loff_t trimmable_at; // dirty items xlist dirty_dirfrags; @@ -63,7 +64,7 @@ class LogSegment { C_Gather *try_to_expire(MDS *mds); // cons - LogSegment(off_t off) : offset(off), end(off), num_events(0), + LogSegment(loff_t off) : offset(off), end(off), num_events(0), trimmable_at(0), allocv(0), sessionmapv(0), anchortablev(0) { } }; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index de5ff2958fb..bd985a5584f 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -505,7 +505,7 @@ void MDCache::try_subtree_merge_at(CDir *dir) le->metablob.add_primary_dentry(in->get_parent_dn(), true, 0, pi); mds->mdlog->submit_entry(le); - mds->mdlog->wait_for_sync(new C_MDC_SubtreeMergeWB(this, in, + mds->mdlog->wait_for_safe(new C_MDC_SubtreeMergeWB(this, in, mds->mdlog->get_current_segment())); } } diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index 13592961e73..feb728dc07b 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -94,16 +94,21 @@ void MDLog::write_head(Context *c) journaler->write_head(c); } -off_t MDLog::get_read_pos() +loff_t MDLog::get_read_pos() { return journaler->get_read_pos(); } -off_t MDLog::get_write_pos() +loff_t MDLog::get_write_pos() { return journaler->get_write_pos(); } +loff_t MDLog::get_safe_pos() +{ + return journaler->get_write_safe_pos(); +} + void MDLog::create(Context *c) @@ -212,6 +217,17 @@ void MDLog::wait_for_sync( Context *c ) delete c; } } +void MDLog::wait_for_safe( Context *c ) +{ + if (g_conf.mds_log) { + // wait + journaler->flush(0, c); + } else { + // hack: bypass. + c->finish(0); + delete c; + } +} void MDLog::flush() { diff --git a/src/mds/MDLog.h b/src/mds/MDLog.h index c958585b86a..b3580ec8d48 100644 --- a/src/mds/MDLog.h +++ b/src/mds/MDLog.h @@ -153,8 +153,9 @@ public: size_t get_num_segments() { return segments.size(); } void set_max_segments(int m) { max_segments = m; } - off_t get_read_pos(); - off_t get_write_pos(); + loff_t get_read_pos(); + loff_t get_write_pos(); + loff_t get_safe_pos(); bool empty() { return segments.empty(); } bool is_capped() { return capped; } @@ -162,6 +163,7 @@ public: void submit_entry( LogEvent *e, Context *c = 0 ); void wait_for_sync( Context *c ); + void wait_for_safe( Context *c ); void flush(); bool is_flushed() { return unflushed == 0; diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc index 5e53bec9cd1..540e3c88b5f 100644 --- a/src/mds/MDS.cc +++ b/src/mds/MDS.cc @@ -1118,6 +1118,7 @@ void MDS::_dispatch(Message *m) if (is_active() || is_stopping()) { // flush log to disk after every op. for now. //mdlog->flush(); + mdlog->trim(); // trim cache mdcache->trim(); diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index da66547bc23..87314914fd0 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -1128,8 +1128,8 @@ void Migrator::handle_export_ack(MExportDirAck *m) } // log export completion, then finish (unfreeze, trigger finish context, etc.) - mds->mdlog->submit_entry(le, - new C_MDS_ExportFinishLogged(this, dir)); + mds->mdlog->submit_entry(le); + mds->mdlog->wait_for_safe(new C_MDS_ExportFinishLogged(this, dir)); delete m; } @@ -1693,7 +1693,8 @@ void Migrator::handle_export_dir(MExportDir *m) dout(7) << "handle_export_dir did " << *dir << dendl; // log it - mds->mdlog->submit_entry(le, onlogged); + mds->mdlog->submit_entry(le); + mds->mdlog->wait_for_safe(onlogged); // note state import_state[dir->dirfrag()] = IMPORT_LOGGINGSTART; @@ -2302,7 +2303,8 @@ void Migrator::handle_export_caps(MExportCaps *ex) mds->server->prepare_force_open_sessions(ex->client_map); le->client_map.swap(ex->client_map); - mds->mdlog->submit_entry(le, finish); + mds->mdlog->submit_entry(le); + mds->mdlog->wait_for_safe(finish); delete ex; } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index ae6267c1f68..d030a3b4d05 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -4573,11 +4573,6 @@ void Server::handle_client_openc(MDRequest *mdr) // log + wait C_MDS_openc_finish *fin = new C_MDS_openc_finish(mds, mdr, dn, in); mdlog->submit_entry(le, fin); - - /* - FIXME. this needs to be rewritten when the write capability stuff starts - getting journaled. - */ } diff --git a/src/mds/journal.cc b/src/mds/journal.cc index d75faec618c..f5a992240d5 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -192,6 +192,27 @@ C_Gather *LogSegment::try_to_expire(MDS *mds) // FIXME client requests...? // audit handling of anchor transactions? + // once we are otherwise trimmable, make sure journal is fully safe on disk. + if (!gather) { + if (trimmable_at && + trimmable_at <= mds->mdlog->get_safe_pos()) { + dout(6) << "LogSegment(" << offset << ").try_to_expire trimmable at " << trimmable_at + << " <= " << mds->mdlog->get_safe_pos() << dendl; + } else { + if (trimmable_at == 0) { + trimmable_at = mds->mdlog->get_write_pos(); + dout(6) << "LogSegment(" << offset << ").try_to_expire now trimmable at " << trimmable_at + << ", waiting for safe journal flush" << dendl; + } else { + dout(6) << "LogSegment(" << offset << ").try_to_expire trimmable at " << trimmable_at + << " > " << mds->mdlog->get_safe_pos() + << ", waiting for safe journal flush" << dendl; + } + if (!gather) gather = new C_Gather; + mds->mdlog->wait_for_safe(gather->new_sub()); + } + } + if (gather) { dout(6) << "LogSegment(" << offset << ").try_to_expire waiting" << dendl; } else {