From b05d2d96c5494d6878d311670b389bfb43ea2129 Mon Sep 17 00:00:00 2001 From: sageweil Date: Wed, 6 Jun 2007 21:03:22 +0000 Subject: [PATCH] * be explicit about journal append ack/commit safety git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1401 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/cephmds2/TODO | 5 ++++- branches/sage/cephmds2/config.cc | 9 +++++---- branches/sage/cephmds2/config.h | 4 ++-- branches/sage/cephmds2/mds/MDBalancer.cc | 10 +++++----- branches/sage/cephmds2/osdc/Journaler.cc | 7 +++++-- 5 files changed, 21 insertions(+), 14 deletions(-) diff --git a/branches/sage/cephmds2/TODO b/branches/sage/cephmds2/TODO index cd972c0c85335..9134e424eb3da 100644 --- a/branches/sage/cephmds2/TODO +++ b/branches/sage/cephmds2/TODO @@ -113,7 +113,6 @@ sage mds - statfs? -- fix lock caps gather ack versus ambiguous auth foreign rename @@ -213,9 +212,13 @@ rados snapshots objecter +- transaction prepare/commit - read+floor_lockout osd/rados +- transaction prepare/commit + - rollback + - rollback logging (to fix slow prepare vs rollback race) - read+floor_lockout for clean STOGITH-like/fencing semantics after failover. - separate out replication code into a PG class, to pave way for RAID diff --git a/branches/sage/cephmds2/config.cc b/branches/sage/cephmds2/config.cc index c91c756cbd6f9..01314f5bfbfd2 100644 --- a/branches/sage/cephmds2/config.cc +++ b/branches/sage/cephmds2/config.cc @@ -150,6 +150,7 @@ md_config_t g_conf = { // --- journaler --- journaler_allow_split_entries: true, + journaler_safe: false, // wait for COMMIT on journal writes // --- mds --- mds_cache_size: MDS_CACHE_SIZE, @@ -165,7 +166,6 @@ md_config_t g_conf = { mds_log_max_trimming: 10000, mds_log_read_inc: 1<<20, mds_log_pad_entry: 128,//256,//64, - mds_log_before_reply: true, mds_log_flush_on_shutdown: true, mds_log_import_map_interval: 1024*1024, // frequency (in bytes) of EImportMap in log mds_log_eopen_size: 100, // # open inodes per log entry @@ -232,7 +232,7 @@ md_config_t g_conf = { ebofs_cloneable: false, ebofs_verify: false, ebofs_commit_ms: 2000, // 0 = no forced commit timeout (for debugging/tracing) - ebofs_idle_commit_ms: 100, // 0 = no idle detection. use this -or- bdev_idle_kick_after_ms + ebofs_idle_commit_ms: 20, // 0 = no idle detection. use this -or- bdev_idle_kick_after_ms ebofs_oc_size: 10000, // onode cache ebofs_cc_size: 10000, // cnode cache ebofs_bc_size: (80 *256), // 4k blocks, *256 for MB @@ -564,6 +564,9 @@ void parse_config_options(std::vector& args) else if (strcmp(args[i], "--objecter_buffer_uncommitted") == 0) g_conf.objecter_buffer_uncommitted = atoi(args[++i]); + else if (strcmp(args[i], "--journaler_safe") == 0) + g_conf.journaler_safe = atoi(args[++i]); + else if (strcmp(args[i], "--mds_cache_size") == 0) g_conf.mds_cache_size = atoi(args[++i]); @@ -574,8 +577,6 @@ void parse_config_options(std::vector& args) else if (strcmp(args[i], "--mds_log") == 0) g_conf.mds_log = atoi(args[++i]); - else if (strcmp(args[i], "--mds_log_before_reply") == 0) - g_conf.mds_log_before_reply = atoi(args[++i]); else if (strcmp(args[i], "--mds_log_max_len") == 0) g_conf.mds_log_max_len = atoi(args[++i]); else if (strcmp(args[i], "--mds_log_read_inc") == 0) diff --git a/branches/sage/cephmds2/config.h b/branches/sage/cephmds2/config.h index a9fde4f7a0406..b8c6d20d7d6a5 100644 --- a/branches/sage/cephmds2/config.h +++ b/branches/sage/cephmds2/config.h @@ -151,7 +151,8 @@ struct md_config_t { // journaler bool journaler_allow_split_entries; - + bool journaler_safe; + // mds int mds_cache_size; float mds_cache_mid; @@ -166,7 +167,6 @@ struct md_config_t { int mds_log_max_trimming; int mds_log_read_inc; int mds_log_pad_entry; - bool mds_log_before_reply; bool mds_log_flush_on_shutdown; off_t mds_log_import_map_interval; int mds_log_eopen_size; diff --git a/branches/sage/cephmds2/mds/MDBalancer.cc b/branches/sage/cephmds2/mds/MDBalancer.cc index 5e6bac91671ed..75e8872a4dc0a 100644 --- a/branches/sage/cephmds2/mds/MDBalancer.cc +++ b/branches/sage/cephmds2/mds/MDBalancer.cc @@ -715,7 +715,7 @@ void MDBalancer::hit_inode(CInode *in, int type) anydom = in->popularity[MDS_POP_ANYDOM].pop[type].hit(); } - dout(-20) << "hit_inode " << type << " pop " << me << " me, " + dout(20) << "hit_inode " << type << " pop " << me << " me, " << nested << " nested, " << curdom << " curdom, " << anydom << " anydom" @@ -737,7 +737,7 @@ void MDBalancer::hit_dir(CDir *dir, int type) if (g_conf.num_mds > 2 && // FIXME >2 thing !dir->inode->is_root() && // not root (for now at least) dir->is_auth()) { - dout(-20) << "hit_dir " << type << " pop " << v << " me " + dout(20) << "hit_dir " << type << " pop " << v << " me " << *dir << endl; // hash this dir? (later?) @@ -766,7 +766,7 @@ void MDBalancer::hit_recursive(CDir *dir, int type) // replicate? float dir_pop = dir->popularity[MDS_POP_CURDOM].pop[type].get(); // hmm?? - dout(-20) << "hit_recursive " << type << " pop " << dir_pop << " curdom " << *dir << endl; + dout(20) << "hit_recursive " << type << " pop " << dir_pop << " curdom " << *dir << endl; if (dir->is_auth()) { if (!dir->is_rep() && @@ -776,7 +776,7 @@ void MDBalancer::hit_recursive(CDir *dir, int type) rd_adj = rdp / mds->get_mds_map()->get_num_mds() - rdp; rd_adj /= 2.0; // temper somewhat - dout(1) << "replicating dir " << *dir << " pop " << dir_pop << " .. rdp " << rdp << " adj " << rd_adj << endl; + dout(2) << "replicating dir " << *dir << " pop " << dir_pop << " .. rdp " << rdp << " adj " << rd_adj << endl; dir->dir_rep = CDir::REP_ALL; mds->mdcache->send_dir_updates(dir, true); @@ -789,7 +789,7 @@ void MDBalancer::hit_recursive(CDir *dir, int type) dir->is_rep() && dir_pop < g_conf.mds_bal_unreplicate_threshold) { // unreplicate - dout(1) << "unreplicating dir " << *dir << " pop " << dir_pop << endl; + dout(2) << "unreplicating dir " << *dir << " pop " << dir_pop << endl; dir->dir_rep = CDir::REP_NONE; mds->mdcache->send_dir_updates(dir); diff --git a/branches/sage/cephmds2/osdc/Journaler.cc b/branches/sage/cephmds2/osdc/Journaler.cc index cfb2828dc99f3..9227e5f227dd7 100644 --- a/branches/sage/cephmds2/osdc/Journaler.cc +++ b/branches/sage/cephmds2/osdc/Journaler.cc @@ -169,7 +169,8 @@ void Journaler::write_head(Context *oncommit) bufferlist bl; bl.append((char*)&last_written, sizeof(last_written)); filer.write(inode, 0, bl.length(), bl, 0, - 0, new C_WriteHead(this, last_written, oncommit)); + 0, + new C_WriteHead(this, last_written, oncommit)); } void Journaler::_finish_write_head(Header &wrote, Context *oncommit) @@ -293,8 +294,10 @@ void Journaler::flush(Context *onsync) dout(10) << "flush flushing " << flush_pos << "~" << len << endl; // submit write for anything pending + // flush _start_ pos to _finish_flush filer.write(inode, flush_pos, len, write_buf, 0, - new C_Flush(this, flush_pos), 0); // flush _start_ pos to _finish_flush + g_conf.journaler_safe ? 0:new C_Flush(this, flush_pos), // on ACK + g_conf.journaler_safe ? new C_Flush(this, flush_pos):0); // on COMMIT pending_flush[flush_pos] = g_clock.now(); // adjust pointers -- 2.39.5