- statfs?
-- fix lock caps gather ack versus ambiguous auth
foreign rename
objecter
+- transaction prepare/commit
- read+floor_lockout
osd/rados
+- transaction prepare/commit
+ - rollback
+ - rollback logging (to fix slow prepare vs rollback race)
- read+floor_lockout for clean STONITH-like/fencing semantics after failover.
- separate out replication code into a PG class, to pave the way for RAID
// --- journaler ---
journaler_allow_split_entries: true,
+ journaler_safe: false, // wait for COMMIT on journal writes
// --- mds ---
mds_cache_size: MDS_CACHE_SIZE,
mds_log_max_trimming: 10000,
mds_log_read_inc: 1<<20,
mds_log_pad_entry: 128,//256,//64,
- mds_log_before_reply: true,
mds_log_flush_on_shutdown: true,
mds_log_import_map_interval: 1024*1024, // frequency (in bytes) of EImportMap in log
mds_log_eopen_size: 100, // # open inodes per log entry
ebofs_cloneable: false,
ebofs_verify: false,
ebofs_commit_ms: 2000, // 0 = no forced commit timeout (for debugging/tracing)
- ebofs_idle_commit_ms: 100, // 0 = no idle detection. use this -or- bdev_idle_kick_after_ms
+ ebofs_idle_commit_ms: 20, // 0 = no idle detection. use this -or- bdev_idle_kick_after_ms
ebofs_oc_size: 10000, // onode cache
ebofs_cc_size: 10000, // cnode cache
ebofs_bc_size: (80 *256), // 4k blocks, *256 for MB
else if (strcmp(args[i], "--objecter_buffer_uncommitted") == 0)
g_conf.objecter_buffer_uncommitted = atoi(args[++i]);
+ else if (strcmp(args[i], "--journaler_safe") == 0)
+ g_conf.journaler_safe = atoi(args[++i]);
+
else if (strcmp(args[i], "--mds_cache_size") == 0)
g_conf.mds_cache_size = atoi(args[++i]);
else if (strcmp(args[i], "--mds_log") == 0)
g_conf.mds_log = atoi(args[++i]);
- else if (strcmp(args[i], "--mds_log_before_reply") == 0)
- g_conf.mds_log_before_reply = atoi(args[++i]);
else if (strcmp(args[i], "--mds_log_max_len") == 0)
g_conf.mds_log_max_len = atoi(args[++i]);
else if (strcmp(args[i], "--mds_log_read_inc") == 0)
// journaler
bool journaler_allow_split_entries;
-
+ bool journaler_safe;
+
// mds
int mds_cache_size;
float mds_cache_mid;
int mds_log_max_trimming;
int mds_log_read_inc;
int mds_log_pad_entry;
- bool mds_log_before_reply;
bool mds_log_flush_on_shutdown;
off_t mds_log_import_map_interval;
int mds_log_eopen_size;
anydom = in->popularity[MDS_POP_ANYDOM].pop[type].hit();
}
- dout(-20) << "hit_inode " << type << " pop " << me << " me, "
+ dout(20) << "hit_inode " << type << " pop " << me << " me, "
<< nested << " nested, "
<< curdom << " curdom, "
<< anydom << " anydom"
if (g_conf.num_mds > 2 && // FIXME >2 thing
!dir->inode->is_root() && // not root (for now at least)
dir->is_auth()) {
- dout(-20) << "hit_dir " << type << " pop " << v << " me "
+ dout(20) << "hit_dir " << type << " pop " << v << " me "
<< *dir << endl;
// hash this dir? (later?)
// replicate?
float dir_pop = dir->popularity[MDS_POP_CURDOM].pop[type].get(); // hmm??
- dout(-20) << "hit_recursive " << type << " pop " << dir_pop << " curdom " << *dir << endl;
+ dout(20) << "hit_recursive " << type << " pop " << dir_pop << " curdom " << *dir << endl;
if (dir->is_auth()) {
if (!dir->is_rep() &&
rd_adj = rdp / mds->get_mds_map()->get_num_mds() - rdp;
rd_adj /= 2.0; // temper somewhat
- dout(1) << "replicating dir " << *dir << " pop " << dir_pop << " .. rdp " << rdp << " adj " << rd_adj << endl;
+ dout(2) << "replicating dir " << *dir << " pop " << dir_pop << " .. rdp " << rdp << " adj " << rd_adj << endl;
dir->dir_rep = CDir::REP_ALL;
mds->mdcache->send_dir_updates(dir, true);
dir->is_rep() &&
dir_pop < g_conf.mds_bal_unreplicate_threshold) {
// unreplicate
- dout(1) << "unreplicating dir " << *dir << " pop " << dir_pop << endl;
+ dout(2) << "unreplicating dir " << *dir << " pop " << dir_pop << endl;
dir->dir_rep = CDir::REP_NONE;
mds->mdcache->send_dir_updates(dir);
bufferlist bl;
bl.append((char*)&last_written, sizeof(last_written));
filer.write(inode, 0, bl.length(), bl, 0,
- 0, new C_WriteHead(this, last_written, oncommit));
+ 0,
+ new C_WriteHead(this, last_written, oncommit));
}
void Journaler::_finish_write_head(Header &wrote, Context *oncommit)
dout(10) << "flush flushing " << flush_pos << "~" << len << endl;
// submit write for anything pending
+ // flush _start_ pos to _finish_flush
filer.write(inode, flush_pos, len, write_buf, 0,
- new C_Flush(this, flush_pos), 0); // flush _start_ pos to _finish_flush
+ g_conf.journaler_safe ? 0:new C_Flush(this, flush_pos), // on ACK
+ g_conf.journaler_safe ? new C_Flush(this, flush_pos):0); // on COMMIT
pending_flush[flush_pos] = g_clock.now();
// adjust pointers