osd_heartbeat_interval: 1,
osd_heartbeat_grace: 30,
osd_mon_report_interval: 5, // pg stats, failures, up_thru, boot.
- osd_replay_window: 5,
+ osd_replay_window: 45,
osd_max_pull: 2,
osd_pad_pg_log: false,
// Ask for this OSD's up_thru to be raised to at least `want`.
//
// If `want` is greater than the currently queued up_thru_wanted, the queued
// value is raised, last_mon_report is reset to the current time, and
// send_alive() is invoked right away. Otherwise nothing is changed and only
// a debug line is logged.
//
// NOTE(review): the send_alive code path checks osdmap->exists(whoami) before
// calling get_up_thru(whoami); this function does not -- confirm that is safe.
void OSD::queue_want_up_thru(epoch_t want)
{
+ epoch_t cur = osdmap->get_up_thru(whoami);  // up_thru per the current osdmap; only used in the log lines below
if (want > up_thru_wanted) {
+ dout(10) << "queue_want_up_thru now " << want << " (was " << up_thru_wanted << ")"
+ << ", currently " << cur
+ << dendl;
up_thru_wanted = want;
// expedite, a bit. WARNING this will somewhat delay other mon queries.
last_mon_report = g_clock.now();
send_alive();
+ } else {
+ dout(10) << "queue_want_up_thru want " << want << " <= queued " << up_thru_wanted
+ << ", currently " << cur
+ << dendl;
}
}
if (!osdmap->exists(whoami))
return;
epoch_t up_thru = osdmap->get_up_thru(whoami);
- if (up_thru_wanted < up_thru) {
+ dout(10) << "send_alive up_thru currently " << up_thru << " want " << up_thru_wanted << dendl;
+ if (up_thru_wanted > up_thru) {
up_thru_pending = up_thru_wanted;
int mon = monmap->pick_mon();
dout(10) << "send_alive to mon" << mon << " (want " << up_thru_wanted << ")" << dendl;
osdmap->get_addr(whoami) == messenger->get_myaddr()) {
// yay!
activate_map(t);
-
+
// process waiters
take_waiters(waiting_for_osdmap);
}
for (hash_map<pg_t,PG*>::iterator it = pg_map.begin();
it != pg_map.end();
it++) {
- //pg_t pgid = it->first;
PG *pg = it->second;
pg->lock();
if (pg->is_active()) {
// update started counter
pg->info.history.last_epoch_started = osdmap->get_epoch();
- }
+ }
else if (pg->get_role() == 0 && !pg->is_active()) {
// i am (inactive) primary
pg->build_prior();
pg->peer(t, query_map, &info_map);
}
else if (pg->is_stray() &&
- pg->get_primary() >= 0) {
+ pg->get_primary() >= 0) {
// i am residual|replica
notify_list[pg->get_primary()].push_back(pg->info);
}
pg->unlock();
}
+ last_active_epoch = osdmap->get_epoch();
+
do_notifies(notify_list); // notify? (residual|replica)
do_queries(query_map);
do_infos(info_map);
private:
/** superblock **/
OSDSuperblock superblock;
- epoch_t boot_epoch;
+ epoch_t boot_epoch;
+ epoch_t last_active_epoch;
void write_superblock();
void write_superblock(ObjectStore::Transaction& t);
return n;
}
+ int get_state(int o) {  // returns the osd_state entry for osd o; callers in this patch OR flag bits such as CEPH_OSD_UP into it
+ assert(o < max_osd);  // upper bound only; NOTE(review): a negative o is not rejected -- confirm callers never pass one
+ return osd_state[o];
+ }
void set_state(int o, unsigned s) {
assert(o < max_osd);
osd_state[o] = s;
return -1;
}
- void mark_down(int o, bool clean) {
- osd_state[o] &= ~CEPH_OSD_UP;
- }
- void mark_up(int o) {
- osd_state[o] |= CEPH_OSD_UP;
- }
- void mark_out(int o) {
- set_offload(o, CEPH_OSD_OUT);
- }
- void mark_in(int o) {
- set_offload(o, CEPH_OSD_IN);
- }
-
void apply_incremental(Incremental &inc) {
if (inc.epoch == 1)
fsid = inc.fsid;
int num_still_up_or_clean = 0;
for (unsigned i=0; i<acting.size(); i++) {
if (osd->osdmap->is_up(acting[i])) { // is up now
- num_still_up_or_clean++;
if (acting[i] != osd->whoami) // and is not me
prior_set.insert(acting[i]);
+
+ // has it been up this whole time?
+ if (osd->osdmap->get_up_from(acting[i]) <= first_epoch)
+ num_still_up_or_clean++;
} else {
dout(10) << "build_prior prior osd" << acting[i] << " is down, must notify mon" << dendl;
must_notify_mon = true;
num_unacked--;
dout(15) << "handle_osd_modify_reply ack" << dendl;
+ /*
+ osd uses v to reorder during replay, but doesn't preserve it
if (wr->tid_version.count(tid) &&
wr->tid_version[tid].version != m->get_version().version) {
dout(-10) << "handle_osd_modify_reply WARNING: replay of tid " << tid
<< " did not achieve previous ordering" << dendl;
}
+ */
wr->tid_version[tid] = m->get_version();
if (wr->waitfor_ack.empty()) {
}
if (m->is_safe()) {
// safe
+ /*
+ osd uses v to reorder during replay, but doesn't preserve it
assert(wr->tid_version.count(tid) == 0 ||
m->get_version() == wr->tid_version[tid]);
+ */
wr->waitfor_commit.erase(tid);
num_uncommitted--;
int n = osdmap.get_max_osd();
int count[n];
for (int i=0; i<n; i++) {
- osdmap.mark_up(i);
- osdmap.mark_in(i);
+ osdmap.set_state(i, osdmap.get_state(i) | CEPH_OSD_UP);
+ osdmap.set_offload(i, CEPH_OSD_IN);
count[i] = 0;
}
# For each listed osd id: first run cosd with --mkfs_for_osd on dev/osd$osd,
# then run cosd again on the same path with the --debug_* flags.
# Ids 4-7 are commented out of the list, so only 0-3 are iterated.
for osd in 0 1 2 3 #4 5 6 7
do
$CEPH_BIN/cosd --mkfs_for_osd $osd dev/osd$osd # initialize empty object store
- $CEPH_BIN/cosd $ARGS dev/osd$osd --debug_ms 1 --debug_osd 20 --debug_fakestore 10 #--debug_osd 40
+ #valgrind --tool=massif $CEPH_BIN/cosd dev/osd$osd --debug_ms 1 --debug_osd 20 --debug_fakestore 10 1>out/o$osd & #--debug_osd 40
+ $CEPH_BIN/cosd dev/osd$osd -d --debug_ms 1 --debug_osd 20 --debug_fakestore 10
done
# mds