From: David Zafman Date: Tue, 28 Oct 2014 02:13:10 +0000 (-0700) Subject: osd, mon: Send initial pg create time from mon to osd X-Git-Tag: v0.89~40^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=52cb44c4dde337924342bb99f8ca8d1265421077;p=ceph.git osd, mon: Send initial pg create time from mon to osd Fixes: #9887 Signed-off-by: David Zafman --- diff --git a/src/messages/MOSDPGCreate.h b/src/messages/MOSDPGCreate.h index fcfb767807e3..a3dc3b72a395 100644 --- a/src/messages/MOSDPGCreate.h +++ b/src/messages/MOSDPGCreate.h @@ -25,15 +25,18 @@ struct MOSDPGCreate : public Message { - const static int HEAD_VERSION = 2; + const static int HEAD_VERSION = 3; + // At head_version 2 the unspecified compat_version was set to 2 + const static int COMPAT_VERSION = 2; version_t epoch; map mkpg; + map ctimes; MOSDPGCreate() - : Message(MSG_OSD_PG_CREATE, HEAD_VERSION) {} + : Message(MSG_OSD_PG_CREATE, HEAD_VERSION, COMPAT_VERSION) {} MOSDPGCreate(epoch_t e) - : Message(MSG_OSD_PG_CREATE, HEAD_VERSION), + : Message(MSG_OSD_PG_CREATE, HEAD_VERSION, COMPAT_VERSION), epoch(e) { } private: ~MOSDPGCreate() {} @@ -44,6 +47,7 @@ public: void encode_payload(uint64_t features) { ::encode(epoch, payload); ::encode(mkpg, payload); + ::encode(ctimes, payload); } void decode_payload() { bufferlist::iterator p = payload.begin(); @@ -65,14 +69,25 @@ public: mkpg[pgid] = pg_create_t(created, parent, split_bits); } } + if (header.version >= 3) { + ::decode(ctimes, p); + } else { + // To make other code simpler create map with time of 0,0 for each pg + for (map::const_iterator i = mkpg.begin(); + i != mkpg.end(); ++i) { + ctimes[i->first] = utime_t(); + } + } } void print(ostream& out) const { out << "osd_pg_create("; + map::const_iterator ci = ctimes.begin(); for (map::const_iterator i = mkpg.begin(); i != mkpg.end(); - ++i) { - out << "pg" << i->first << "," << i->second.created << "; "; + ++i, ++ci) { + assert(ci != ctimes.end() && ci->first == i->first); + out << "pg" << i->first << "," << i->second.created << "@" << ci->second << "; "; } out << ")"; } diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index 2acae823da0d..9689501d4f6a 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -959,6 +959,7 @@ void PGMonitor::register_pg(pg_pool_t& pool, pg_t pgid, epoch_t epoch, bool new_ { pg_t parent; int split_bits = 0; + bool parent_found = false; if (!new_pool) { parent = pgid; while (1) { @@ -972,6 +973,7 @@ void PGMonitor::register_pg(pg_pool_t& pool, pg_t pgid, epoch_t epoch, bool new_ if (pg_map.pg_stat.count(parent) && pg_map.pg_stat[parent].state != PG_STATE_CREATING) { dout(10) << " parent is " << parent << dendl; + parent_found = true; break; } } @@ -983,10 +985,17 @@ void PGMonitor::register_pg(pg_pool_t& pool, pg_t pgid, epoch_t epoch, bool new_ stats.parent = parent; stats.parent_split_bits = split_bits; - utime_t now = ceph_clock_now(g_ceph_context); - stats.last_scrub_stamp = now; - stats.last_deep_scrub_stamp = now; - stats.last_clean_scrub_stamp = now; + if (parent_found) { + stats.last_scrub_stamp = pg_map.pg_stat[parent].last_scrub_stamp; + stats.last_deep_scrub_stamp = pg_map.pg_stat[parent].last_deep_scrub_stamp; + stats.last_clean_scrub_stamp = pg_map.pg_stat[parent].last_clean_scrub_stamp; + } else { + utime_t now = ceph_clock_now(g_ceph_context); + stats.last_scrub_stamp = now; + stats.last_deep_scrub_stamp = now; + stats.last_clean_scrub_stamp = now; + } + if (split_bits == 0) { dout(10) << "register_new_pgs will create " << pgid << dendl; @@ -1174,6 +1183,9 @@ void PGMonitor::send_pg_creates(int osd, Connection *con) m->mkpg[*q] = pg_create_t(pg_map.pg_stat[*q].created, pg_map.pg_stat[*q].parent, pg_map.pg_stat[*q].parent_split_bits); + // Need the create time from the monitor using his clock to set last_scrub_stamp + // upon pg creation. + m->ctimes[*q] = pg_map.pg_stat[*q].last_scrub_stamp; } if (con) { diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 322cf5c0f6b2..04d0e4a889f0 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -6929,9 +6929,11 @@ void OSD::handle_pg_create(OpRequestRef op) int num_created = 0; + map::iterator ci = m->ctimes.begin(); for (map::iterator p = m->mkpg.begin(); p != m->mkpg.end(); - ++p) { + ++p, ++ci) { + assert(ci != m->ctimes.end() && ci->first == p->first); epoch_t created = p->second.created; pg_t parent = p->second.parent; if (p->second.split_bits) // Skip split pgs @@ -6948,7 +6950,7 @@ void OSD::handle_pg_create(OpRequestRef op) continue; } - dout(20) << "mkpg " << on << " e" << created << dendl; + dout(20) << "mkpg " << on << " e" << created << "@" << ci->second << dendl; // is it still ours? vector up, acting; @@ -6986,9 +6988,16 @@ void OSD::handle_pg_create(OpRequestRef op) history.last_epoch_clean = created; // Newly created PGs don't need to scrub immediately, so mark them // as scrubbed at creation time. - utime_t now = ceph_clock_now(NULL); - history.last_scrub_stamp = now; - history.last_deep_scrub_stamp = now; + if (ci->second == utime_t()) { + // Older OSD doesn't send ctime, so just do what we did before + // The repair_test.py can fail in a mixed cluster + utime_t now = ceph_clock_now(NULL); + history.last_scrub_stamp = now; + history.last_deep_scrub_stamp = now; + } else { + history.last_scrub_stamp = ci->second; + history.last_deep_scrub_stamp = ci->second; + } bool valid_history = project_pg_history( pgid, history, created, up, up_primary, acting, acting_primary); /* the pg creation message must have come from a mon and therefore