Make sure the message we got from the peer OSD is valid. Specifically,
this avoids a race like this:
- A marks down B
- B sends MOSDRepScrub
- A accepts connection from B as new
- A replies to scrub
- B crashes because the reply arrives with msgr seq 1, which is lower than
  the seq (1000+) that B expects on that connection
See #8880 for the most recent fix for requests.
Fixes: #9555
Backport: giant, firefly
Signed-off-by: Sage Weil <sage@redhat.com>
case MSG_OSD_SCRUB:
handle_scrub(static_cast<MOSDScrub*>(m));
- break;
+ break;
case MSG_OSD_REP_SCRUB:
handle_rep_scrub(static_cast<MOSDRepScrub*>(m));
- break;
+ break;
// -- need OSDMap --
// Handle a replica-scrub request (MOSDRepScrub) received from a peer OSD.
//
// The message is validated before it is queued. Each failed check calls
// m->put() and returns without queueing; only a message that passes all
// checks is handed to rep_scrub_wq.
//
// NOTE(review): the require_* helpers are defined outside this hunk; the
// per-check notes below are inferred from their names and call sites --
// confirm against their definitions.
void OSD::handle_rep_scrub(MOSDRepScrub *m)
{
- dout(10) << "queueing MOSDRepScrub " << *m << dendl;
+ dout(10) << __func__ << " " << *m << dendl;
// Check 1: presumably verifies that this OSD is itself still considered
// alive as of the epoch the sender stamped on the message (m->map_epoch).
+ if (!require_self_aliveness(m, m->map_epoch)) {
+ m->put();
+ return;
+ }
// Check 2: presumably verifies that the sender is an OSD peer.
+ if (!require_osd_peer(m)) {
+ m->put();
+ return;
+ }
// Check 3: only performed when our osdmap epoch is at least the message's
// epoch. Presumably verifies that the connection's peer is the same
// instance the map records, so a request arriving over a connection from a
// peer we have marked down is not answered.
+ if (osdmap->get_epoch() >= m->map_epoch &&
+ !require_same_peer_instance(m, osdmap, true)) {
+ m->put();
+ return;
+ }
+
// All checks passed: hand the message to the replica-scrub work queue.
rep_scrub_wq.queue(m);
}