OPTION(osd_max_pg_blocked_by, OPT_U32, 16) // max peer osds to report that are blocking our progress
OPTION(osd_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go
OPTION(osd_verify_sparse_read_holes, OPT_BOOL, false) // read fiemap-reported holes and verify they are zeros
+OPTION(osd_peering_aggressive_backoff, OPT_BOOL, false) // issue aggressive client backoff during peering
+OPTION(osd_debug_crash_on_ignored_backoff, OPT_BOOL, false) // crash osd if client ignores a backoff; useful for debugging
OPTION(osd_debug_drop_ping_probability, OPT_DOUBLE, 0) // NOTE(review): presumably the probability of dropping a ping (debug aid) -- confirm against the code that reads it
OPTION(osd_debug_drop_ping_duration, OPT_INT, 0) // NOTE(review): unit is not visible here (seconds vs. number of pings) -- confirm against the code that reads it
OPTION(osd_debug_drop_op_probability, OPT_DOUBLE, 0) // probability of stalling/dropping a client op
missing_loc.clear();
+ release_pg_backoffs();
+
pg_log.reset_recovery_pointers();
scrubber.reserved_peers.clear();
}
state_set(PG_STATE_ACTIVATING);
+ release_pg_backoffs();
}
if (is_primary()) {
projected_last_update = info.last_update;
switch (op->get_req()->get_type()) {
case CEPH_MSG_OSD_OP:
return can_discard_op(op);
+ case CEPH_MSG_OSD_BACKOFF:
+ return false; // never discard
case MSG_OSD_SUBOP:
return can_discard_replica_op<MOSDSubOp, MSG_OSD_SUBOP>(op);
case MSG_OSD_REPOP:
cur_epoch,
static_cast<MOSDOp*>(op->get_req())->get_map_epoch());
+ case CEPH_MSG_OSD_BACKOFF:
+ return false; // we don't care about maps
+
case MSG_OSD_SUBOP:
return !have_same_or_newer_map(
cur_epoch,
if (can_discard_request(op)) {
return;
}
+
+ // pg-wide backoffs
+ Message *m = op->get_req();
+ if (m->get_connection()->has_feature(CEPH_FEATURE_RADOS_BACKOFF)) {
+ SessionRef session((Session *)m->get_connection()->get_priv());
+ if (!session)
+ return; // drop it.
+ session->put(); // get_priv takes a ref, and so does the SessionRef
+
+ if (op->get_req()->get_type() == CEPH_MSG_OSD_OP) {
+ Backoff *b = session->have_backoff(info.pgid.pgid.get_hobj_start());
+ if (b) {
+ dout(10) << " have backoff " << *b << " " << *m << dendl;
+ assert(!b->is_acked() || !g_conf->osd_debug_crash_on_ignored_backoff);
+ return;
+ }
+
+ bool backoff =
+ is_down() ||
+ is_incomplete() ||
+ (!is_active() && is_peered());
+ if (g_conf->osd_peering_aggressive_backoff && !backoff) {
+ if (is_peering()) {
+ backoff = true;
+ }
+ }
+ if (backoff) {
+ add_pg_backoff(session);
+ return;
+ }
+ }
+ // pg backoff acks at pg-level
+ if (op->get_req()->get_type() == CEPH_MSG_OSD_BACKOFF) {
+ MOSDBackoff *ba = static_cast<MOSDBackoff*>(m);
+ if (ba->begin != ba->end) {
+ handle_backoff(op);
+ return;
+ }
+ }
+ }
+
if (flushes_in_progress > 0) {
dout(20) << flushes_in_progress
<< " flushes_in_progress pending "