osd/PrimaryLogPG: PG-wide backoffs
author     Sage Weil <sage@redhat.com>
           Wed, 1 Feb 2017 22:30:32 +0000 (17:30 -0500)
committer  Sage Weil <sage@redhat.com>
           Fri, 10 Feb 2017 23:55:58 +0000 (18:55 -0500)
Issue backoffs at the top of do_request; release them on activation
or on a peering interval change.

Signed-off-by: Sage Weil <sage@redhat.com>
src/common/config_opts.h
src/osd/PG.cc
src/osd/PrimaryLogPG.cc
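
For context: a PG-wide backoff tells a client to stop sending ops for the
PG's entire hobject_t hash range, rather than for a single object. The diff
below uses pgid.get_hobj_start() for the range start; the matching
get_hobj_end() does not appear in this hunk and is assumed from the
surrounding series. A minimal, self-contained model of the per-session
range lookup (ToySession, and the uint32_t hash standing in for hobject_t,
are illustrative names, not Ceph API):

    #include <cstdint>
    #include <iostream>
    #include <map>

    // Toy model: each backoff is a half-open range [begin, end) of hash
    // space; a PG-wide backoff covers the PG's whole hash slice.
    struct ToySession {
      std::map<uint32_t, uint32_t> backoffs;  // begin -> end

      void add_pg_backoff(uint32_t begin, uint32_t end) {
        backoffs[begin] = end;
      }

      // Analogue of have_backoff(): does any range cover this hash?
      bool have_backoff(uint32_t hash) const {
        auto it = backoffs.upper_bound(hash);
        if (it == backoffs.begin())
          return false;
        --it;
        return hash < it->second;
      }
    };

    int main() {
      ToySession s;
      s.add_pg_backoff(0x1000, 0x2000);             // one PG's hash slice
      std::cout << s.have_backoff(0x1800) << "\n";  // 1: op is blocked
      std::cout << s.have_backoff(0x2800) << "\n";  // 0: different PG
    }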

diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index ac27b480d2a944ed2d35b424b48e1907149248aa..6af9f43d71736a985086268c7ffa70bec4cd093f 100644
@@ -842,6 +842,8 @@ OPTION(osd_command_max_records, OPT_INT, 256)
 OPTION(osd_max_pg_blocked_by, OPT_U32, 16)    // max peer osds to report that are blocking our progress
 OPTION(osd_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go
 OPTION(osd_verify_sparse_read_holes, OPT_BOOL, false)  // read fiemap-reported holes and verify they are zeros
+OPTION(osd_peering_aggressive_backoff, OPT_BOOL, false)  // issue aggressive client backoff during peering
+OPTION(osd_debug_crash_on_ignored_backoff, OPT_BOOL, false) // crash osd if client ignores a backoff; useful for debugging
 OPTION(osd_debug_drop_ping_probability, OPT_DOUBLE, 0)
 OPTION(osd_debug_drop_ping_duration, OPT_INT, 0)
 OPTION(osd_debug_drop_op_probability, OPT_DOUBLE, 0)   // probability of stalling/dropping a client op
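
Both new options default to false. Assuming they land as shown, a ceph.conf
stanza like the following would enable them (the crash-on-ignored-backoff
option is a debugging aid for QA runs, not something to set in production):

    [osd]
        osd_peering_aggressive_backoff = true
        osd_debug_crash_on_ignored_backoff = true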
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index d8c7ab93582d219c46e0329d1f8eb3da28573d90..e0000c30abd10c890c15889ed2bc65fd881ffba3 100644
@@ -973,6 +973,8 @@ void PG::clear_primary_state()
 
   missing_loc.clear();
 
+  release_pg_backoffs();
+
   pg_log.reset_recovery_pointers();
 
   scrubber.reserved_peers.clear();
@@ -1836,6 +1838,7 @@ void PG::activate(ObjectStore::Transaction& t,
     }
 
     state_set(PG_STATE_ACTIVATING);
+    release_pg_backoffs();
   }
   if (is_primary()) {
     projected_last_update = info.last_update;
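
The two PG.cc hunks above pin down the release side of the lifecycle:
backoffs accumulated while the PG is not active are dropped both on
activation (PG::activate) and on a peering interval change
(PG::clear_primary_state). A toy model of that flow; ToyPG is an
illustrative stand-in, and the MOSDBackoff unblock message named in the
comment is assumed from the broader backoff series, not shown in this diff:

    #include <iostream>
    #include <set>

    struct ToyPG {
      std::set<int> backed_off_sessions;  // stand-in for per-session Backoffs

      void add_pg_backoff(int session) {
        backed_off_sessions.insert(session);
      }

      // Analogue of release_pg_backoffs(): tell every backed-off client
      // it may resend, then forget the backoffs.
      void release_pg_backoffs() {
        for (int s : backed_off_sessions)
          std::cout << "send backoff unblock to session " << s << "\n";
        backed_off_sessions.clear();
      }

      void activate()            { release_pg_backoffs(); }  // PG::activate
      void clear_primary_state() { release_pg_backoffs(); }  // interval change
    };

    int main() {
      ToyPG pg;
      pg.add_pg_backoff(1);
      pg.add_pg_backoff(2);
      pg.activate();  // both sessions unblocked
    }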
@@ -5543,6 +5546,8 @@ bool PG::can_discard_request(OpRequestRef& op)
   switch (op->get_req()->get_type()) {
   case CEPH_MSG_OSD_OP:
     return can_discard_op(op);
+  case CEPH_MSG_OSD_BACKOFF:
+    return false; // never discard
   case MSG_OSD_SUBOP:
     return can_discard_replica_op<MOSDSubOp, MSG_OSD_SUBOP>(op);
   case MSG_OSD_REPOP:
@@ -5591,6 +5596,9 @@ bool PG::op_must_wait_for_map(epoch_t cur_epoch, OpRequestRef& op)
       cur_epoch,
       static_cast<MOSDOp*>(op->get_req())->get_map_epoch());
 
+  case CEPH_MSG_OSD_BACKOFF:
+    return false; // we don't care about maps
+
   case MSG_OSD_SUBOP:
     return !have_same_or_newer_map(
       cur_epoch,
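
The PrimaryLogPG.cc hunk below gates incoming client ops on a backoff
predicate: back off while the PG is down, incomplete, or peered but not yet
active, and optionally (with osd_peering_aggressive_backoff) while peering
as well. A self-contained model of just that predicate; PGState and
should_backoff are illustrative names for the is_down()/is_incomplete()/
is_active()/is_peered()/is_peering() checks on the PG:

    #include <iostream>

    // Simplified stand-in for the PG state flags queried in do_request.
    struct PGState {
      bool down = false, incomplete = false, active = false,
           peered = false, peering = false;
    };

    // Mirrors the decision in the hunk below.
    bool should_backoff(const PGState& s, bool aggressive)
    {
      bool backoff = s.down || s.incomplete || (!s.active && s.peered);
      if (aggressive && !backoff && s.peering)
        backoff = true;
      return backoff;
    }

    int main()
    {
      PGState peering_pg;
      peering_pg.peering = true;
      std::cout << should_backoff(peering_pg, false) << "\n";  // 0: let it through
      std::cout << should_backoff(peering_pg, true)  << "\n";  // 1: back off
    }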
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index 98d7559890f02ebee027dc12ac0ae94acc9f64aa..727b537c5726f32c4514893b524df3d5a1298e0d 100644
@@ -1627,6 +1627,47 @@ void PrimaryLogPG::do_request(
   if (can_discard_request(op)) {
     return;
   }
+
+  // pg-wide backoffs
+  Message *m = op->get_req();
+  if (m->get_connection()->has_feature(CEPH_FEATURE_RADOS_BACKOFF)) {
+    SessionRef session((Session *)m->get_connection()->get_priv());
+    if (!session)
+      return;  // drop it.
+    session->put();  // get_priv takes a ref, and so does the SessionRef
+
+    if (op->get_req()->get_type() == CEPH_MSG_OSD_OP) {
+      Backoff *b = session->have_backoff(info.pgid.pgid.get_hobj_start());
+      if (b) {
+       dout(10) << " have backoff " << *b << " " << *m << dendl;
+       assert(!b->is_acked() || !g_conf->osd_debug_crash_on_ignored_backoff);
+       return;
+      }
+
+      bool backoff =
+       is_down() ||
+       is_incomplete() ||
+       (!is_active() && is_peered());
+      if (g_conf->osd_peering_aggressive_backoff && !backoff) {
+       if (is_peering()) {
+         backoff = true;
+       }
+      }
+      if (backoff) {
+       add_pg_backoff(session);
+       return;
+      }
+    }
+    // pg backoff acks at pg-level
+    if (op->get_req()->get_type() == CEPH_MSG_OSD_BACKOFF) {
+      MOSDBackoff *ba = static_cast<MOSDBackoff*>(m);
+      if (ba->begin != ba->end) {
+       handle_backoff(op);
+       return;
+      }
+    }
+  }
+
   if (flushes_in_progress > 0) {
     dout(20) << flushes_in_progress
             << " flushes_in_progress pending "