git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
ReplicatedPG: only allow a degraded write if we have at least min_size copies 3767/head
author Samuel Just <sjust@redhat.com>
Tue, 17 Feb 2015 18:29:59 +0000 (10:29 -0800)
committer Samuel Just <sjust@redhat.com>
Wed, 18 Feb 2015 06:40:12 +0000 (22:40 -0800)
Also, add a config option to disable the feature.

Signed-off-by: Samuel Just <sjust@redhat.com>
src/common/config_opts.h
src/osd/ReplicatedPG.cc
src/osd/ReplicatedPG.h

index 7994e3e8fb4d158934501b69398ac4949c7d3c83..e7604db6f397f10ae3096756e577dd336a09c151 100644 (file)
@@ -646,6 +646,7 @@ OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // what % full makes an OSD "ful
 OPTION(osd_failsafe_nearfull_ratio, OPT_FLOAT, .90) // what % full makes an OSD near full (failsafe)
 
 OPTION(osd_pg_object_context_cache_count, OPT_INT, 64)
+OPTION(osd_enable_degraded_writes, OPT_BOOL, true) // when false, ordered writes to a degraded object are made to wait
 
 // determines whether PGLog::check() compares written out log to stored log
 OPTION(osd_debug_pg_log_writeout, OPT_BOOL, false)
index 6ab9c5d07b0739d92fb7d61dbc7fdd2382d366f8..965fee542fab7a51ebe32f91c0cecb27b61eb6de 100644 (file)
@@ -440,6 +440,36 @@ void ReplicatedPG::wait_for_all_missing(OpRequestRef op)
   op->mark_delayed("waiting for all missing");
 }
 
+bool ReplicatedPG::is_degraded_object(const hobject_t &soid, int *healthy_copies)
+{ // Returns true if soid is in the local missing set or any peer's; *healthy_copies receives the count of copies not missing it.
+  bool degraded = false;
+  assert(healthy_copies); // the out-parameter is mandatory
+  *healthy_copies = 0;
+  // Local copy first: it is either missing (degraded) or counts as healthy.
+  if (pg_log.get_missing().missing.count(soid)) {
+    degraded = true;
+  } else {
+    *healthy_copies += 1;
+  }
+  // Then every other shard in actingbackfill.
+  for (set<pg_shard_t>::iterator i = actingbackfill.begin();
+       i != actingbackfill.end();
+       ++i) {
+    if (*i == get_primary()) continue; // primary is not examined as a peer (presumably it is the local copy handled above -- confirm)
+    pg_shard_t peer = *i;
+    if (peer_missing.count(peer) && // a peer with no peer_missing entry is treated as not missing the object
+        peer_missing[peer].missing.count(soid)) {
+      degraded = true;
+      continue; // a peer missing the object never counts as a healthy copy
+    }
+    // Peer is not known to miss the object; count it only if its info is not incomplete.
+    assert(peer_info.count(peer));
+    if (!peer_info[peer].is_incomplete())
+      *healthy_copies += 1;
+  }
+  return degraded;
+}
+
 bool ReplicatedPG::is_degraded_or_backfilling_object(const hobject_t& soid)
 {
   if (pg_log.get_missing().missing.count(soid))
@@ -1453,10 +1483,13 @@ void ReplicatedPG::do_op(OpRequestRef& op)
    *
    * We also block if our peers do not support DEGRADED_WRITES.
    */
-  if ((pool.info.ec_pool() ||
-       !(get_min_peer_features() & CEPH_FEATURE_OSD_DEGRADED_WRITES)) &&
-      write_ordered &&
-      is_degraded_or_backfilling_object(head)) {
+  int valid_copies = 0;
+  if (write_ordered &&
+      is_degraded_object(head, &valid_copies) &&
+      (valid_copies < pool.info.min_size ||
+       pool.info.ec_pool() ||
+       !cct->_conf->osd_enable_degraded_writes ||
+       !(get_min_peer_features() & CEPH_FEATURE_OSD_DEGRADED_WRITES))) {
     wait_for_degraded_object(head, op);
     return;
   }
index 50190a7fd0b7c91d35a02f73054f5a4ec02e866a..f128c71e8d442f7f70038736211d16ead890711c 100644 (file)
@@ -1511,6 +1511,11 @@ public:
   void wait_for_all_missing(OpRequestRef op);
 
   bool is_degraded_or_backfilling_object(const hobject_t& oid);
+
+  /* true if the object is missing locally or on any actingbackfill peer;
+   * *healthy_copies is set to the number of copies not missing the object:
+   * the local copy plus each peer whose info is not incomplete */
+  bool is_degraded_object(const hobject_t &oid, int *healthy_copies);
   void wait_for_degraded_object(const hobject_t& oid, OpRequestRef op);
 
   bool maybe_await_blocked_snapset(const hobject_t &soid, OpRequestRef op);