]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: force recover the oldest missing object if too many logs 12882/head
author Zhiqiang Wang <zhiqiang@xsky.com>
Thu, 5 Jan 2017 06:31:46 +0000 (14:31 +0800)
committer Zhiqiang Wang <zhiqiang@xsky.com>
Fri, 13 Jan 2017 05:32:34 +0000 (13:32 +0800)
When the oldest missing object of a pg is not recovered for a long
time, the pg log is not trimmed because min_last_complete_on_disk
does not advance. This may accumulate too many log entries in memory.
Force recovery of the oldest missing object when the number of log
entries exceeds osd_max_pg_log_entries *
osd_force_recovery_pg_log_entries_factor.

Signed-off-by: Zhiqiang Wang <zhiqiang@xsky.com>
src/common/config_opts.h
src/osd/PrimaryLogPG.cc
src/osd/PrimaryLogPG.h

index 85121c1180e4490328e2d1f163d03d240af37ae8..f8b6d707e7b9d349fcc179fef6a2c6a11080895d 100644 (file)
@@ -829,6 +829,7 @@ OPTION(osd_pg_epoch_persisted_max_stale, OPT_U32, 150) // make this < map_cache_
 
 OPTION(osd_min_pg_log_entries, OPT_U32, 3000)  // number of entries to keep in the pg log when trimming it
 OPTION(osd_max_pg_log_entries, OPT_U32, 10000) // max entries, say when degraded, before we trim
+OPTION(osd_force_recovery_pg_log_entries_factor, OPT_FLOAT, 1.3) // force recovery of the oldest missing object once the pg log exceeds osd_max_pg_log_entries * this factor
 OPTION(osd_pg_log_trim_min, OPT_U32, 100)
 OPTION(osd_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy
 OPTION(osd_command_max_records, OPT_INT, 256)
index e150704cb6b9fd3eeb21ac3dd0d7a3370b5a9264..1c2651c028e2e8c8446ab6ae03257e8dcd55ab32 100644 (file)
@@ -672,6 +672,48 @@ void PrimaryLogPG::wait_for_blocked_object(const hobject_t& soid, OpRequestRef o
   op->mark_delayed("waiting for blocked object");
 }
 
+void PrimaryLogPG::maybe_force_recovery()
+{
+  // no force if not in degraded/recovery/backfill stats
+  if (!is_degraded() &&
+      !state_test(PG_STATE_RECOVERING |
+                  PG_STATE_RECOVERY_WAIT |
+                 PG_STATE_BACKFILL |
+                 PG_STATE_BACKFILL_WAIT |
+                 PG_STATE_BACKFILL_TOOFULL))
+    return;
+
+  if (pg_log.get_log().approx_size() <
+      cct->_conf->osd_max_pg_log_entries *
+        cct->_conf->osd_force_recovery_pg_log_entries_factor)
+    return;
+
+  // find the oldest missing object
+  version_t min_version = 0;
+  hobject_t soid;
+  if (!pg_log.get_missing().get_items().empty()) {
+    min_version = pg_log.get_missing().get_rmissing().begin()->first;
+    soid = pg_log.get_missing().get_rmissing().begin()->second;
+  }
+  assert(!actingbackfill.empty());
+  for (set<pg_shard_t>::iterator it = actingbackfill.begin();
+       it != actingbackfill.end();
+       ++it) {
+    if (*it == get_primary()) continue;
+    pg_shard_t peer = *it;
+    if (peer_missing.count(peer) &&
+       !peer_missing[peer].get_items().empty() &&
+       min_version > peer_missing[peer].get_rmissing().begin()->first) {
+      min_version = peer_missing[peer].get_rmissing().begin()->first;
+      soid = peer_missing[peer].get_rmissing().begin()->second;
+    }
+  }
+
+  // recover it
+  if (soid != hobject_t())
+    maybe_kick_recovery(soid);
+}
+
 class PGLSPlainFilter : public PGLSFilter {
   string val;
 public:
@@ -2303,6 +2345,9 @@ void PrimaryLogPG::do_op(OpRequestRef& op)
   } else if (op->may_write() || op->may_cache()) {
     osd->logger->tinc(l_osd_op_w_prepare_lat, prepare_latency);
   }
+
+  // force recovery of the oldest missing object if too many logs
+  maybe_force_recovery();
 }
 
 void PrimaryLogPG::record_write_error(OpRequestRef op, const hobject_t &soid,
index 50b32365153b1659b70a993c7bb167e87cc5a8d0..17c3ab5f228ae3cec9ce7124471508b46405858e 100644 (file)
@@ -1579,6 +1579,8 @@ public:
   void wait_for_blocked_object(const hobject_t& soid, OpRequestRef op);
   void kick_object_context_blocked(ObjectContextRef obc);
 
+  void maybe_force_recovery(); // force recovery of the oldest missing object if the pg log is too long
+
   void mark_all_unfound_lost(
     int what,
     ConnectionRef con,