From: Zhiqiang Wang
Date: Thu, 5 Jan 2017 06:31:46 +0000 (+0800)
Subject: osd: force recover the oldest missing object if too many logs
X-Git-Tag: v12.0.3~128^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7c41d4cc6d1747a35d3861f339da5bf622c1589b;p=ceph.git

osd: force recover the oldest missing object if too many logs

When the oldest missing object of a pg is not recovered for a long time,
the pg log is not trimmed because min_last_complete_on_disk does not
advance. This may accumulate too many log entries in memory. Force
recovery of the oldest missing object when the number of log entries
reaches osd_max_pg_log_entries * osd_force_recovery_pg_log_entries_factor.

Signed-off-by: Zhiqiang Wang
---

diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 85121c1180e44..f8b6d707e7b9d 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -829,6 +829,7 @@ OPTION(osd_pg_epoch_persisted_max_stale, OPT_U32, 150) // make this < map_cache_
 
 OPTION(osd_min_pg_log_entries, OPT_U32, 3000) // number of entries to keep in the pg log when trimming it
 OPTION(osd_max_pg_log_entries, OPT_U32, 10000) // max entries, say when degraded, before we trim
+OPTION(osd_force_recovery_pg_log_entries_factor, OPT_FLOAT, 1.3) // max entries factor before force recovery
 OPTION(osd_pg_log_trim_min, OPT_U32, 100)
 OPTION(osd_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy
 OPTION(osd_command_max_records, OPT_INT, 256)
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index e150704cb6b9f..1c2651c028e2e 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -672,6 +672,64 @@ void PrimaryLogPG::wait_for_blocked_object(const hobject_t& soid, OpRequestRef o
   op->mark_delayed("waiting for blocked object");
 }
 
+/**
+ * Kick recovery of the oldest missing object when the pg log has grown to
+ * osd_max_pg_log_entries * osd_force_recovery_pg_log_entries_factor entries
+ * or more.
+ *
+ * The candidate is the lowest-versioned entry among pg_log.get_missing() and
+ * the peer_missing sets of the non-primary shards in actingbackfill; it is
+ * passed to maybe_kick_recovery().  Returns without doing anything when the
+ * PG is neither degraded nor in a recovery/backfill state, when the log is
+ * below the threshold, or when no missing object is found.
+ */
+void PrimaryLogPG::maybe_force_recovery()
+{
+  // no force if not in degraded/recovery/backfill states
+  if (!is_degraded() &&
+      !state_test(PG_STATE_RECOVERING |
+                  PG_STATE_RECOVERY_WAIT |
+                  PG_STATE_BACKFILL |
+                  PG_STATE_BACKFILL_WAIT |
+                  PG_STATE_BACKFILL_TOOFULL))
+    return;
+
+  if (pg_log.get_log().approx_size() <
+      cct->_conf->osd_max_pg_log_entries *
+      cct->_conf->osd_force_recovery_pg_log_entries_factor)
+    return;
+
+  // find the oldest missing object; a default-constructed soid means that
+  // no candidate has been picked yet
+  version_t min_version = 0;
+  hobject_t soid;
+  if (!pg_log.get_missing().get_items().empty()) {
+    min_version = pg_log.get_missing().get_rmissing().begin()->first;
+    soid = pg_log.get_missing().get_rmissing().begin()->second;
+  }
+  assert(!actingbackfill.empty());
+  for (set<pg_shard_t>::iterator it = actingbackfill.begin();
+       it != actingbackfill.end();
+       ++it) {
+    if (*it == get_primary()) continue;
+    pg_shard_t peer = *it;
+    if (!peer_missing.count(peer) ||
+        peer_missing[peer].get_items().empty())
+      continue;
+    version_t peer_oldest = peer_missing[peer].get_rmissing().begin()->first;
+    // take the peer's entry when nothing has been picked yet: min_version is
+    // still 0 then, so "min_version > peer_oldest" alone would never fire
+    if (soid == hobject_t() || min_version > peer_oldest) {
+      min_version = peer_oldest;
+      soid = peer_missing[peer].get_rmissing().begin()->second;
+    }
+  }
+
+  // recover it
+  if (soid != hobject_t())
+    maybe_kick_recovery(soid);
+}
+
 class PGLSPlainFilter : public PGLSFilter {
   string val;
 public:
@@ -2303,6 +2361,9 @@ void PrimaryLogPG::do_op(OpRequestRef& op)
   } else if (op->may_write() || op->may_cache()) {
     osd->logger->tinc(l_osd_op_w_prepare_lat, prepare_latency);
   }
+
+  // force recovery of the oldest missing object if too many logs
+  maybe_force_recovery();
 }
 
 void PrimaryLogPG::record_write_error(OpRequestRef op, const hobject_t &soid,
diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h
index 50b32365153b1..17c3ab5f228ae 100644
--- a/src/osd/PrimaryLogPG.h
+++ b/src/osd/PrimaryLogPG.h
@@ -1579,6 +1579,8 @@ public:
   void wait_for_blocked_object(const hobject_t& soid, OpRequestRef op);
   void kick_object_context_blocked(ObjectContextRef obc);
 
+  void maybe_force_recovery();
+
   void mark_all_unfound_lost(
     int what,
     ConnectionRef con,