// Bounds how infrequently a new map epoch will be persisted for a pg
OPTION(osd_pg_epoch_persisted_max_stale, OPT_U32) // make this < map_cache_size!
+OPTION(osd_target_pg_log_entries_per_osd, OPT_U32) // target number of log entries, total across all pgs on the osd
OPTION(osd_min_pg_log_entries, OPT_U32) // number of entries to keep in the pg log when trimming it
OPTION(osd_max_pg_log_entries, OPT_U32) // max entries, say when degraded, before we trim
OPTION(osd_pg_log_dups_tracked, OPT_U32) // how many versions back to track combined in both pglog's regular + dup logs
.set_default(40)
.set_description(""),
+ Option("osd_target_pg_log_entries_per_osd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+ .set_default(3000 * 100)
+ .set_description("target number of PG log entries, summed across all PGs on an OSD")
+ .add_service("osd")
+ .add_see_also("osd_max_pg_log_entries")
+ .add_see_also("osd_min_pg_log_entries"),
+
Option("osd_min_pg_log_entries", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
- .set_default(3000)
+ .set_default(250)
.set_description("minimum number of entries to maintain in the PG log")
.add_service("osd")
.add_see_also("osd_max_pg_log_entries")
.add_see_also("osd_pg_log_dups_tracked"),
Option("osd_max_pg_log_entries", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
- .set_default(3000)
+ .set_default(10000)
.set_description("maximum number of entries to maintain in the PG log when degraded before we trim")
.add_service("osd")
.add_see_also("osd_min_pg_log_entries")
}
}
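// A minimal sketch (illustrative only; per_pg_log_budget is not a name from
// this change) of how the new option combines with the revised defaults:
// the per-PG trim target is the OSD-wide budget divided by the number of
// PGs the OSD hosts, clamped to the configured
// [osd_min_pg_log_entries, osd_max_pg_log_entries] range.

#include <algorithm>

unsigned per_pg_log_budget(unsigned target, unsigned num_pgs,
                           unsigned min_entries, unsigned max_entries)
{
  if (num_pgs == 0 || target == 0) {
    return min_entries;  // budget disabled or no PGs registered yet
  }
  return std::clamp(target / num_pgs, min_entries, max_entries);
}

// per_pg_log_budget(300000,  100, 250, 10000) == 3000  (even spread)
// per_pg_log_budget(300000, 2000, 250, 10000) == 250   (floored at min)
// per_pg_log_budget(300000,   10, 250, 10000) == 10000 (capped at max)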
+unsigned PG::get_target_pg_log_entries() const
+{
+ const unsigned num_pgs = shard_services.get_pg_num();
+ const unsigned target =
+ local_conf().get_val<uint64_t>("osd_target_pg_log_entries_per_osd");
+ const unsigned min_pg_log_entries =
+ local_conf().get_val<uint64_t>("osd_min_pg_log_entries");
+ if (num_pgs > 0 && target > 0) {
+ // target an even spread of our budgeted log entries across all
+ // PGs. note that while we only get to control the entry count
+ // for primary PGs, we'll normally be responsible for a mix of
+ // primary and replica PGs (for the same pool(s) even), so this
+ // will work out.
+ const unsigned max_pg_log_entries =
+ local_conf().get_val<uint64_t>("osd_max_pg_log_entries");
+ return std::clamp(target / num_pgs,
+ min_pg_log_entries,
+ max_pg_log_entries);
+ } else {
+ // fall back to a per-pg value.
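+ // (a target of 0, or an OSD with no PGs yet, disables the per-OSD
+ // budget; the trim target then falls to the osd_min_pg_log_entries floor.)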
+ return min_pg_log_entries;
+ }
+}
+
void PG::on_activate(interval_set<snapid_t>)
{
projected_last_update = peering_state.get_info().last_update;
ceph::timespan delay) final;
void recheck_readable() final;
+ unsigned get_target_pg_log_entries() const final;
+
void on_pool_change() final {
// Not needed yet
}
return true;
}
+unsigned OSDService::get_target_pg_log_entries() const
+{
+ auto num_pgs = osd->get_num_pgs();
+ auto target = cct->_conf->osd_target_pg_log_entries_per_osd;
+ if (num_pgs > 0 && target > 0) {
+ // target an even spread of our budgeted log entries across all
+ // PGs. note that while we only get to control the entry count
+ // for primary PGs, we'll normally be responsible for a mix of
+ // primary and replica PGs (for the same pool(s) even), so this
+ // will work out.
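+ // (equivalent to the std::clamp() form in the crimson variant,
+ // provided osd_min_pg_log_entries <= osd_max_pg_log_entries)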
+ return std::max<unsigned>(
+ std::min<unsigned>(target / num_pgs,
+ cct->_conf->osd_max_pg_log_entries),
+ cct->_conf->osd_min_pg_log_entries);
+ } else {
+ // fall back to a per-pg value.
+ return cct->_conf->osd_min_pg_log_entries;
+ }
+}
+
void OSD::do_recovery(
PG *pg, epoch_t queued, uint64_t reserved_pushes,
ThreadPool::TPHandle &handle)
return awaiting.second.get() == pg;
});
}
+
+ unsigned get_target_pg_log_entries() const;
+
// delayed pg activation
void queue_for_recovery(PG *pg) {
std::lock_guard l(recovery_lock);
}
}
+unsigned PG::get_target_pg_log_entries() const
+{
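+ // delegate to OSDService so every PG on this OSD computes the same
+ // per-PG budget for a given PG count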
+ return osd->get_target_pg_log_entries();
+}
+
void PG::clear_publish_stats()
{
dout(15) << "clear_stats" << dendl;
uint64_t get_snap_trimq_size() const override {
return snap_trimq.size();
}
+ unsigned get_target_pg_log_entries() const override;
void clear_publish_stats() override;
void clear_primary_state() override;
PG_STATE_BACKFILLING |
PG_STATE_BACKFILL_WAIT |
PG_STATE_BACKFILL_TOOFULL)) {
- target = cct->_conf->osd_max_pg_log_entries;
+ target = pl->get_target_pg_log_entries();
}
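// get_target_pg_log_entries() already clamps its result to
// osd_max_pg_log_entries, so the trim target for degraded or
// backfilling PGs can never exceed the old fixed cap.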
eversion_t limit = std::min(
PG_STATE_BACKFILLING |
PG_STATE_BACKFILL_WAIT |
PG_STATE_BACKFILL_TOOFULL)) {
- target = cct->_conf->osd_max_pg_log_entries;
+ target = pl->get_target_pg_log_entries();
}
// limit pg log trimming up to the can_rollback_to value
eversion_t limit = std::min(
virtual void queue_check_readable(epoch_t lpr, ceph::timespan delay) = 0;
virtual void recheck_readable() = 0;
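+ // per-PG log length budget, derived by the listener from the OSD-wide
+ // osd_target_pg_log_entries_per_osd target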
+ virtual unsigned get_target_pg_log_entries() const = 0;
+
// ============ Flush state ==================
/**
* try_flush_or_schedule_async()