* New OSD daemon command dump_scrub_reservations which reveals the
scrub reservations that are held for local (primary) and remote (replica) PGs.
+
+14.2.6
+------
+
+* The following OSD memory config options related to BlueStore cache autotuning can now
+ be changed at runtime:
+
+ - osd_memory_base (default: 768 MB)
+ - osd_memory_cache_min (default: 128 MB)
+ - osd_memory_expected_fragmentation (default: 0.15)
+ - osd_memory_target (default: 4 GB)
+
+ The above options can be set with::
+
+ ceph config set global <option> <value>
Option("osd_memory_target", Option::TYPE_SIZE, Option::LEVEL_BASIC)
.set_default(4_G)
+ .set_min(896_M)
+ .set_flag(Option::FLAG_RUNTIME)
.add_see_also("bluestore_cache_autotune")
- .set_description("When tcmalloc and cache autotuning is enabled, try to keep this many bytes mapped in memory."),
+ .add_see_also("osd_memory_cache_min")
+ .add_see_also("osd_memory_base")
+ .set_description("When tcmalloc and cache autotuning is enabled, try to keep this many bytes mapped in memory.")
+ .set_long_description("The minimum value must be at least equal to osd_memory_base + osd_memory_cache_min."),
Option("osd_memory_target_cgroup_limit_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.8)
Option("osd_memory_base", Option::TYPE_SIZE, Option::LEVEL_DEV)
.set_default(768_M)
+ .set_flag(Option::FLAG_RUNTIME)
.add_see_also("bluestore_cache_autotune")
.set_description("When tcmalloc and cache autotuning is enabled, estimate the minimum amount of memory in bytes the OSD will need."),
Option("osd_memory_expected_fragmentation", Option::TYPE_FLOAT, Option::LEVEL_DEV)
.set_default(0.15)
.set_min_max(0.0, 1.0)
+ .set_flag(Option::FLAG_RUNTIME)
.add_see_also("bluestore_cache_autotune")
.set_description("When tcmalloc and cache autotuning is enabled, estimate the percent of memory fragmentation."),
Option("osd_memory_cache_min", Option::TYPE_SIZE, Option::LEVEL_DEV)
.set_default(128_M)
+ .set_min(128_M)
+ .set_flag(Option::FLAG_RUNTIME)
.add_see_also("bluestore_cache_autotune")
.set_description("When tcmalloc and cache autotuning is enabled, set the minimum amount of memory used for caches."),
{
std::unique_lock l(lock);
+ uint32_t prev_config_change = store->config_changed.load();
uint64_t base = store->osd_memory_base;
double fragmentation = store->osd_memory_expected_fragmentation;
uint64_t target = store->osd_memory_target;
bool interval_stats_trim = false;
while (!stop) {
+ // Update pcm cache settings if related configuration was changed
+ uint32_t cur_config_change = store->config_changed.load();
+ if (cur_config_change != prev_config_change) {
+ _update_cache_settings();
+ prev_config_change = cur_config_change;
+ }
+
// Before we trim, check and see if it's time to rebalance/resize.
double autotune_interval = store->cache_autotune_interval;
double resize_interval = store->osd_memory_cache_resize_interval;
}
}
+// Re-read the store's osd_memory_* values and push the resulting target,
+// minimum and maximum levels into the priority cache manager (pcm).
+void BlueStore::MempoolThread::_update_cache_settings()
+{
+  // Without a pcm there is nothing to retune.
+  if (!pcm) {
+    return;
+  }
+
+  auto cct = store->cct;
+  const uint64_t target = store->osd_memory_target;
+  const uint64_t base = store->osd_memory_base;
+  const uint64_t min = store->osd_memory_cache_min;
+  const double fragmentation = store->osd_memory_expected_fragmentation;
+
+  // Discount the expected fragmentation from the target. The ceiling is the
+  // remainder above base, unless that would not exceed min, in which case
+  // the ceiling collapses to min.
+  const uint64_t ltarget = (1.0 - fragmentation) * target;
+  const uint64_t max = (ltarget > base + min) ? ltarget - base : min;
+
+  // Hand the recomputed levels to pcm.
+  pcm->set_target_memory(target);
+  pcm->set_min_memory(min);
+  pcm->set_max_memory(max);
+
+  ldout(cct, 5) << __func__ << " updated pcm target: " << target
+                << " pcm min: " << min
+                << " pcm max: " << max
+                << dendl;
+}
+
// =======================================================
// OmapIteratorImpl
"osd_memory_target_cgroup_limit_ratio",
"osd_memory_base",
"osd_memory_cache_min",
+ "osd_memory_expected_fragmentation",
"bluestore_cache_autotune",
"bluestore_cache_autotune_interval",
"bluestore_warn_on_legacy_statfs",
throttle_deferred_bytes.reset_max(
conf->bluestore_throttle_bytes + conf->bluestore_throttle_deferred_bytes);
}
+ if (changed.count("osd_memory_target") ||
+ changed.count("osd_memory_base") ||
+ changed.count("osd_memory_cache_min") ||
+ changed.count("osd_memory_expected_fragmentation")) {
+ _update_osd_memory_options();
+ }
}
void BlueStore::_set_compression()
<< std::dec << dendl;
}
+// Refresh the cached osd_memory_* members from the current configuration
+// and advance config_changed.
+void BlueStore::_update_osd_memory_options()
+{
+  osd_memory_target =
+    cct->_conf.get_val<Option::size_t>("osd_memory_target");
+  osd_memory_base =
+    cct->_conf.get_val<Option::size_t>("osd_memory_base");
+  osd_memory_expected_fragmentation =
+    cct->_conf.get_val<double>("osd_memory_expected_fragmentation");
+  osd_memory_cache_min =
+    cct->_conf.get_val<Option::size_t>("osd_memory_cache_min");
+
+  // Bump the change counter only after all four values have been stored.
+  ++config_changed;
+
+  dout(10) << __func__
+           << " osd_memory_target " << osd_memory_target
+           << " osd_memory_base " << osd_memory_base
+           << " osd_memory_expected_fragmentation " << osd_memory_expected_fragmentation
+           << " osd_memory_cache_min " << osd_memory_cache_min
+           << dendl;
+}
+
int BlueStore::_set_cache_sizes()
{
ceph_assert(bdev);
double osd_memory_expected_fragmentation = 0; ///< expected memory fragmentation
uint64_t osd_memory_cache_min = 0; ///< Min memory to assign when autotuning cache
double osd_memory_cache_resize_interval = 0; ///< Time to wait between cache resizing
+ std::atomic<uint32_t> config_changed = {0}; ///< Counter to determine if there is a configuration change.
typedef map<uint64_t, volatile_statfs> osd_pools_map;
int64_t *mem_avail,
const std::list<std::shared_ptr<PriorityCache::PriCache>>& caches,
PriorityCache::Priority pri);
+ void _update_cache_settings();
} mempool_thread;
// --------------------------------------------------------
void _set_alloc_sizes();
void _set_blob_size();
void _set_finisher_num();
+ void _update_osd_memory_options();
int _open_bdev(bool create);
// Verifies if disk space is enough for reserved + min bluefs