Dumping huge caches (~1G) to the formatter causes the MDS to
hang or get terminated. Until the underlying issue is
fixed, disallow dumping the cache if cache usage exceeds a configurable
threshold. Also, Patrick feels a similar issue might be
hiding there for cache dumps to file when cache sizes
are really huge. This case has not been reproduced
yet, hence, the limit for cache dumps to file is currently
left uncapped.
Fixes: http://tracker.ceph.com/issues/37609
Signed-off-by: Venky Shankar <vshankar@redhat.com>
(cherry picked from commit
6be2ce98c629d2cfe9d8443659e59600148b7675)
Conflicts:
src/common/options.cc
src/mds/MDCache.cc
Minor fix for fetching config option with g_conf.
Option("mds_max_retries_on_remount_failure", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(5)
.set_description("number of consecutive failed remount attempts for invalidating kernel dcache after which client would abort."),
+
+ Option("mds_dump_cache_threshold_formatter", Option::TYPE_SIZE, Option::LEVEL_DEV)
+ .set_default(1_G)
+ .set_description("threshold for cache usage to disallow \"dump cache\" operation to formatter")
+ .set_long_description("Disallow MDS from dumping caches to formatter via \"dump cache\" command if cache usage exceeds this threshold."),
+
+ Option("mds_dump_cache_threshold_file", Option::TYPE_SIZE, Option::LEVEL_DEV)
+ .set_default(0)
+ .set_description("threshold for cache usage to disallow \"dump cache\" operation to file")
+ .set_long_description("Disallow MDS from dumping caches to file via \"dump cache\" command if cache usage exceeds this threshold."),
});
}
int MDCache::dump_cache(std::string_view fn, Formatter *f)
{
int r = 0;
+
+ // dumping large caches may cause mds to hang or worse get killed.
+ // so, disallow the dump if the cache size exceeds the configured
+ // threshold, which is 1G for formatter and unlimited for file (note
+ // that this can be jacked up by the admin... and is nothing but foot
+ // shooting, but the option itself is for devs and hence dangerous to
+ // tune). TODO: remove this when fixed.
+ uint64_t threshold = f ?
+ g_conf->get_val<Option::size_t>("mds_dump_cache_threshold_formatter") :
+ g_conf->get_val<Option::size_t>("mds_dump_cache_threshold_file");
+
+ if (threshold && cache_size() > threshold) {
+ if (f) {
+ std::stringstream ss;
+ ss << "cache usage exceeds dump threshold";
+ f->open_object_section("result");
+ f->dump_string("error", ss.str());
+ f->close_section();
+ } else {
+ derr << "cache usage exceeds dump threshold" << dendl;
+ r = -EINVAL;
+ }
+ return r;
+ }
+
+ r = 0;
int fd = -1;
if (f) {
"mds_cache_reservation",
"mds_health_cache_threshold",
"mds_cache_mid",
+ "mds_dump_cache_threshold_formatter",
+ "mds_dump_cache_threshold_file",
// MDBalancer
"mds_bal_fragment_dirs",
"mds_bal_fragment_interval",