sync_cond.Signal();
}
-int Client::_do_remount(void)
+int Client::_do_remount(bool retry_on_error)
{
+ uint64_t max_retries = g_conf().get_val<uint64_t>("mds_max_retries_on_remount_failure");
+
errno = 0;
int r = remount_cb(callback_handle);
- if (r != 0) {
+ if (r == 0) {
+ retries_on_invalidate = 0;
+ } else {
int e = errno;
client_t whoami = get_nodeid();
if (r == -1) {
"failed to remount (to trim kernel dentries): "
"return code = " << r << dendl;
}
- bool should_abort = cct->_conf.get_val<bool>("client_die_on_failed_remount") ||
- cct->_conf.get_val<bool>("client_die_on_failed_dentry_invalidate");
+ bool should_abort =
+ (cct->_conf.get_val<bool>("client_die_on_failed_remount") ||
+ cct->_conf.get_val<bool>("client_die_on_failed_dentry_invalidate")) &&
+ !(retry_on_error && (++retries_on_invalidate < max_retries));
if (should_abort && !unmounting) {
lderr(cct) << "failed to remount for kernel dentry trimming; quitting!" << dendl;
ceph_abort();
explicit C_Client_Remount(Client *c) : client(c) {}
// Completion hook: asserts that the completion code r is 0 and then runs the
// client's remount. The patched call passes true for _do_remount's
// retry_on_error parameter; the removed call took no argument.
void finish(int r) override {
ceph_assert(r == 0);
- client->_do_remount();
+ client->_do_remount(true);
}
};
r = 0;
} else if (remount_cb) {
ldout(cct, 1) << "using remount_cb" << dendl;
- r = _do_remount();
+ r = _do_remount(false);
}
if (r) {
bool should_abort = cct->_conf.get_val<bool>("client_die_on_failed_dentry_invalidate");
int _release_fh(Fh *fh);
void _put_fh(Fh *fh);
// Calls remount_cb(callback_handle). On a non-zero result the abort decision
// is suppressed while retry_on_error is true and the incremented
// retries_on_invalidate counter is still below the configured
// mds_max_retries_on_remount_failure; a zero result resets that counter.
// NOTE(review): the returned value is not visible in this hunk -- confirm
// against the full definition.
- int _do_remount(void);
+ int _do_remount(bool retry_on_error);
int _read_sync(Fh *f, uint64_t off, uint64_t len, bufferlist *bl, bool *checkeof);
int _read_async(Fh *f, uint64_t off, uint64_t len, bufferlist *bl);
std::map<std::pair<int64_t,std::string>, int> pool_perms;
list<Cond*> waiting_for_pool_perm;
+
// Count of consecutive failed remount attempts: _do_remount resets it to 0
// when remount_cb returns 0 and pre-increments it while evaluating whether a
// failure should abort.
// NOTE(review): the increment sits behind short-circuit operators, so it only
// happens when one of the client_die_on_failed_* options is true and
// retry_on_error is set.
+ uint64_t retries_on_invalidate = 0;
};
/**
Option("mds_cap_revoke_eviction_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0)
.set_description("number of seconds after which clients which have not responded to cap revoke messages by the MDS are evicted."),
+
// Unsigned option, default 5, compared against the client's consecutive
// remount-failure counter in Client::_do_remount.
// NOTE(review): the name carries an mds_ prefix although the visible reader is
// client code, and it is fetched via g_conf() while the neighbouring options
// in that function use cct->_conf -- confirm both are intended.
+ Option("mds_max_retries_on_remount_failure", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+ .set_default(5)
+ .set_description("number of consecutive failed remount attempts for invalidating kernel dcache after which client would abort."),
});
}