m_msgr(msgr),
m_listener(this),
m_last_blocklist_check(ceph_clock_now()),
+ m_last_failure_check(ceph_clock_now()),
m_local(new librados::Rados()) {
auto thread_pool = &(cct->lookup_or_create_singleton_object<ThreadPoolSingleton>(
"cephfs::mirror::thread_pool", false, cct));
auto &mirror_action = m_mirror_actions.at(filesystem);
if (is_restart) {
- ceph_assert(mirror_action.action_in_progress);
mirror_action.fs_mirror.reset();
} else {
ceph_assert(!mirror_action.action_in_progress);
std::scoped_lock locker(m_lock);
auto &mirror_action = m_mirror_actions.at(filesystem);
- if (!mirror_action.fs_mirror->is_failed()) {
+ if (!mirror_action.fs_mirror->is_init_failed()) {
ceph_assert(mirror_action.action_in_progress);
mirror_action.action_in_progress = false;
m_cond.notify_all();
ceph_assert(mirror_action.fs_mirror);
ceph_assert(!mirror_action.action_in_progress);
- if (mirror_action.fs_mirror->is_failed()) {
+ if (mirror_action.fs_mirror->is_init_failed()) {
dout(10) << ": init failed for filesystem=" << filesystem << dendl;
m_work_queue->queue(on_finish, -EINVAL);
return;
void Mirror::update_fs_mirrors() {
dout(20) << dendl;
+ auto now = ceph_clock_now();
double blocklist_interval = g_ceph_context->_conf.get_val<std::chrono::seconds>
("cephfs_mirror_restart_mirror_on_blocklist_interval").count();
- auto now = ceph_clock_now();
bool check_blocklist = blocklist_interval > 0 && ((now - m_last_blocklist_check) >= blocklist_interval);
+ double failed_interval = g_ceph_context->_conf.get_val<std::chrono::seconds>
+ ("cephfs_mirror_restart_mirror_on_failure_interval").count();
+ bool check_failure = failed_interval > 0 && ((now - m_last_failure_check) >= failed_interval);
+
{
std::scoped_lock locker(m_lock);
for (auto &[filesystem, mirror_action] : m_mirror_actions) {
- if (check_blocklist && !mirror_action.action_in_progress
- && mirror_action.fs_mirror && mirror_action.fs_mirror->is_blocklisted()) {
+ if (check_failure && !mirror_action.action_in_progress &&
+ mirror_action.fs_mirror && mirror_action.fs_mirror->is_failed()) {
+ // about to restart failed mirror instance -- nothing
+ // should interfere
+ dout(5) << ": filesystem=" << filesystem << " failed mirroring -- restarting" << dendl;
+ auto peers = mirror_action.fs_mirror->get_peers();
+ mirror_action.action_ctxs.push_front(
+ new C_RestartMirroring(this, filesystem, mirror_action.pool_id, peers));
+ } else if (check_blocklist && !mirror_action.action_in_progress &&
+ mirror_action.fs_mirror && mirror_action.fs_mirror->is_blocklisted()) {
// about to restart blocklisted mirror instance -- nothing
// should interfere
dout(5) << ": filesystem=" << filesystem << " is blocklisted -- restarting" << dendl;
if (check_blocklist) {
m_last_blocklist_check = now;
}
+ if (check_failure) {
+ m_last_failure_check = now;
+ }
}
schedule_mirror_update_task();
{return !mirror_action.action_in_progress;});
if (mirror_action.fs_mirror &&
!mirror_action.fs_mirror->is_stopping() &&
- !mirror_action.fs_mirror->is_failed()) {
+ !mirror_action.fs_mirror->is_init_failed()) {
C_SaferCond cond;
mirror_action.fs_mirror->shutdown(new C_AsyncCallback<ContextWQ>(m_work_queue, &cond));
int r = cond.wait();
dout(10) << ": shutdown filesystem=" << filesystem << ", r=" << r << dendl;
}
+
+ mirror_action.fs_mirror.reset();
}
}