std::stringstream *ss);
- void set_health_checks(health_check_map_t&& c) {
+ bool set_health_checks(health_check_map_t&& c) {
+ // When health checks change, a report is immediately sent to the monitors.
+ // Currently modules have static health check details, but this equality
+ // test could be made smarter if too much noise shows up in the future.
+ bool changed = health_checks != c;
health_checks = std::move(c);
+ return changed;
}
void get_health_checks(health_check_map_t *checks);
#include "PyModule.h"
#include "ActivePyModules.h"
+#include "DaemonServer.h"
#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_mgr
std::map<std::string, std::string> store_data,
DaemonStateIndex &ds, ClusterState &cs,
MonClient &mc, LogChannelRef clog_, Objecter &objecter_,
- Client &client_, Finisher &f)
+ Client &client_, Finisher &f, DaemonServer &server)
: module_config(module_config_), daemon_state(ds), cluster_state(cs),
monc(mc), clog(clog_), objecter(objecter_), client(client_), finisher(f),
- lock("ActivePyModules")
+ server(server), lock("ActivePyModules")
{
store_cache = std::move(store_data);
}
void ActivePyModules::set_health_checks(const std::string& module_name,
health_check_map_t&& checks)
{
- Mutex::Locker l(lock);
+ bool changed = false;
+
+ lock.Lock();
auto p = modules.find(module_name);
if (p != modules.end()) {
- p->second->set_health_checks(std::move(checks));
+ changed = p->second->set_health_checks(std::move(checks));
}
+ lock.Unlock();
+
+ // Immediately schedule a report to be sent to the monitors with the new
+ // health checks that have changed. This is done asynchronously to avoid
+ // blocking python land. ActivePyModules::lock needs to be dropped to make
+ // lockdep happy:
+ //
+ //   send_report callers: DaemonServer::lock -> PyModuleRegistry::lock
+ //   active_start:        PyModuleRegistry::lock -> ActivePyModules::lock
+ //
+ // If we don't release this->lock before calling schedule_tick, a cycle is
+ // formed with the addition of ActivePyModules::lock -> DaemonServer::lock.
+ // This is still correct, as send_report is run asynchronously under
+ // DaemonServer::lock.
+ if (changed)
+ server.schedule_tick(0);
}
int ActivePyModules::handle_command(
#include "ClusterState.h"
class health_check_map_t;
+class DaemonServer;
class ActivePyModules
{
Objecter &objecter;
Client &client;
Finisher &finisher;
+ DaemonServer &server;
mutable Mutex lock{"ActivePyModules::lock"};
std::map<std::string, std::string> store_data,
DaemonStateIndex &ds, ClusterState &cs, MonClient &mc,
LogChannelRef clog_, Objecter &objecter_, Client &client_,
- Finisher &f);
+ Finisher &f, DaemonServer &server);
~ActivePyModules();
g_conf()->auth_service_required :
g_conf()->auth_supported),
lock("DaemonServer"),
- pgmap_ready(false)
+ pgmap_ready(false),
+ timer(g_ceph_context, lock),
+ shutting_down(false),
+ tick_event(nullptr)
{
g_conf().add_observer(this);
}
started_at = ceph_clock_now();
+ Mutex::Locker l(lock);
+ timer.init();
+
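+ // schedule the first periodic report; tick() reschedules itself at the
+ // same mgr_tick_period interval after each report is sent.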
+ schedule_tick_locked(
+ g_conf().get_val<std::chrono::seconds>("mgr_tick_period").count());
+
return 0;
}
}
}
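+// Send a report to the monitors with the latest health checks and schedule
+// the next periodic tick.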
+void DaemonServer::tick()
+{
+ dout(10) << dendl;
+ send_report();
+
+ schedule_tick_locked(
+ g_conf().get_val<std::chrono::seconds>("mgr_tick_period").count());
+}
+
+// Currently modules do not set health checks in response to events delivered to
+// all modules (e.g. notify), so we do not risk a thundering herd situation here.
+// If this pattern emerges in the future, this scheduler could be modified to
+// fire after all modules have had a chance to set their health checks.
+void DaemonServer::schedule_tick_locked(double delay_sec)
+{
+ if (tick_event) {
+ timer.cancel_event(tick_event);
+ tick_event = nullptr;
+ }
+
+ // On shutdown, start rejecting explicit requests to send reports that may
+ // originate from python land, which may still be running.
+ if (shutting_down)
+ return;
+
+ tick_event = timer.add_event_after(delay_sec,
+ new FunctionContext([this](int r) {
+ tick();
+ }));
+}
+
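+// Public entry point that takes DaemonServer::lock before scheduling; used by
+// ActivePyModules to request an immediate report when a module's health
+// checks change.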
+void DaemonServer::schedule_tick(double delay_sec)
+{
+ Mutex::Locker l(lock);
+ schedule_tick_locked(delay_sec);
+}
+
void DaemonServer::shutdown()
{
dout(10) << "begin" << dendl;
msgr->shutdown();
msgr->wait();
dout(10) << "done" << dendl;
+
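+ // stop accepting new tick requests (e.g. from python land) and cancel any
+ // pending tick event before tearing down the timer.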
+ Mutex::Locker l(lock);
+ shutting_down = true;
+ timer.shutdown();
}
static DaemonKey key_from_service(
#include "common/Mutex.h"
#include "common/LogClient.h"
+#include "common/Timer.h"
#include <msg/Messenger.h>
#include <mon/MonClient.h>
std::set<int32_t> reported_osds;
void maybe_ready(int32_t osd_id);
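+ // periodic timer that drives tick() -> send_report(); tick_event tracks the
+ // pending callback so it can be cancelled on reschedule or shutdown, and
+ // shutting_down suppresses further scheduling once shutdown() has started.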
+ SafeTimer timer;
+ bool shutting_down;
+ Context *tick_event;
+ void tick();
+ void schedule_tick_locked(double delay_sec);
+
public:
int init(uint64_t gid, entity_addrvec_t client_addrs);
void shutdown();
virtual const char** get_tracked_conf_keys() const override;
virtual void handle_conf_change(const ConfigProxy& conf,
const std::set <std::string> &changed) override;
+
+ void schedule_tick(double delay_sec);
};
#endif
// assume finisher already initialized in background_init
dout(4) << "starting python modules..." << dendl;
py_module_registry->active_start(daemon_state, cluster_state,
- kv_store, *monc, clog, *objecter, *client, finisher);
+ kv_store, *monc, clog, *objecter, *client, finisher, server);
dout(4) << "Complete." << dendl;
initializing = false;
}
}
-void Mgr::tick()
-{
- dout(10) << dendl;
- server.send_report();
-}
-
std::map<std::string, std::string> Mgr::get_services() const
{
Mutex::Locker l(lock);
bool ms_dispatch(Message *m);
- void tick();
-
void background_init(Context *completion);
void shutdown();
dout(10) << __func__ << dendl;
send_beacon();
- if (active_mgr && active_mgr->is_initialized()) {
- active_mgr->tick();
- }
-
timer.add_event_after(
g_conf().get_val<std::chrono::seconds>("mgr_tick_period").count(),
new FunctionContext([this](int r){
DaemonStateIndex &ds, ClusterState &cs,
const std::map<std::string, std::string> &kv_store,
MonClient &mc, LogChannelRef clog_, Objecter &objecter_,
- Client &client_, Finisher &f)
+ Client &client_, Finisher &f, DaemonServer &server)
{
Mutex::Locker locker(lock);
active_modules.reset(new ActivePyModules(
module_config, kv_store, ds, cs, mc,
- clog_, objecter_, client_, f));
+ clog_, objecter_, client_, f, server));
for (const auto &i : modules) {
// Anything we're skipping because of !can_run will be flagged
DaemonStateIndex &ds, ClusterState &cs,
const std::map<std::string, std::string> &kv_store,
MonClient &mc, LogChannelRef clog_, Objecter &objecter_,
- Client &client_, Finisher &f);
+ Client &client_, Finisher &f, DaemonServer &server);
void standby_start(MonClient &mc);
bool is_standby_running() const