From: Roland Sommer Date: Fri, 8 Oct 2021 06:40:26 +0000 (+0200) Subject: mgr/prometheus: Make standby discoverable X-Git-Tag: v17.1.0~457^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=c1570f870e28d8fd4cd072832f1a2e19bac663eb;p=ceph.git mgr/prometheus: Make standby discoverable Enable config settings to modify standby's behaviour on the index page This makes the standby discoverable by reverse proxy or loadbalancer setups. Testing for the empty response of the '/metrics' endpoint would trigger metric collection on the active manager instance. The newly added configuration options standby_behaviour and standby_error_status_code are documented and flagged as runtime, as modifying both settings has an immediate effect (no restart required). Co-authored-by: Ernesto Puerta <37327689+epuertat@users.noreply.github.com> Signed-off-by: Roland Sommer Fixes: https://tracker.ceph.com/issues/53229 --- diff --git a/doc/mgr/prometheus.rst b/doc/mgr/prometheus.rst index 0328a582f1471..a8774ff332342 100644 --- a/doc/mgr/prometheus.rst +++ b/doc/mgr/prometheus.rst @@ -37,6 +37,8 @@ Configuration .. confval:: stale_cache_strategy .. confval:: rbd_stats_pools .. confval:: rbd_stats_pools_refresh_interval +.. confval:: standby_behaviour +.. confval:: standby_error_status_code By default the module will accept HTTP requests on port ``9283`` on all IPv4 and IPv6 addresses on the host. The port and listen address are both @@ -96,6 +98,24 @@ If you are confident that you don't require the cache, you can disable it:: ceph config set mgr mgr/prometheus/cache false +If you are using the prometheus module behind some kind of reverse proxy or +loadbalancer, you can simplify discovering the active instance by switching +to ``error``-mode:: + + ceph config set mgr mgr/prometheus/standby_behaviour error + +If set, the prometheus module will respond with an HTTP error when requesting ``/`` +from the standby instance. 
The default error code is 500, but you can configure +the HTTP response code with:: + + ceph config set mgr mgr/prometheus/standby_error_status_code 503 + +Valid error codes are between 400-599. + +To switch back to the default behaviour, simply set the config key to ``default``:: + + ceph config set mgr mgr/prometheus/standby_behaviour default + .. _prometheus-rbd-io-statistics: RBD IO statistics diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py index cfc7bff00db4b..9b885cbfb7ee0 100644 --- a/src/pybind/mgr/prometheus/module.py +++ b/src/pybind/mgr/prometheus/module.py @@ -298,6 +298,21 @@ class Module(MgrModule): name='rbd_stats_pools_refresh_interval', type='int', default=300 + ), + Option( + name='standby_behaviour', + type='str', + default='default', + enum_allowed=['default', 'error'], + runtime=True + ), + Option( + name='standby_error_status_code', + type='int', + default=500, + min=400, + max=599, + runtime=True ) ] @@ -1440,7 +1455,8 @@ class StandbyModule(MgrStandbyModule): cherrypy.config.update({ 'server.socket_host': server_addr, 'server.socket_port': server_port, - 'engine.autoreload.on': False + 'engine.autoreload.on': False, + 'request.show_tracebacks': False }) module = self @@ -1448,8 +1464,10 @@ class StandbyModule(MgrStandbyModule): class Root(object): @cherrypy.expose def index(self) -> str: - active_uri = module.get_active_uri() - return ''' + standby_behaviour = module.get_module_option('standby_behaviour') + if standby_behaviour == 'default': + active_uri = module.get_active_uri() + return ''' Ceph Exporter @@ -1457,6 +1475,9 @@ class StandbyModule(MgrStandbyModule):

Metrics

'''.format(active_uri) + else: + status = module.get_module_option('standby_error_status_code') + raise cherrypy.HTTPError(status, message="Keep on looking") @cherrypy.expose def metrics(self) -> str: