From: Roland Sommer Date: Fri, 8 Oct 2021 06:40:26 +0000 (+0200) Subject: mgr/prometheus: Make standby discoverable X-Git-Tag: v16.2.8~211^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=24e843a075a74daf4708f51e2a2f050d2f8c1a47;p=ceph.git mgr/prometheus: Make standby discoverable Enable config settings to modify standby's behaviour on the index page This makes the standby discoverable by reverse proxy or loadbalancer setups. Testing for the empty response of the '/metrics' endpoint would trigger metric collection on the active manager instance. The newly added configuration options settings standby_behaviour and standby_error_status_code are documented and flagged as runtime, as modifying both settings has an immediate effect (no restart required). Co-authored-by: Ernesto Puerta <37327689+epuertat@users.noreply.github.com> Signed-off-by: Roland Sommer Fixes: https://tracker.ceph.com/issues/53229 (cherry picked from commit c1570f870e28d8fd4cd072832f1a2e19bac663eb) --- diff --git a/doc/mgr/prometheus.rst b/doc/mgr/prometheus.rst index 0f1caff2353ff..04a10c3cd2f6b 100644 --- a/doc/mgr/prometheus.rst +++ b/doc/mgr/prometheus.rst @@ -88,6 +88,24 @@ If you are confident that you don't require the cache, you can disable it:: ceph config set mgr mgr/prometheus/cache false +If you are using the prometheus module behind some kind of reverse proxy or +loadbalancer, you can simplify discovering the active instance by switching +to ``error``-mode:: + + ceph config set mgr mgr/prometheus/standby_behaviour error + +If set, the prometheus module will respond with an HTTP error when requesting ``/`` +from the standby instance. The default error code is 500, but you can configure +the HTTP response code with:: + + ceph config set mgr mgr/prometheus/standby_error_status_code 503 + +Valid error codes are between 400-599. 
+ +To switch back to the default behaviour, simply set the config key to ``default``:: + + ceph config set mgr mgr/prometheus/standby_behaviour default + .. _prometheus-rbd-io-statistics: RBD IO statistics diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py index 896c0f4e3c5ac..5ec1e874922e2 100644 --- a/src/pybind/mgr/prometheus/module.py +++ b/src/pybind/mgr/prometheus/module.py @@ -294,6 +294,21 @@ class Module(MgrModule): name='rbd_stats_pools_refresh_interval', type='int', default=300 + ), + Option( + name='standby_behaviour', + type='str', + default='default', + enum_allowed=['default', 'error'], + runtime=True + ), + Option( + name='standby_error_status_code', + type='int', + default=500, + min=400, + max=599, + runtime=True ) ] @@ -1436,7 +1451,8 @@ class StandbyModule(MgrStandbyModule): cherrypy.config.update({ 'server.socket_host': server_addr, 'server.socket_port': server_port, - 'engine.autoreload.on': False + 'engine.autoreload.on': False, + 'request.show_tracebacks': False }) module = self @@ -1444,8 +1460,10 @@ class StandbyModule(MgrStandbyModule): class Root(object): @cherrypy.expose def index(self) -> str: - active_uri = module.get_active_uri() - return ''' + standby_behaviour = module.get_module_option('standby_behaviour') + if standby_behaviour == 'default': + active_uri = module.get_active_uri() + return ''' Ceph Exporter @@ -1453,6 +1471,9 @@ class StandbyModule(MgrStandbyModule):

Metrics

'''.format(active_uri) + else: + status = module.get_module_option('standby_error_status_code') + raise cherrypy.HTTPError(status, message="Keep on looking") @cherrypy.expose def metrics(self) -> str: