From: John Spray Date: Thu, 30 Jun 2016 13:07:50 +0000 (+0100) Subject: pybind/mgr: add the `rest` module X-Git-Tag: v11.0.1~60^2~51 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b82533fcb73149886461d35598799572ffc1d771;p=ceph.git pybind/mgr: add the `rest` module This is derived from what used to be Calamari. Signed-off-by: John Spray --- diff --git a/src/pybind/mgr/calamari.conf b/src/pybind/mgr/calamari.conf new file mode 100644 index 000000000000..563aca8f01f0 --- /dev/null +++ b/src/pybind/mgr/calamari.conf @@ -0,0 +1,43 @@ + +[cthulhu] + +plugin_path = +salt_config_path = +alembic_config_path = +db_path = +log_path = /tmp/cthulhu.log +log_level = DEBUG +rpc_url = +crush_host_type = host +crush_osd_type = osd +cluster_map_retention = 3600 +db_log_level = WARN +favorite_timeout_factor = 3 +server_timeout_factor = 3 +cluster_contact_threshold = 60 +emit_events_to_salt_event_bus = False +event_tag_prefix = calamari/ + +[calamari_web] + +log_path = /Users/john/git/calamari/dev/calamari.log +log_level = DEBUG +db_engine = django.db.backends.postgresql_psycopg2 +db_name = calamari +db_user = calamari +db_password = 27HbZwr*g +db_host = localhost +secret_key_path = /Users/john/git/calamari/dev/secret.key +username = john +static_root = /Users/john/git/calamari/webapp/content/ + +[graphite] + +root = /Users/john/git/calamari/env +storage_path = /Users/john/git/calamari/env/storage + +[testing] + +calamari_control = embedded +ceph_control = embedded +external_cluster_path = /Users/john/git/calamari/cluster.yaml diff --git a/src/pybind/mgr/calamari_rest/__init__.py b/src/pybind/mgr/calamari_rest/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/src/pybind/mgr/calamari_rest/config.py b/src/pybind/mgr/calamari_rest/config.py new file mode 100644 index 000000000000..159ced7ed882 --- /dev/null +++ b/src/pybind/mgr/calamari_rest/config.py @@ -0,0 +1,39 @@ +import os +import ConfigParser + +try: + import alembic 
+except ImportError: + alembic = None +else: + import alembic.config + + +class ConfigNotFound(Exception): + pass + + +DEFAULT_CONFIG_PATH = "/etc/calamari/calamari.conf" +CONFIG_PATH_VAR = "CALAMARI_CONFIG" + + +class CalamariConfig(ConfigParser.SafeConfigParser): + def __init__(self): + ConfigParser.SafeConfigParser.__init__(self) + + try: + self.path = os.environ[CONFIG_PATH_VAR] + except KeyError: + self.path = DEFAULT_CONFIG_PATH + + if not os.path.exists(self.path): + raise ConfigNotFound("Configuration not found at %s" % self.path) + + self.read(self.path) + + +if alembic is not None: + class AlembicConfig(alembic.config.Config): + def __init__(self): + path = CalamariConfig().get('cthulhu', 'alembic_config_path') + super(AlembicConfig, self).__init__(path) diff --git a/src/pybind/mgr/calamari_rest/fixtures/ceph_fake.json b/src/pybind/mgr/calamari_rest/fixtures/ceph_fake.json new file mode 100644 index 000000000000..305242f9a3aa --- /dev/null +++ b/src/pybind/mgr/calamari_rest/fixtures/ceph_fake.json @@ -0,0 +1 @@ +[{"pk": 1, "model": "ceph.cluster", "fields": {"name": "ceph"}}, {"pk": 1, "model": "ceph.clusterspace", "fields": {"total_used": 5000, "cluster": 1, "total_space": 12345, "added_date": "2013-07-08T20:48:14.502Z", "total_avail": 1234}}, {"pk": 2, "model": "ceph.clusterspace", "fields": {"total_used": 4, "cluster": 1, "total_space": 344444, "added_date": "2013-07-08T21:12:42.061Z", "total_avail": 4}}] \ No newline at end of file diff --git a/src/pybind/mgr/calamari_rest/management/__init__.py b/src/pybind/mgr/calamari_rest/management/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/src/pybind/mgr/calamari_rest/management/commands/__init__.py b/src/pybind/mgr/calamari_rest/management/commands/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/src/pybind/mgr/calamari_rest/management/commands/api_docs.py b/src/pybind/mgr/calamari_rest/management/commands/api_docs.py new file mode 100644 index 
# >>> RsT table code borrowed from http://stackoverflow.com/a/17203834/99876


def make_table(grid):
    """
    Render `grid` (a list of rows, each a list of strings) as an RsT
    simple table.

    The table opens with a '=' rule.  The first and the last row are each
    followed by a '=' rule, every other row by a '-' rule.  Each column is
    as wide as its longest cell.

    :param grid: list of equally long lists of strings
    :return: the table as a single newline-terminated string
    """
    # zip(*grid) transposes rows into columns
    max_cols = [max(len(item) for item in column) for column in zip(*grid)]
    last_row = len(grid) - 1

    # Collect the pieces and join once rather than growing a string with +=
    pieces = [table_div(max_cols, 1)]
    for i, row in enumerate(grid):
        pieces.append(normalize_row(row, max_cols))
        pieces.append(table_div(max_cols, i == 0 or i == last_row))
    return "".join(pieces)


def table_div(max_cols, header_flag=1):
    """
    Return one horizontal rule line for a table with the given column widths.

    :param max_cols: list of column widths
    :param header_flag: when equal to 1 (or True) the rule is drawn with
                        '=', otherwise with '-'
    """
    style = "=" if header_flag == 1 else "-"
    return "".join(max_col * style + " " for max_col in max_cols) + "\n"


def normalize_row(row, max_cols):
    """
    Return one table line in which every cell of `row` is padded with
    spaces to its column width plus a single separating space.
    """
    return "".join(
        row[i] + (max_col - len(row[i]) + 1) * " "
        for i, max_col in enumerate(max_cols)
    ) + "\n"
# <<< RsT table code borrowed from http://stackoverflow.com/a/17203834/99876
def _url_pattern_methods(url_pattern):
    """
    Return the HTTP verbs served by `url_pattern`.

    If the pattern's callback carries an `_actions` mapping, the upper-cased
    keys of that mapping are returned; otherwise the view class is
    instantiated and its `allowed_methods` is returned.

    :raises RuntimeError: if no methods can be determined
    """
    view_class = url_pattern.callback.cls

    if hasattr(url_pattern.callback, '_actions'):
        # An APIViewSet
        methods = [k.upper() for k in url_pattern.callback._actions.keys()]
    else:
        methods = view_class().allowed_methods

    if not methods:
        # A view that isn't using django rest framework?
        raise RuntimeError("No methods for url %s" % url_pattern.regex.pattern)

    return methods


def _stripped_url(prefix, url_pattern):
    """
    Convert a URL regex into something for human eyes

    ^server/(?P<fqdn>[^/]+)$ becomes server/<fqdn>

    :param prefix: string prepended to the stripped pattern
    :param url_pattern: object exposing the regex source as `.regex.pattern`
    """
    # Drop the regex anchors from both ends
    url = prefix + url_pattern.regex.pattern.strip("^$")
    # Collapse each named group "(?P<name>...)" to just "<name>"
    url = re.sub(r"\(.+?<(.+?)>.+?\)", r"<\1>", url)
    return url


def _pretty_url(prefix, url_pattern):
    """
    Like _stripped_url, with the angle brackets backslash-escaped for RsT.
    """
    return "%s" % _stripped_url(prefix, url_pattern).replace("<", "\\<").replace(">", "\\>")


def _find_prefix(toplevel_mod, sub_mod):
    """
    Find the URL prefix of sub_mod in toplevel_mod

    :param toplevel_mod: dotted name of the module whose `urlpatterns` is searched
    :param sub_mod: dotted name of the included urlconf module to look for
    :return: the including resolver's regex with any '^' stripped from its ends
    :raises RuntimeError: if sub_mod is not included by toplevel_mod
    """
    for toplevel_pattern in importlib.import_module(toplevel_mod).urlpatterns:
        if isinstance(toplevel_pattern, RegexURLResolver):
            if toplevel_pattern.urlconf_name.__name__ == sub_mod:
                regex_str = toplevel_pattern.regex.pattern
                return regex_str.strip("^")

    raise RuntimeError("'%s' not included in '%s', cannot find prefix" % (sub_mod, toplevel_mod))
parse_urls(url_pattern.urlconf_module) + elif isinstance(url_pattern, RegexURLPattern): + if url_pattern.regex.pattern.endswith('\.(?P[a-z0-9]+)$'): + # Suppress the . urls that rest_framework generates + continue + + if hasattr(url_pattern.callback, 'cls'): + # This is a rest_framework as_view wrapper + view_cls = url_pattern.callback.cls + if view_cls.__name__.endswith("APIRoot"): + continue + view_to_url_patterns[view_cls].append(url_pattern) + + self.prefix = _find_prefix("calamari_web.urls", url_module) + parse_urls(importlib.import_module(url_module).urlpatterns) + + self.view_to_url_patterns = sorted(view_to_url_patterns.items(), cmp=lambda x, y: cmp(x[0].__name__, y[0].__name__)) + + self.all_url_patterns = [] + for view, url_patterns in self.view_to_url_patterns: + self.all_url_patterns.extend(url_patterns) + self.all_url_patterns = sorted(self.all_url_patterns, + lambda a, b: cmp(_pretty_url(self.prefix, a), _pretty_url(self.prefix, b))) + + def _view_rst(self, view, url_patterns): + """ + Output RsT for one API view + """ + name = view().metadata(None)['name'] + + if view.__doc__: + view_help_text = view.__doc__ + else: + view_help_text = "*No description available*" + + url_table = [["URL"] + VERBS] + for url_pattern in url_patterns: + methods = _url_pattern_methods(url_pattern) + + row = [":doc:`%s <%s>`" % (_pretty_url(self.prefix, url_pattern), + self._example_document_name(_stripped_url(self.prefix, url_pattern)))] + for v in VERBS: + if v in methods: + row.append("Yes") + else: + row.append("") + url_table.append(row) + + url_table_rst = make_table(url_table) + + if hasattr(view, 'serializer_class') and view.serializer_class: + field_table = [["Name", "Type", "Readonly", "Create", "Modify", "Description"]] + + serializer = view.serializer_class() + if isinstance(serializer, ValidatingSerializer): + allowed_during_create = serializer.Meta.create_allowed + required_during_create = serializer.Meta.create_required + allowed_during_modify = 
serializer.Meta.modify_allowed + required_during_modify = serializer.Meta.modify_required + else: + allowed_during_create = required_during_create = allowed_during_modify = required_during_modify = () + + fields = serializer.get_fields() + for field_name, field in fields.items(): + create = modify = '' + if field_name in allowed_during_create: + create = 'Allowed' + if field_name in required_during_create: + create = 'Required' + + if field_name in allowed_during_modify: + modify = 'Allowed' + if field_name in required_during_modify: + modify = 'Required' + + if hasattr(field, 'help_text'): + field_help_text = field.help_text + else: + field_help_text = "" + field_table.append( + [field_name, + field.type_label, + str(field.read_only), + create, + modify, + field_help_text if field_help_text else ""]) + field_table_rst = make_table(field_table) + else: + field_table_rst = "*No field data available*" + + return Environment().from_string(RESOURCE_TEMPLATE).render( + name=name, + class_name=view.__name__, + help_text=view_help_text, + field_table=field_table_rst, + url_table=url_table_rst + ) + + def _url_table(self, url_patterns): + url_table = [["URL", "View", "Examples"] + VERBS] + for view, url_patterns in self.view_to_url_patterns: + for url_pattern in url_patterns: + methods = _url_pattern_methods(url_pattern) + + row = [_pretty_url(self.prefix, url_pattern)] + + view_name = view().metadata(None)['name'] + row.append( + u":ref:`{0} <{1}>`".format(view_name.replace(" ", unichr(0x00a0)), view.__name__) + ) + + example_doc_name = self._example_document_name(_stripped_url(self.prefix, url_pattern)) + if os.path.exists("{0}/{1}.rst".format(GENERATED_PREFIX, example_doc_name)): + print "It exists: {0}".format(example_doc_name) + row.append(":doc:`%s <%s>`" % ("Example", example_doc_name)) + else: + row.append("") + for v in VERBS: + if v in methods: + row.append("Yes") + else: + row.append("") + url_table.append(row) + + return make_table(url_table) + + def 
_flatten_path(self, path): + """ + Escape a URL pattern to something suitable for use as a filename + """ + return path.replace("/", "_").replace("<", "_").replace(">", "_") + + def _example_document_name(self, pattern): + return EXAMPLES_PREFIX + self._flatten_path(pattern) + + def _write_example(self, example_pattern, example_results): + """ + Write RsT file with API examples for a particular pattern + """ + rst = "" + title = "Examples for %s" % example_pattern + rst += "%s\n%s\n\n" % (title, "=" * len(title)) + for url, content in example_results.items(): + rst += "%s\n" % url + rst += "-" * len(url) + rst += "\n\n.. code-block:: json\n\n" + data_dump = json.dumps(json.loads(content), indent=2) + data_dump = "\n".join([" %s" % l for l in data_dump.split("\n")]) + rst += data_dump + rst += "\n\n" + codecs.open("{0}/{1}.rst".format(GENERATED_PREFIX, self._example_document_name(example_pattern)), 'w', + encoding="UTF-8").write(rst) + + def write_docs(self, examples): + resources_rst = "" + for view, url_patterns in self.view_to_url_patterns: + resources_rst += self._view_rst(view, url_patterns) + + url_table_rst = self._url_table(self.all_url_patterns) + + example_docs = [self._example_document_name(p) for p in examples.keys()] + + resources_rst = Environment().from_string(PAGE_TEMPLATE).render( + resources_rst=resources_rst, url_summary_rst=url_table_rst, example_docs=example_docs) + codecs.open(RESOURCES_FILE, 'w', encoding="UTF-8").write(resources_rst) + + for example_pattern, example_results in examples.items(): + self._write_example(example_pattern, example_results) + + def get_url_list(self, method="GET"): + return [_stripped_url(self.prefix, u) + for u in self.all_url_patterns + if method in _url_pattern_methods(u)] + + +class Command(NoArgsCommand): + help = "Print introspected REST API documentation" + option_list = NoArgsCommand.option_list + ( + make_option('--list-urls', + action='store_true', + dest='list_urls', + default=False, + help='Print a list 
class OsdRequestFactory(RequestFactory):
    """
    This class converts CRUD operations to UserRequest objects, and
    exposes non-crud functions to return the appropriate UserRequest.
    """

    def update(self, osd_id, attributes):
        """
        Build the request that applies `attributes` to OSD `osd_id`.

        The keys 'in', 'up' and 'reweight' are acted on, and only when the
        requested value differs from what the current OSD map reports.

        :param osd_id: ID of the OSD to modify
        :param attributes: dict of requested values; may also carry an 'id'
        :return: an OsdMapModifyingRequest, or None if nothing would change
        :raises RuntimeError: if asked to mark a down OSD as up
        """
        commands = []

        osd_map = rest_plugin().get_sync_object(OsdMap)

        # in/out/down take a vector of strings called 'ids', while 'reweight' takes a single integer
        # 'id' is not guaranteed to be present in attributes, so fall back
        # to the osd_id we were called with instead of raising KeyError
        id_str = str(attributes.get('id', osd_id))

        if 'in' in attributes and bool(attributes['in']) != bool(osd_map.osds_by_id[osd_id]['in']):
            if attributes['in']:
                commands.append(('osd in', {'ids': [id_str]}))
            else:
                commands.append(('osd out', {'ids': [id_str]}))

        if 'up' in attributes and bool(attributes['up']) != bool(osd_map.osds_by_id[osd_id]['up']):
            if not attributes['up']:
                commands.append(('osd down', {'ids': [id_str]}))
            else:
                raise RuntimeError("It is not valid to set a down OSD to be up")

        if 'reweight' in attributes:
            if attributes['reweight'] != float(osd_map.osd_tree_node_by_id[osd_id]['reweight']):
                commands.append(('osd reweight', {'id': osd_id, 'weight': attributes['reweight']}))

        if not commands:
            # Returning None indicates no-op
            return None

        # Work on a copy so the caller's dict is left untouched, and leave
        # 'id' out of the human readable message
        msg_attrs = attributes.copy()
        msg_attrs.pop('id', None)

        # list() so that the comparison with a list literal also holds where
        # keys() returns a view rather than a list
        attr_names = list(msg_attrs)
        if attr_names == ['in']:
            message = "Marking osd.{id} {state}".format(
                id=osd_id, state=("in" if msg_attrs['in'] else "out"))
        elif attr_names == ['up']:
            # Setting up=True was rejected above, so this can only be 'down'
            message = "Marking osd.{id} down".format(
                id=osd_id)
        elif attr_names == ['reweight']:
            message = "Re-weighting osd.{id} to {pct}%".format(
                id=osd_id, pct="{0:.1f}".format(msg_attrs['reweight'] * 100.0))
        else:
            message = "Modifying osd.{id} ({attrs})".format(
                id=osd_id, attrs=", ".join(
                    "%s=%s" % (k, v) for k, v in msg_attrs.items()))

        return OsdMapModifyingRequest(message, commands)

    def scrub(self, osd_id):
        """Return a RadosRequest issuing 'osd scrub' for `osd_id`."""
        return RadosRequest(
            "Initiating scrub on osd.{id}".format(id=osd_id),
            [('osd scrub', {'who': str(osd_id)})])

    def deep_scrub(self, osd_id):
        """Return a RadosRequest issuing 'osd deep-scrub' for `osd_id`."""
        return RadosRequest(
            "Initiating deep-scrub on osd.{id}".format(id=osd_id),
            [('osd deep-scrub', {'who': str(osd_id)})])

    def repair(self, osd_id):
        """Return a RadosRequest issuing 'osd repair' for `osd_id`."""
        return RadosRequest(
            "Initiating repair on osd.{id}".format(id=osd_id),
            [('osd repair', {'who': str(osd_id)})])

    def get_valid_commands(self, osds):
        """
        For each OSD in osds list valid commands

        :param osds: iterable of OSD IDs
        :return: dict mapping each OSD ID to {'valid_commands': [...]};
                 the list is OSD_IMPLEMENTED_COMMANDS for OSDs the map
                 reports as up, and empty otherwise
        """
        ret_val = {}
        osd_map = rest_plugin().get_sync_object(OsdMap)
        for osd_id in osds:
            if osd_map.osds_by_id[osd_id]['up']:
                ret_val[osd_id] = {'valid_commands': OSD_IMPLEMENTED_COMMANDS}
            else:
                ret_val[osd_id] = {'valid_commands': []}

        return ret_val

    def _commands_to_set_flags(self, osd_map, attributes):
        """
        Translate a {flag: bool} dict into 'osd set' / 'osd unset' commands,
        skipping flags that are already in the requested state.

        :raises RuntimeError: if a key of `attributes` is not in OSD_FLAGS
        """
        commands = []

        flags_not_implemented = set(attributes.keys()) - set(OSD_FLAGS)
        if flags_not_implemented:
            raise RuntimeError("%s not valid to set/unset" % list(flags_not_implemented))

        flags_to_set = set(k for k, v in attributes.items() if v)
        flags_to_unset = set(k for k, v in attributes.items() if not v)
        flags_that_are_set = set(k for k, v in osd_map.flags.items() if v)

        for x in flags_to_set - flags_that_are_set:
            commands.append(('osd set', {'key': x}))

        for x in flags_that_are_set & flags_to_unset:
            commands.append(('osd unset', {'key': x}))

        return commands

    def update_config(self, _, attributes):
        """
        Build the request that sets/unsets OSD map flags per `attributes`.

        The first positional argument is ignored.

        :return: an OsdMapModifyingRequest, or None if no flag would change
        """
        osd_map = rest_plugin().get_sync_object(OsdMap)

        commands = self._commands_to_set_flags(osd_map, attributes)

        if commands:
            return OsdMapModifyingRequest(
                "Modifying OSD config ({attrs})".format(
                    attrs=", ".join("%s=%s" % (k, v) for k, v in attributes.items())
                ), commands)

        else:
            return None
class PoolRequestFactory(RequestFactory):
    """
    Converts create/update/delete operations on pools into UserRequest
    objects carrying the corresponding 'osd pool ...' commands.
    """

    def _resolve_pool(self, pool_id):
        """Return the OSD map's entry for `pool_id` (KeyError if unknown)."""
        osd_map = rest_plugin().get_sync_object(OsdMap)
        return osd_map.pools_by_id[pool_id]

    def _pool_attribute_commands(self, pool_name, attributes):
        """
        Return the list of commands that apply `attributes` to `pool_name`.

        Keys in POOL_PROPERTIES become 'osd pool set', the quota keys become
        'osd pool set-quota', and 'name' becomes a trailing 'osd pool rename'.
        Keys that match none of these are ignored.
        """
        commands = []
        for var in POOL_PROPERTIES:
            if var in attributes:
                val = attributes[var]

                # Special case for hashpspool, accepts 'true' from firefly
                # onwards but requires 0 or 1 for dumpling, so just use the
                # old style.
                if isinstance(val, bool):
                    val = 1 if val else 0

                commands.append(('osd pool set', {
                    'pool': pool_name,
                    'var': var,
                    'val': val
                }))

        # Quota setting ('osd pool set-quota') is separate to the main 'set'
        # operation
        for attr_name, set_name in [('quota_max_bytes', 'max_bytes'),
                                    ('quota_max_objects', 'max_objects')]:
            if attr_name in attributes:
                commands.append(('osd pool set-quota', {
                    'pool': pool_name,
                    'field': set_name,
                    # set-quota wants a string in case it has units in
                    'val': str(attributes[attr_name])
                }))

        # Renames come last (the preceding commands reference the pool by its
        # old name)
        if 'name' in attributes:
            commands.append(('osd pool rename', {
                "srcpool": pool_name,
                "destpool": attributes['name']
            }))

        return commands

    def delete(self, pool_id):
        """Return an OsdMapModifyingRequest that deletes pool `pool_id`."""
        # Resolve pool ID to name
        pool_name = self._resolve_pool(pool_id)['pool_name']

        # TODO: perhaps the REST API should have something in the body to
        # make it slightly harder to accidentally delete a pool, to respect
        # the severity of this operation since we're hiding the
        # --yes-i-really-really-want-to stuff here
        # TODO: handle errors in a way that caller can show to a user, e.g.
        # if the name is wrong we should be sending a structured errors dict
        # that they can use to associate the complaint with the 'name' field.
        commands = [
            ('osd pool delete', {'pool': pool_name, 'pool2': pool_name,
                                 'sure': '--yes-i-really-really-mean-it'})]
        return OsdMapModifyingRequest(
            "Deleting pool '{name}'".format(name=pool_name),
            commands)

    def update(self, pool_id, attributes):
        """
        Build the request that applies `attributes` to pool `pool_id`.

        When 'pg_num' is present a PgCreatingRequest is returned, otherwise
        an OsdMapModifyingRequest.  Note that `attributes` is modified in
        place: 'pg_num', 'pgp_num' and 'id' are removed from it on the
        respective code paths.

        :raises NotImplementedError: if no 'pg_num' is given and none of the
                                     attributes maps to a command
        """
        osd_map = rest_plugin().get_sync_object(OsdMap)
        pool = self._resolve_pool(pool_id)
        pool_name = pool['pool_name']

        if 'pg_num' in attributes:
            # Special case when setting pg_num: have to do some extra work
            # to wait for PG creation between setting these two fields.
            final_pg_count = attributes['pg_num']

            # pgp_num follows pg_num unless given explicitly
            if 'pgp_num' in attributes:
                pgp_num = attributes['pgp_num']
                del attributes['pgp_num']
            else:
                pgp_num = attributes['pg_num']
            del attributes['pg_num']

            # Everything other than pg_num/pgp_num is applied up front
            pre_create_commands = self._pool_attribute_commands(pool_name,
                                                                attributes)

            # This setting is new in Ceph Firefly, where it defaults to 32.
            # For older revisions, we simply pretend that the setting exists
            # with a default setting.
            mon_osd_max_split_count = int(rest_plugin().get_sync_object(Config).data.get(
                'mon_osd_max_split_count', LEGACY_MON_OSD_MAX_SPLIT_COUNT))
            initial_pg_count = pool['pg_num']
            n_osds = min(initial_pg_count, len(osd_map.osds_by_id))
            # The rules about creating PGs:
            #  where N_osds = min(old_pg_count, osd_count)
            #        the number of new PGs divided by N_osds may not be greater
            #        than mon_osd_max_split_count
            block_size = mon_osd_max_split_count * n_osds

            return PgCreatingRequest(
                "Growing pool '{name}' to {size} PGs".format(
                    name=pool_name, size=final_pg_count),
                pre_create_commands,
                pool_id, pool_name, pgp_num,
                initial_pg_count, final_pg_count, block_size)
        else:
            commands = self._pool_attribute_commands(pool_name, attributes)
            if not commands:
                raise NotImplementedError(attributes)

            # TODO: provide some machine-readable indication of which objects
            # are affected by a particular request.
            # Perhaps subclass Request for each type of object, and have that
            # subclass provide both the patches->commands mapping and the
            # human readable and machine readable descriptions of it?

            # Objects may be decorated with 'id' from use in a bulk PATCH, but
            # we don't want anything
            # from this point onwards to see that.
            if 'id' in attributes:
                del attributes['id']
            return OsdMapModifyingRequest(
                "Modifying pool '{name}' ({attrs})".format(
                    name=pool_name, attrs=", ".join(
                        "%s=%s" % (k, v) for k, v in attributes.items())
                ), commands)

    def create(self, attributes):
        """
        Build the PoolCreatingRequest for a new pool.

        :param attributes: dict that must contain 'name' and 'pg_num';
                           remaining keys are applied after the initial
                           'osd pool create' via _pool_attribute_commands
        """
        commands = [('osd pool create', {'pool': attributes['name'],
                                         'pg_num': attributes['pg_num']})]

        # Which attributes must we set after the initial create?
        post_create_attrs = attributes.copy()
        del post_create_attrs['name']
        del post_create_attrs['pg_num']
        # NOTE(review): a caller-supplied pgp_num is discarded here and is not
        # part of the create command above -- confirm this is intended.
        if 'pgp_num' in post_create_attrs:
            del post_create_attrs['pgp_num']

        commands.extend(self._pool_attribute_commands(
            attributes['name'],
            post_create_attrs
        ))

        log.debug("Post-create attributes: %s" % post_create_attrs)
        # Log the command list itself (this used to repeat post_create_attrs)
        log.debug("Commands: %s" % commands)

        return PoolCreatingRequest(
            "Creating pool '{name}'".format(name=attributes['name']),
            attributes['name'], commands)
Long-running requests + # get a periodic call to saltutil.running to verify that things really + # are still happening. + # """ + # + # if not self._by_tag: + # return + # else: + # log.debug("RequestCollection.tick: %s JIDs underway" % len(self._by_tag)) + # + # # Identify JIDs who haven't had a saltutil.running reponse for too long. + # # Kill requests in a separate phase because request:JID is not 1:1 + # stale_jobs = set() + # _now = now() + # for request in self._by_tag.values(): + # if _now - request.alive_at > datetime.timedelta(seconds=TICK_PERIOD * 3): + # log.error("Request %s JID %s stale: now=%s, alive_at=%s" % ( + # request.id, request.jid, _now, request.alive_at + # )) + # stale_jobs.add(request) + # + # # Any identified stale jobs are errored out. + # for request in stale_jobs: + # with self._update_index(request): + # request.set_error("Lost contact") + # request.jid = None + # request.complete() + # + # # Identify minions associated with JIDs in flight + # query_minions = set() + # for jid, request in self._by_tag.items(): + # query_minions.add(request.minion_id) + # + # # Attempt to emit a saltutil.running to ping jobs, next tick we + # # will see if we got updates to the alive_at attribute to indicate non-staleness + # if query_minions: + # log.info("RequestCollection.tick: sending get_running for {0}".format(query_minions)) + # self._remote.get_running(list(query_minions)) + + # def on_tick_response(self, minion_id, jobs): + # """ + # Update the alive_at parameter of requests to record that they + # are still running remotely. 
+ # + # :param jobs: The response from a saltutil.running + # """ + # log.debug("RequestCollection.on_tick_response: %s from %s" % (len(jobs), minion_id)) + # for job in jobs: + # try: + # request = self._by_tag[job['jid']] + # except KeyError: + # # Not one of mine, ignore it + # pass + # else: + # request.alive_at = now() + + # def cancel(self, request_id): + # """ + # Immediately mark a request as cancelled, and in the background + # try and cancel any outstanding JID for it. + # """ + # request = self._by_request_id[request_id] + # + # # Idempotent behaviour: no-op if already cancelled + # if request.state == request.COMPLETE: + # return + # + # with self._update_index(request): + # # I will take over cancelling the JID from the request + # cancel_jid = request.jid + # request.jid = None + # + # # Request is now done, no further calls + # request.set_error("Cancelled") + # request.complete() + # + # # In the background, try to cancel the request's JID on a best-effort basis + # if cancel_jid: + # self._remote.cancel(request.minion_id, cancel_jid) + # # We don't check for completion or errors, it's a best-effort thing. If we're + # # cancelling something we will do our best to kill any subprocess but can't + # # any guarantees because running nodes may be out of touch with the calamari server. + # + # @nosleep + # def fail_all(self, failed_minion): + # """ + # For use when we lose contact with the minion that was in use for running + # requests: assume all these requests are never going to return now. + # """ + # for request in self.get_all(UserRequest.SUBMITTED): + # with self._update_index(request): + # request.set_error("Lost contact with server %s" % failed_minion) + # if request.jid: + # log.error("Giving up on JID %s" % request.jid) + # request.jid = None + # request.complete() + + def submit(self, request): + """ + Submit a request and store it. 
def on_map(self, sync_type, sync_object):
    """
    Hand a newly available cluster map to the requests waiting for it.

    Every request in the SUBMITTED state whose awaiting_versions contains
    `sync_type` has its on_map() invoked with the new map.  A request
    that raises while being processed is marked as errored and completed
    rather than being left pending.  The collection lock is held
    throughout.
    """
    with self._lock:
        log.info("RequestCollection.on_map: {0}".format(sync_type))
        for pending in self.get_all(state=UserRequest.SUBMITTED):
            try:
                # Only requests awaiting this particular map type are
                # interested in it.
                for awaited_type in pending.awaiting_versions.keys():
                    if awaited_type == sync_type:
                        pending.on_map(sync_type, sync_object)
            except Exception as err:
                log.error("e.__class__ = {0}".format(err.__class__))
                log.exception("Request %s threw exception in on_map", pending.id)
                pending.set_error("Internal error %s" % err)
                pending.complete()
def on_completion(self, tag):
    """
    Callback for when a salt/job/<jid>/ret event is received, in which
    we find the UserRequest that created the job, and inform it of
    completion so that it can progress.

    :param tag: key under which the request is stored in this collection
                (it is looked up with get_by_id); unknown tags are logged
                and ignored
    """
    with self._lock:
        log.info("RequestCollection.on_completion: {0}".format(tag))

        try:
            request = self.get_by_id(tag)
        except KeyError:
            # Use str.format here: applying '%' to a template with no
            # %-placeholders raises TypeError instead of logging.
            log.warning("on_completion: unknown tag {0}".format(tag))
            return

        request.rados_commands.advance()
        if request.rados_commands.is_complete():
            if request.rados_commands.r == 0:
                try:
                    request.complete_jid()
                except Exception as e:
                    # Make sure a misbehaving request ends up completed
                    # with an error rather than staying pending.
                    log.exception("Request %s threw exception in complete_jid", request.id)
                    request.set_error("Internal error %s" % e)
                    request.complete()

                # The request may be waiting for an epoch that we already have, if so
                # give it to the request right away
                # NOTE(review): this loop also runs after the error path
                # above has completed the request -- confirm that is benign.
                for sync_type, want_version in request.awaiting_versions.items():
                    sync_object = rest_plugin().get_sync_object(sync_type)
                    if want_version and sync_type.cmp(sync_object.version, want_version) >= 0:
                        log.info("Awaited %s %s is immediately available" % (sync_type, want_version))
                        request.on_map(sync_type, sync_object)
            else:
                # Non-zero result: report the command output as the error
                request.set_error(request.rados_commands.outs)
                request.complete()
+ + """ + + def delete(self, obj_id): + raise NotImplementedError() + + def update(self, obj_id, attributes): + raise NotImplementedError() + + def create(self, attributes): + raise NotImplementedError() + + def get_valid_commands(self, obj_ids): + return {} diff --git a/src/pybind/mgr/calamari_rest/manager/user_request.py b/src/pybind/mgr/calamari_rest/manager/user_request.py new file mode 100644 index 000000000000..96c85a9e3072 --- /dev/null +++ b/src/pybind/mgr/calamari_rest/manager/user_request.py @@ -0,0 +1,553 @@ +import json +import logging +import uuid + +from calamari_rest.types import OsdMap, PgSummary, USER_REQUEST_COMPLETE, USER_REQUEST_SUBMITTED +from calamari_rest.util import now +from mgr_module import CommandResult + +from rest import logger +log = logger() +from rest import global_instance as rest_plugin + + +class UserRequestBase(object): + """ + A request acts on one or more Ceph-managed objects, i.e. + mon, mds, osd, pg. + + Amist the terminology mess of 'jobs', 'commands', 'operations', this class + is named for clarity: it's an operation at an end-user level of + granularity, something that might be a button in the UI. + + UserRequests are usually remotely executed on a mon. However, there + may be a final step of updating the state of ClusterMonitor in order + that subsequent REST API consumer reads return values consistent with + the job having completed, e.g. waiting for the OSD map to be up + to date before calling a pool creation complete. For this reason, + UserRequests have a local ID and completion state that is independent + of their remote ID (salt jid). UserRequests may also execute more than + one JID in the course of their lifetime. + + Requests have the following lifecycle: + NEW object is created, it has all the information needed to do its job + other than where it should execute. + SUBMITTED the request has started executing, usually this will have involved sending + out a salt job, so .jid is often set but not always. 
@property
def status(self):
    """
    One-line description of the phase the request is currently in.

    Yields "Running" until the request reaches the COMPLETE state. After
    that it yields a failure line embedding ``error_message`` when
    ``error`` is set, or a success line otherwise.
    """
    if self.state == self.COMPLETE:
        if self.error:
            return "Failed (%s)" % self.error_message
        return "Completed successfully"
    return "Running"
class RadosCommands(object):
    """
    Run a list of mon commands one after another, stopping at the first
    command that returns a non-zero code.

    Each entry of ``commands`` is a pair ``(prefix, args)`` where ``args``
    is a dict of command arguments. ``run()`` sends the next command;
    ``advance()`` collects its result and, on success, sends the following
    one.
    """
    def __init__(self, tag, commands):
        # CommandResult of the command currently in flight, None when idle
        self.result = None
        self._tag = tag
        # Commands that have not been sent yet
        self._commands = commands

        # Fields of the most recently finished command, as unpacked from
        # result.wait() in advance()
        self.r = None
        self.outs = None
        self.outb = None

    @staticmethod
    def _build_command(cmd):
        """
        Merge a ``(prefix, args)`` pair into the single dict that gets
        serialized for ``send_command``.

        Works on a copy so the caller's ``args`` dict is left untouched;
        the same command list may be kept and reused by the owner.
        """
        command = dict(cmd[1])
        command['prefix'] = cmd[0]
        return command

    def run(self):
        """
        Pop the next pending command and send it.

        Raises IndexError if there is no pending command.
        """
        cmd = self._commands[0]
        self._commands = self._commands[1:]
        self.result = CommandResult(self._tag)

        log.debug("cmd={0}".format(cmd))

        # Commands come in as a 2-tuple of prefix and args, convert them
        # to the form that send_command uses
        command = self._build_command(cmd)

        rest_plugin().send_command(self.result, json.dumps(command), self._tag)

    def is_complete(self):
        """
        Return True when nothing is in flight and nothing is left to send.
        """
        return self.result is None and not self._commands

    def advance(self):
        """
        Collect the result of the in-flight command and move on.

        On a zero return code the next pending command (if any) is sent;
        on any other code the remaining commands are discarded.
        """
        self.r, self.outb, self.outs = self.result.wait()
        self.result = None

        if self.r == 0:
            if self._commands:
                self.run()
        else:
            # Stop on errors
            self._commands = []
class OsdMapModifyingRequest(RadosRequest):
    """
    A RadosRequest that is only finished once the OsdMap sync object has
    reached the epoch reported after its commands ran.

    An "osd stat" command is appended to the caller's commands; the
    'epoch' field of its JSON output is the version to wait for.
    """

    def __init__(self, headline, commands):
        # FIXME: would be nice to make all ceph command return epochs
        # on completion, so we don't always do this to find out what
        # epoch to wait for to see results of command
        # FIXME: OR we could enforce that the C++ layer of ceph-mgr should
        # always wait_for_latest before passing notifications to python land
        epoch_probe = ["osd stat", {"format": "json-pretty"}]
        super(OsdMapModifyingRequest, self).__init__(
            headline, commands + [epoch_probe])
        self._await_version = None

    @property
    def status(self):
        # While an epoch is outstanding, report it; otherwise defer to the
        # parent class.
        still_waiting = self.state != self.COMPLETE and self._await_version
        if not still_waiting:
            return super(OsdMapModifyingRequest, self).status
        return "Waiting for OSD map epoch %s" % self._await_version

    @property
    def associations(self):
        return {}

    @property
    def awaiting_versions(self):
        # Nothing is awaited before the epoch is known or after completion.
        if not self._await_version or self.state == self.COMPLETE:
            return {}
        return {OsdMap: self._await_version}

    def complete_jid(self):
        # Remote work is done: remember which map epoch to wait for.
        raw = self.rados_commands.outb
        log.debug("decoding outb: '{0}'".format(raw))
        self._await_version = json.loads(raw)['epoch']

    def on_map(self, sync_type, osd_map):
        assert sync_type == OsdMap
        assert self._await_version is not None

        caught_up = osd_map.version >= self._await_version
        if not caught_up:
            self.log.debug("check pending (%s < %s)" % (osd_map.version, self._await_version))
            return

        self.log.debug("check passed (%s >= %s)" % (osd_map.version, self._await_version))
        self.complete()
class PgProgress(object):
    """
    Bookkeeping for growing a pool's PG count from ``initial`` to ``final``
    in steps of at most ``block_size``.

    Tracks the current intermediate target and how many PGs are still to
    be created.
    """
    def __init__(self, initial, final, block_size):
        self.initial = initial
        self.final = final
        self._block_size = block_size

        self._still_to_create = final - initial

        # Start at the current size; move to the first target only when
        # there is something to create.
        self._intermediate_goal = initial
        if self._still_to_create > 0:
            self.advance_goal()

    def advance_goal(self):
        """
        Move the intermediate target one block further, capped at ``final``.
        """
        assert not self.is_final_block()
        next_goal = self._intermediate_goal + self._block_size
        self._intermediate_goal = min(self.final, next_goal)

    def set_created_pg_count(self, pg_count):
        """
        Record that ``pg_count`` PGs now exist; the remainder never goes
        below zero.
        """
        remaining = self.final - pg_count
        self._still_to_create = max(remaining, 0)

    def get_status(self):
        """
        Return a one-line progress message, naming the PG range being
        created while the target is an intermediate one.
        """
        total = self.final - self.initial
        done = total - self._still_to_create

        if self._intermediate_goal == self.final:
            return "Waiting for PG creation (%s/%s)" % (done, total)

        low = max(self._intermediate_goal - self._block_size, self.initial)
        high = self._intermediate_goal
        return "Waiting for PG creation (%s/%s), currently creating PGs %s-%s" % (
            done, total, low, high)

    def expected_count(self):
        """
        After a successful 'osd pool set' operation, what should pg_num be?
        """
        return self._intermediate_goal

    def is_final_block(self):
        """
        Is the current expansion under way the final one?
        """
        return self._intermediate_goal == self.final

    def is_complete(self):
        """
        Have all expected PGs been created?
        """
        return self._still_to_create == 0

    @property
    def goal(self):
        return self._intermediate_goal
def on_map(self, sync_type, sync_object):
    """
    Drive the PG-creation state machine from a newly received map.

    In phase PG_MAP_WAIT:
      * a PgSummary is used to count this pool's PGs that are not in
        state 'creating'; once the current goal is met, either the next
        block of PGs is requested, the post-create commands are issued,
        or the request completes;
      * an OsdMap is used to check that pg_num still matches the expected
        count, failing the request otherwise.
    In phase OSD_MAP_WAIT the map is treated as an OsdMap: pg_num is
    checked, and once the awaited version has arrived the request either
    completes or moves on to PG_MAP_WAIT.

    :raises NotImplementedError: for a map type or phase not handled above
    """
    self.log.debug("PgCreatingRequest %s %s" % (sync_type.str, self._phase))
    if self._phase == self.PG_MAP_WAIT:
        if sync_type == PgSummary:
            # Count the PGs in this pool which are not in state 'creating'
            pg_summary = sync_object
            by_state = pg_summary.data['by_pool'][str(self._pool_id)]
            pgs_not_creating = sum(
                count for state_tuple, count in by_state.items()
                if 'creating' not in state_tuple.split("+"))

            self._pg_progress.set_created_pg_count(pgs_not_creating)
            self.log.debug("PgCreatingRequest.on_map: pg_counter=%s/%s (final %s)" % (
                pgs_not_creating, self._pg_progress.goal, self._pg_progress.final))
            if pgs_not_creating >= self._pg_progress.goal:
                if self._pg_progress.is_final_block():
                    self.log.debug("PgCreatingRequest.on_map Creations complete")
                    if self._post_create_commands:
                        self.log.debug("PgCreatingRequest.on_map Issuing post-create commands")
                        self._submit(self._post_create_commands)
                        self._phase = self.JID_WAIT
                    else:
                        self.log.debug("PgCreatingRequest.on_map All done")
                        self.complete()
                else:
                    self.log.debug("PgCreatingREQUEST.on_map Issuing more creates")
                    self._pg_progress.advance_goal()
                    # Request another tranche of PGs up to _block_size
                    self._submit([('osd pool set', {
                        'pool': self._pool_name,
                        'var': 'pg_num',
                        'val': self._pg_progress.goal
                    })])
                    self._phase = self.JID_WAIT
        elif sync_type == OsdMap:
            # Keep an eye on the OsdMap to check that pg_num is what we
            # expect: otherwise if something else changed pg_num then our
            # PG creation check could get confused and fail to complete.
            osd_map = sync_object
            pool = osd_map.pools_by_id[self._pool_id]
            if pool['pg_num'] != self._pg_progress.expected_count():
                self.set_error("PG creation interrupted (unexpected change to pg_num)")
                self.complete()
                return
        else:
            # Indices must be 0 and 1: with only two arguments, '{1}'/'{2}'
            # would raise IndexError instead of this exception.
            raise NotImplementedError("Unexpected map {0} in state {1}".format(
                sync_type, self._phase
            ))

    elif self._phase == self.OSD_MAP_WAIT:
        # Read back the pg_num for my pool from the OSD map
        osd_map = sync_object
        pool = osd_map.pools_by_id[self._pool_id]

        # In Ceph <= 0.67.7, "osd pool set pg_num" will return success even
        # if it hasn't really increased pg_num, so we must examine the OSD
        # map to see if it really succeeded
        if pool['pg_num'] != self._pg_progress.expected_count():
            self.set_error("PG creation failed (check that there aren't already PGs in 'creating' state)")
            self.complete()
            return

        assert self._await_version
        ready = osd_map.version >= self._await_version
        if ready:
            # OSD map advancement either means a PG creation round
            # completed, or that the post_create_commands completed.
            # Distinguish by looking at pg_progress.
            if self._pg_progress.is_complete():
                # This was the OSD map update from the
                # post_create_commands, so we're all done!
                self.complete()
            else:
                # This was the OSD map update from a PG creation command,
                # so start waiting for the pgs
                self._phase = self.PG_MAP_WAIT
    else:
        raise NotImplementedError("Unexpected {0} in phase {1}".format(sync_type, self._phase))
def to_unix(t):
    """
    Render ``t`` with Django's 'U' date format (seconds since the Unix
    epoch), convert to int and scale by 1000.

    ``None`` is passed through unchanged.
    """
    if t is not None:
        seconds = int(dateformat.format(t, 'U'))
        return seconds * 1000
    return None
+ + +class UserSerializer(serializers.ModelSerializer): + """ + Serializer for the Django User model. + + Used to expose a django-rest-framework user management resource. + """ + class Meta: + model = User + fields = ('id', 'username', 'password', 'email') + + def to_native(self, obj): + # Before conversion, remove the password field. This prevents the hash + # from being displayed when requesting user details. + if 'password' in self.fields: + del self.fields['password'] + return super(UserSerializer, self).to_native(obj) + + def restore_object(self, attrs, instance=None): + user = super(UserSerializer, self).restore_object(attrs, instance) + if user: + # This will perform the Django-specific password obfuscation + user.set_password(attrs['password']) + return user + + +class ClusterSpaceSerializer(serializers.Serializer): + space = serializers.Field() + + class Meta: + fields = ('space',) + + +class ClusterHealthSerializer(serializers.Serializer): + report = serializers.Field() + + class Meta: + fields = ('report', 'cluster_update_time', 'cluster_update_time_unix') + + # FIXME: should not be copying this field onto health counters etc, clients should get + # it by querying the cluster directly. 
+ cluster_update_time = serializers.Field() + cluster_update_time_unix = serializers.SerializerMethodField('get_cluster_update_time_unix') + + def get_cluster_update_time_unix(self, obj): + update_time = dateutil.parser.parse(obj.cluster_update_time) + return to_unix(update_time) + + +class ClusterHealthCountersSerializer(serializers.Serializer): + pg = serializers.SerializerMethodField('get_pg') + mds = serializers.SerializerMethodField('get_mds') + mon = serializers.SerializerMethodField('get_mon') + osd = serializers.SerializerMethodField('get_osd') + + class Meta: + fields = ('pg', 'mds', 'mon', 'osd', 'cluster_update_time', 'cluster_update_time_unix') + + def get_pg(self, obj): + return obj.counters['pg'] + + def get_mds(self, obj): + return obj.counters['mds'] + + def get_mon(self, obj): + return obj.counters['mon'] + + def get_osd(self, obj): + return obj.counters['osd'] + + # FIXME: should not be copying this field onto health counters etc, clients should get + # it by querying the cluster directly. 
+ cluster_update_time = serializers.Field() + cluster_update_time_unix = serializers.SerializerMethodField('get_cluster_update_time_unix') + + def get_cluster_update_time_unix(self, obj): + update_time = dateutil.parser.parse(obj.cluster_update_time) + return to_unix(update_time) + + +class OSDDetailSerializer(serializers.Serializer): + class Meta: + # FIXME: should just be returning the OSD as the object + fields = ('osd',) + + osd = serializers.Field() + + +class OSDListSerializer(serializers.Serializer): + # TODO: the OSD list resource should just return a list, so that + # this serializer class isn't necessary + osds = serializers.Field() + pg_state_counts = serializers.SerializerMethodField('get_pg_state_counts') + + def get_pg_state_counts(self, obj): + return dict((s, len(v)) for s, v in obj.osds_by_pg_state.iteritems()) + + class Meta: + fields = ('osds', 'pg_state_counts') + + +class PoolSerializer(serializers.Serializer): + class Meta: + fields = ('pool_id', 'name', 'quota_max_bytes', 'quota_max_objects', 'used_objects', 'used_bytes', 'id', 'cluster') + + id = serializers.IntegerField() + cluster = serializers.CharField() + pool_id = serializers.IntegerField() + name = serializers.CharField() + quota_max_bytes = serializers.IntegerField() + quota_max_objects = serializers.IntegerField() + used_objects = serializers.IntegerField() + used_bytes = serializers.IntegerField() + + +class ServiceStatusSerializer(serializers.Serializer): + class Meta: + fields = ('type', 'service_id', 'name') + + type = serializers.SerializerMethodField('get_type') + service_id = serializers.SerializerMethodField('get_service_id') + name = serializers.SerializerMethodField('get_name') + + def get_type(self, obj): + return obj['id'][1] + + def get_service_id(self, obj): + return obj['id'][2] + + def get_name(self, obj): + return "%s.%s" % (self.get_type(obj), self.get_service_id(obj)) + + +class ServerSerializer(serializers.Serializer): + class Meta: + fields = ('addr', 
class ValidatingSerializer(serializers.Serializer):
    """
    Serializer that additionally checks which fields may or must be
    supplied for a given HTTP method.

    Subclasses provide ``Meta.create_allowed`` / ``Meta.create_required``
    (used for POST) and ``Meta.modify_allowed`` / ``Meta.modify_required``
    (used for PATCH and PUT).
    """

    # django rest framework >= 3 renamed this field
    @property
    def init_data(self):
        return self.initial_data

    def is_valid(self, http_method):
        """
        Run the parent validation, then add per-method field errors.

        :param http_method: 'POST', 'PATCH' or 'PUT'; anything else is
                            itself recorded as an error
        :return: True when no errors were recorded
        """
        # django rest framework >= 3 has a different is_valid prototype
        # than <= 2
        super(ValidatingSerializer, self).is_valid(False)

        if self.init_data is not None:
            if http_method == 'POST':
                self._errors.update(
                    self.construct_errors(self.Meta.create_allowed,
                                          self.Meta.create_required,
                                          self.init_data.keys(),
                                          http_method))

            elif http_method in ('PATCH', 'PUT'):
                self._errors.update(
                    self.construct_errors(self.Meta.modify_allowed,
                                          self.Meta.modify_required,
                                          self.init_data.keys(),
                                          http_method))
            else:
                self._errors.update([[http_method, 'Not a valid method']])

        return not self._errors

    def construct_errors(self, allowed, required, init_data, action):
        """
        Build a field -> message dict for the supplied field names.

        :param allowed: field names permitted for this action
        :param required: field names that must be present for this action
        :param init_data: field names actually supplied
        :param action: name of the action, interpolated into the messages
        :return: dict with an entry for every supplied-but-not-allowed
                 field and every required-but-missing field
        """
        supplied = set(init_data)
        errors = {}

        errors.update(
            (name, 'Not allowed during %s' % action)
            for name in supplied - set(allowed))

        errors.update(
            (name, 'Required during %s' % action)
            for name in set(required) - supplied)

        return errors

    def get_data(self):
        """
        Return the serialized values of only those fields that were
        present in the submitted data.
        """
        # like http://www.django-rest-framework.org/api-guide/serializers#dynamically-modifying-fields
        filtered_data = {}
        # Iterate the keys directly: the submitted values are not used,
        # and this avoids the Python-2-only iteritems().
        for field in self.init_data:
            filtered_data[field] = self.data[field]

        return filtered_data
'quota_max_objects', + 'quota_max_bytes') + create_allowed = ('name', 'pg_num', 'pgp_num', 'size', 'min_size', + 'crash_replay_interval', 'crush_ruleset', + 'quota_max_objects', 'quota_max_bytes', 'hashpspool') + create_required = ('name', 'pg_num') + modify_allowed = ('name', 'pg_num', 'pgp_num', 'size', 'min_size', + 'crash_replay_interval', 'crush_ruleset', + 'quota_max_objects', 'quota_max_bytes', 'hashpspool') + modify_required = () + + # Required in creation + name = serializers.CharField(required=False, source='pool_name', + help_text="Human readable name of the pool, may" + "change over the pools lifetime at user request.") + pg_num = serializers.IntegerField(required=False, + help_text="Number of placement groups in this pool") + + # Not required in creation, immutable + id = serializers.CharField(source='pool', required=False, + help_text="Unique numeric ID") + + # May be set in creation or updates + size = serializers.IntegerField(required=False, + help_text="Replication factor") + min_size = serializers.IntegerField(required=False, + help_text="Minimum number of replicas required for I/O") + crash_replay_interval = serializers.IntegerField(required=False, + help_text="Number of seconds to allow clients to " + "replay acknowledged, but uncommitted requests") + crush_ruleset = serializers.IntegerField(required=False, + help_text="CRUSH ruleset in use") + # In 'ceph osd pool set' it's called pgp_num, but in 'ceph osd dump' it's called + # pg_placement_num :-/ + pgp_num = serializers.IntegerField(source='pg_placement_num', + required=False, + help_text="Effective number of placement groups to use when calculating " + "data placement") + + # This is settable by 'ceph osd pool set' but in 'ceph osd dump' it only appears + # within the 'flags' integer. We synthesize a boolean from the flags. + hashpspool = serializers.BooleanField(required=False, + help_text="Enable HASHPSPOOL flag") + + # This is synthesized from ceph's 'flags' attribute, read only. 
+ full = serializers.BooleanField(required=False, + help_text="True if the pool is full") + + quota_max_objects = serializers.IntegerField(required=False, + help_text="Quota limit on object count (0 is unlimited)") + quota_max_bytes = serializers.IntegerField(required=False, + help_text="Quota limit on usage in bytes (0 is unlimited)") + + +class OsdSerializer(ValidatingSerializer): + class Meta: + fields = ('uuid', 'up', 'in', 'id', 'reweight', 'server', 'pools', + 'valid_commands', 'public_addr', 'cluster_addr') + create_allowed = () + create_required = () + modify_allowed = ('up', 'in', 'reweight') + modify_required = () + + id = serializers.IntegerField(read_only=True, source='osd', + help_text="ID of this OSD within this cluster") + uuid = fields.UuidField(read_only=True, + help_text="Globally unique ID for this OSD") + up = fields.BooleanField(required=False, + help_text="Whether the OSD is running from the point of view of the rest of the cluster") + _in = fields.BooleanField(required=False, + help_text="Whether the OSD is 'in' the set of OSDs which will be used to store data") + reweight = serializers.FloatField(required=False, + help_text="CRUSH weight factor") + server = serializers.CharField(read_only=True, + help_text="FQDN of server this OSD was last running on") + pools = serializers.ListField( + help_text="List of pool IDs which use this OSD for storage", + required=False) + valid_commands = serializers.CharField(read_only=True, + help_text="List of commands that can be applied to this OSD") + + public_addr = serializers.CharField(read_only=True, + help_text="Public/frontend IP address") + cluster_addr = serializers.CharField(read_only=True, + help_text="Cluster/backend IP address") + + +class OsdConfigSerializer(ValidatingSerializer): + class Meta: + fields = OSD_FLAGS + create_allowed = () + create_required = () + modify_allowed = OSD_FLAGS + modify_required = () + + pause = serializers.BooleanField( + help_text="Disable IO requests to all OSDs in 
cluster", required=False) + noup = serializers.BooleanField( + help_text="Prevent OSDs from automatically getting marked as Up by the monitors. This setting is useful for troubleshooting", + required=False) + nodown = serializers.BooleanField( + help_text="Prevent OSDs from automatically getting marked as Down by the monitors. This setting is useful for troubleshooting", + required=False) + noout = serializers.BooleanField( + help_text="Prevent Down OSDs from being marked as out", required=False) + noin = serializers.BooleanField( + help_text="Prevent OSDs from booting OSDs from being marked as IN. Will cause cluster health to be set to WARNING", + required=False) + nobackfill = serializers.BooleanField( + help_text="Disable backfill operations on cluster", required=False) + norecover = serializers.BooleanField( + help_text="Disable replication of Placement Groups", required=False) + noscrub = serializers.BooleanField( + help_text="Disables automatic periodic scrub operations on OSDs. May still be initiated on demand", + required=False) + nodeepscrub = serializers.BooleanField( + help_text="Disables automatic periodic deep scrub operations on OSDs. 
May still be initiated on demand", + required=False) + + +class CrushRuleSerializer(serializers.Serializer): + class Meta: + fields = ( + 'id', 'name', 'ruleset', 'type', 'min_size', 'max_size', 'steps', + 'osd_count') + + id = serializers.IntegerField(source='rule_id') + name = serializers.CharField(source='rule_name', + help_text="Human readable name") + ruleset = serializers.IntegerField( + help_text="ID of the CRUSH ruleset of which this rule is a member") + type = fields.EnumField({CRUSH_RULE_TYPE_REPLICATED: 'replicated', + CRUSH_RULE_TYPE_ERASURE: 'erasure'}, + help_text="Data redundancy type") + min_size = serializers.IntegerField( + help_text="If a pool makes more replicas than this number, CRUSH will NOT select this rule") + max_size = serializers.IntegerField( + help_text="If a pool makes fewer replicas than this number, CRUSH will NOT select this rule") + steps = serializers.ListField( + help_text="List of operations used to select OSDs") + osd_count = serializers.IntegerField( + help_text="Number of OSDs which are used for data placement") + + +class CrushRuleSetSerializer(serializers.Serializer): + class Meta: + fields = ('id', 'rules') + + id = serializers.IntegerField() + rules = CrushRuleSerializer(many=True) + + +class RequestSerializer(serializers.Serializer): + class Meta: + fields = ( + 'id', 'state', 'error', 'error_message', 'headline', 'status', + 'requested_at', 'completed_at') + + id = serializers.CharField( + help_text="A globally unique ID for this request") + state = serializers.CharField( + help_text="One of '{complete}', '{submitted}'".format( + complete=USER_REQUEST_COMPLETE, submitted=USER_REQUEST_SUBMITTED)) + error = serializers.BooleanField( + help_text="True if the request completed unsuccessfully") + error_message = serializers.CharField( + help_text="Human readable string describing failure if ``error`` is True") + headline = serializers.CharField( + help_text="Single sentence human readable description of the request") + 
status = serializers.CharField( + help_text="Single sentence human readable description of the request's current " + "activity, if it has more than one stage. May be null.") + requested_at = serializers.DateTimeField( + help_text="Time at which the request was received by calamari server") + completed_at = serializers.DateTimeField( + help_text="Time at which the request completed, may be null.") + + +class SaltKeySerializer(ValidatingSerializer): + class Meta: + fields = ('id', 'status') + create_allowed = () + create_required = () + modify_allowed = ('status',) + modify_required = () + + id = serializers.CharField(required=False, + help_text="The minion ID, usually equal to a host's FQDN") + status = serializers.CharField( + help_text="One of 'accepted', 'rejected' or 'pre'") + + +class ServiceSerializer(serializers.Serializer): + class Meta: + fields = ('type', 'id') + + type = serializers.CharField() + id = serializers.CharField() + + +class ServerSerializer(serializers.Serializer): + class Meta: + fields = ('hostname', 'services', 'ceph_version') + + # Identifying information + hostname = serializers.CharField(help_text="Domain name") + + ceph_version = serializers.CharField( + help_text="The version of Ceph installed." 
+ ) + services = ServiceSerializer(many=True, + help_text="List of Ceph services seen" + "on this server") + + # Ceph network configuration + # frontend_addr = serializers.CharField() # may be null if no OSDs or mons on server + # backend_addr = serializers.CharField() # may be null if no OSDs on server + + # TODO: reinstate by having OSDs resolve addresses to ifaces and report + # in their metadata + # frontend_iface = serializers.CharField() # may be null if interface for frontend addr not up + # backend_iface = serializers.CharField() # may be null if interface for backend addr not up + + +class EventSerializer(serializers.Serializer): + class Meta: + fields = ('when', 'severity', 'message') + + when = serializers.DateTimeField( + help_text="Time at which event was generated") + severity = serializers.SerializerMethodField('get_severity') + message = serializers.CharField( + help_text="One line human readable description") + + def get_severity(self, obj): + return severity_str(obj.severity) + + +class LogTailSerializer(serializers.Serializer): + """ + Trivial serializer to wrap a string blob of log output + """ + + class Meta: + fields = ('lines',) + + lines = serializers.CharField( + help_text="Retrieved log data as a newline-separated string") + + +class ConfigSettingSerializer(serializers.Serializer): + class Meta: + fields = ('key', 'value') + + # This is very simple for now, but later we may add more things like + # schema information, allowed values, defaults. 
+ + key = serializers.CharField(help_text="Name of the configuration setting") + value = serializers.CharField( + help_text="Current value of the setting, as a string") + + +class MonSerializer(serializers.Serializer): + class Meta: + fields = ('name', 'rank', 'in_quorum', 'server', 'addr') + + name = serializers.CharField(help_text="Human readable name") + rank = serializers.IntegerField( + help_text="Unique of the mon within the cluster") + in_quorum = serializers.BooleanField( + help_text="True if the mon is a member of current quorum") + server = serializers.CharField( + help_text="Hostname of server running the OSD") + addr = serializers.CharField(help_text="IP address of monitor service") + leader = serializers.BooleanField( + help_text="True if this monitor is the leader of the quorum. False otherwise") + + +class CliSerializer(serializers.Serializer): + class Meta: + fields = ('out', 'err', 'status') + + out = serializers.CharField(help_text="Standard out") + err = serializers.CharField(help_text="Standard error") + status = serializers.IntegerField(help_text="Exit code") + + +# Declarative metaclass definitions are great until you want +# to use a reserved word +if False: + # In django-rest-framework 2.3.x (Calamari used this) + OsdSerializer.base_fields['in'] = OsdSerializer.base_fields['_in'] + OsdConfigSerializer.base_fields['nodeep-scrub'] = \ + OsdConfigSerializer.base_fields['nodeepscrub'] + # django_rest_framework 2.3.12 doesn't let me put help_text on a methodfield + # https://github.com/tomchristie/django-rest-framework/pull/1594 + EventSerializer.base_fields['severity'].help_text = "One of %s" % ",".join( + SEVERITIES.values()) +else: + OsdSerializer._declared_fields['in'] = OsdSerializer._declared_fields[ + '_in'] + OsdConfigSerializer._declared_fields['nodeep-scrub'] = \ + OsdConfigSerializer._declared_fields['nodeepscrub'] + EventSerializer._declared_fields[ + 'severity'].help_text = "One of %s" % ",".join(SEVERITIES.values()) diff --git 
a/src/pybind/mgr/calamari_rest/settings.py b/src/pybind/mgr/calamari_rest/settings.py new file mode 100644 index 000000000000..54c94f91bb7b --- /dev/null +++ b/src/pybind/mgr/calamari_rest/settings.py @@ -0,0 +1,174 @@ +# Django settings for calamari project. + + +from calamari_rest.config import CalamariConfig +config = CalamariConfig() + +DEBUG = False +TEMPLATE_DEBUG = DEBUG + +ADMINS = ( + # ('Your Name', 'your_email@example.com'), +) + +MANAGERS = ADMINS + +# No database, no problem! +DATABASES = { +} + +# Hosts/domain names that are valid for this site; required if DEBUG is False +# See https://docs.djangoproject.com/en/1.5/ref/settings/#allowed-hosts +ALLOWED_HOSTS = ['*'] + +# Local time zone for this installation. Choices can be found here: +# http://en.wikipedia.org/wiki/List_of_tz_zones_by_name +# although not all choices may be available on all operating systems. +# In a Windows environment this must be set to your system time zone. +TIME_ZONE = 'America/Chicago' + +# Language code for this installation. All choices can be found here: +# http://www.i18nguy.com/unicode/language-identifiers.html +LANGUAGE_CODE = 'en-us' + +SITE_ID = 1 + +# If you set this to False, Django will make some optimizations so as not +# to load the internationalization machinery. +USE_I18N = True + +# If you set this to False, Django will not format dates, numbers and +# calendars according to the current locale. +USE_L10N = True + +# If you set this to False, Django will not use timezone-aware datetimes. +USE_TZ = True + +# Absolute filesystem path to the directory that will hold user-uploaded files. +# Example: "/var/www/example.com/media/" +MEDIA_ROOT = '' + +# URL that handles the media served from MEDIA_ROOT. Make sure to use a +# trailing slash. +# Examples: "http://example.com/media/", "http://media.example.com/" +MEDIA_URL = '' + +APPEND_SLASH = False + +# Absolute path to the directory static files should be collected to. 
+# Don't put anything in this directory yourself; store your static files +# in apps' "static/" subdirectories and in STATICFILES_DIRS. +# Example: "/var/www/example.com/static/" +#STATIC_ROOT = config.get('calamari_web', 'static_root') +STATIC_ROOT = None + +# URL prefix for static files. +# Example: "http://example.com/static/", "http://static.example.com/" +STATIC_URL = '/static/' + +# Additional locations of static files +STATICFILES_DIRS = tuple() + +# List of finder classes that know how to find static files in +# various locations. +STATICFILES_FINDERS = ( + 'django.contrib.staticfiles.finders.FileSystemFinder', + 'django.contrib.staticfiles.finders.AppDirectoriesFinder', +) + +# Generate at runtime, because we're not persisting anything we can +# change SECRET_KEY every time we load. +from django.utils.crypto import get_random_string +chars = 'abcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*(-_=+)' +SECRET_KEY = get_random_string(50, chars) + +LOGIN_URL = '/login/' + +# List of callables that know how to import templates from various sources. +TEMPLATE_LOADERS = ( + 'django.template.loaders.filesystem.Loader', + 'django.template.loaders.app_directories.Loader', + # 'django.template.loaders.eggs.Loader', +) + +CSRF_COOKIE_NAME = "XSRF-TOKEN" +SESSION_COOKIE_NAME = "calamari_sessionid" + +MIDDLEWARE_CLASSES = ( + 'django.middleware.common.CommonMiddleware', + 'django.contrib.sessions.middleware.SessionMiddleware', + #'django.middleware.csrf.CsrfViewMiddleware', + #'django.contrib.auth.middleware.AuthenticationMiddleware', + 'django.contrib.messages.middleware.MessageMiddleware', + # Uncomment the next line for simple clickjacking protection: + # 'django.middleware.clickjacking.XFrameOptionsMiddleware', +) + +ROOT_URLCONF = 'calamari_rest.urls' + +# Python dotted path to the WSGI application used by Django's runserver. 
+#WSGI_APPLICATION = 'calamari_rest.wsgi.application' + +INSTALLED_APPS = ( + #'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.sites', + 'django.contrib.messages', + 'django.contrib.staticfiles', + 'rest_framework', + 'calamari_rest' +) + +# A sample logging configuration. The only tangible logging +# performed by this configuration is to send an email to +# the site admins on every HTTP 500 error when DEBUG=False. +# See http://docs.djangoproject.com/en/dev/topics/logging for +# more details on how to customize your logging configuration. +LOGGING = { + 'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'simple': { + 'format': "%(asctime)s - %(levelname)s - %(name)s %(message)s" + } + }, + 'handlers': { + 'log_file': { + 'class': 'logging.handlers.WatchedFileHandler', + 'filename': + # FIXME: populate configuration like log path from up in + # the C++ layer where we have our ceph config_opts + "/tmp/pyfoo.log", + 'formatter': 'simple' + }, + }, + 'loggers': { + 'django.request': { + 'handlers': ['log_file'], + # FIXME: populate log level from C++-land configuration + 'level': "DEBUG", + 'propagate': True, + }, + } +} + + +REST_FRAMEWORK = { + #'DEFAULT_AUTHENTICATION_CLASSES': ( + # 'rest.APIKeyAuthentication', + #), + + # Use hyperlinked styles by default. + # Only used if the `serializer_class` attribute is not set on a view. + 'DEFAULT_MODEL_SERIALIZER_CLASS': + 'rest_framework.serializers.HyperlinkedModelSerializer', + + # Use Django's standard `django.contrib.auth` permissions, + # or allow read-only access for unauthenticated users. 
+ #'DEFAULT_PERMISSION_CLASSES': [ + # 'rest_framework.permissions.IsAuthenticated' + #] +} + + diff --git a/src/pybind/mgr/calamari_rest/types.py b/src/pybind/mgr/calamari_rest/types.py new file mode 100644 index 000000000000..ed03b1452bdb --- /dev/null +++ b/src/pybind/mgr/calamari_rest/types.py @@ -0,0 +1,271 @@ +from collections import namedtuple +from calamari_rest.util import memoize + +from rest import logger +log = logger() + + +CRUSH_RULE_TYPE_REPLICATED = 1 +CRUSH_RULE_TYPE_ERASURE = 3 + + +ServiceId = namedtuple('ServiceId', ['fsid', 'service_type', 'service_id']) + + +MON = 'mon' +OSD = 'osd' +MDS = 'mds' +POOL = 'pool' +OSD_MAP = 'osd_map' +CRUSH_RULE = 'crush_rule' +CLUSTER = 'cluster' +SERVER = 'server' + + +class SyncObject(object): + """ + An object from a Ceph cluster that we are maintaining + a copy of on the Calamari server. + + We wrap these JSON-serializable objects in a python object to: + + - Decorate them with things like id-to-entry dicts + - Have a generic way of seeing the version of an object + + """ + def __init__(self, version, data): + self.version = version + self.data = data + + @classmethod + def cmp(cls, a, b): + """ + Slight bastardization of cmp. Takes two versions, + and returns a cmp-like value, except that if versions + are not sortable we only return 0 or 1. 
+ """ + # Version is something unique per version (like a hash) + return 1 if a != b else 0 + + +class VersionedSyncObject(SyncObject): + @classmethod + def cmp(cls, a, b): + # Version is something numeric like an epoch + return cmp(a, b) + + +class OsdMap(VersionedSyncObject): + str = OSD_MAP + + def __init__(self, version, data): + super(OsdMap, self).__init__(version, data) + if data is not None: + self.osds_by_id = dict([(o['osd'], o) for o in data['osds']]) + self.pools_by_id = dict([(p['pool'], p) for p in data['pools']]) + self.osd_tree_node_by_id = dict([(o['id'], o) for o in data['tree']['nodes'] if o['id'] >= 0]) + + # Special case Yuck + flags = data.get('flags', '').replace('pauserd,pausewr', 'pause') + tokenized_flags = flags.split(',') + + self.flags = dict([(x, x in tokenized_flags) for x in OSD_FLAGS]) + else: + self.osds_by_id = {} + self.pools_by_id = {} + self.osd_tree_node_by_id = {} + self.flags = dict([(x, False) for x in OSD_FLAGS]) + + @property + def osd_metadata(self): + return self.data['osd_metadata'] + + @memoize + def get_tree_nodes_by_id(self): + return dict((n["id"], n) for n in self.data['tree']["nodes"]) + + def _get_crush_rule_osds(self, rule): + nodes_by_id = self.get_tree_nodes_by_id() + + def _gather_leaf_ids(node): + if node['id'] >= 0: + return set([node['id']]) + + result = set() + for child_id in node['children']: + if child_id >= 0: + result.add(child_id) + else: + result |= _gather_leaf_ids(nodes_by_id[child_id]) + + return result + + def _gather_descendent_ids(node, typ): + result = set() + for child_id in node['children']: + child_node = nodes_by_id[child_id] + if child_node['type'] == typ: + result.add(child_node['id']) + elif 'children' in child_node: + result |= _gather_descendent_ids(child_node, typ) + + return result + + def _gather_osds(root, steps): + if root['id'] >= 0: + return set([root['id']]) + + osds = set() + step = steps[0] + if step['op'] == 'choose_firstn': + # Choose all descendents of the current node 
of type 'type' + d = _gather_descendent_ids(root, step['type']) + for desc_node in [nodes_by_id[i] for i in d]: + osds |= _gather_osds(desc_node, steps[1:]) + elif step['op'] == 'chooseleaf_firstn': + # Choose all descendents of the current node of type 'type', + # and select all leaves beneath those + for desc_node in [nodes_by_id[i] for i in _gather_descendent_ids(root, step['type'])]: + # Short circuit another iteration to find the emit + # and assume anything we've done a chooseleaf on + # is going to be part of the selected set of osds + osds |= _gather_leaf_ids(desc_node) + elif step['op'] == 'emit': + if root['id'] >= 0: + osds.add(root['id']) + + return osds + + osds = set() + for i, step in enumerate(rule['steps']): + if step['op'] == 'take': + osds |= _gather_osds(nodes_by_id[step['item']], rule['steps'][i + 1:]) + return osds + + @property + @memoize + def osds_by_rule_id(self): + result = {} + for rule in self.data['crush']['rules']: + result[rule['rule_id']] = list(self._get_crush_rule_osds(rule)) + + return result + + @property + @memoize + def osds_by_pool(self): + """ + Get the OSDS which may be used in this pool + + :return dict of pool ID to OSD IDs in the pool + """ + + result = {} + for pool_id, pool in self.pools_by_id.items(): + osds = None + for rule in [r for r in self.data['crush']['rules'] if r['ruleset'] == pool['crush_ruleset']]: + if rule['min_size'] <= pool['size'] <= rule['max_size']: + osds = self.osds_by_rule_id[rule['rule_id']] + + if osds is None: + # Fallthrough, the pool size didn't fall within any of the rules in its ruleset, Calamari + # doesn't understand. Just report all OSDs instead of failing horribly. 
+ log.error("Cannot determine OSDS for pool %s" % pool_id) + osds = self.osds_by_id.keys() + + result[pool_id] = osds + + return result + + @property + @memoize + def osd_pools(self): + """ + A dict of OSD ID to list of pool IDs + """ + osds = dict([(osd_id, []) for osd_id in self.osds_by_id.keys()]) + for pool_id in self.pools_by_id.keys(): + for in_pool_id in self.osds_by_pool[pool_id]: + osds[in_pool_id].append(pool_id) + + return osds + + +class FsMap(VersionedSyncObject): + str = 'fs_map' + + +class MonMap(VersionedSyncObject): + str = 'mon_map' + + +class MonStatus(VersionedSyncObject): + str = 'mon_status' + + def __init__(self, version, data): + super(MonStatus, self).__init__(version, data) + if data is not None: + self.mons_by_rank = dict([(m['rank'], m) for m in data['monmap']['mons']]) + else: + self.mons_by_rank = {} + + +class PgSummary(SyncObject): + """ + A summary of the state of PGs in the cluster, reported by pool and by OSD. + """ + str = 'pg_summary' + + +class Health(SyncObject): + str = 'health' + + +class Config(SyncObject): + str = 'config' + + +class NotFound(Exception): + def __init__(self, object_type, object_id): + self.object_type = object_type + self.object_id = object_id + + def __str__(self): + return "Object of type %s with id %s not found" % (self.object_type, self.object_id) + + +# The objects that ClusterMonitor keeps copies of from the mon +SYNC_OBJECT_TYPES = [FsMap, OsdMap, MonMap, PgSummary, Health, Config] +SYNC_OBJECT_STR_TYPE = dict((t.str, t) for t in SYNC_OBJECT_TYPES) + +USER_REQUEST_COMPLETE = 'complete' +USER_REQUEST_SUBMITTED = 'submitted' + +# List of allowable things to send as ceph commands to OSDs +OSD_IMPLEMENTED_COMMANDS = ('scrub', 'deep_scrub', 'repair') +OSD_FLAGS = ('pause', 'noup', 'nodown', 'noout', 'noin', 'nobackfill', 'norecover', 'noscrub', 'nodeep-scrub') + +# Severity codes for Calamari events +CRITICAL = 1 +ERROR = 2 +WARNING = 3 +RECOVERY = 4 +INFO = 5 + +SEVERITIES = { + CRITICAL: "CRITICAL", + 
ERROR: "ERROR", + WARNING: "WARNING", + RECOVERY: "RECOVERY", + INFO: "INFO" +} + +STR_TO_SEVERITY = dict([(b, a) for (a, b) in SEVERITIES.items()]) + + +def severity_str(severity): + return SEVERITIES[severity] + + +def severity_from_str(severitry_str): + return STR_TO_SEVERITY[severitry_str] diff --git a/src/pybind/mgr/calamari_rest/urls/__init__.py b/src/pybind/mgr/calamari_rest/urls/__init__.py new file mode 100644 index 000000000000..56a2007eba71 --- /dev/null +++ b/src/pybind/mgr/calamari_rest/urls/__init__.py @@ -0,0 +1,18 @@ +from django.conf.urls import patterns, include, url + +# Uncomment the next two lines to enable the admin: +# from django.contrib import admin +# admin.autodiscover() + +urlpatterns = patterns( + '', + + # These views are needed for the django-rest-framework debug interface + # to be able to log in and out. The URL path doesn't matter, rest_framework + # finds the views by name. + url(r'^api/rest_framework/', include('rest_framework.urls', namespace='rest_framework')), + + url(r'^api/v2/', include('calamari_rest.urls.v2')), +) + +#handler500 = 'calamari_web.views.server_error' diff --git a/src/pybind/mgr/calamari_rest/urls/v2.py b/src/pybind/mgr/calamari_rest/urls/v2.py new file mode 100644 index 000000000000..18fc21083147 --- /dev/null +++ b/src/pybind/mgr/calamari_rest/urls/v2.py @@ -0,0 +1,119 @@ +from django.conf.urls import patterns, url, include +from rest_framework import routers +import calamari_rest.views.v2 + +router = routers.DefaultRouter(trailing_slash=False) + +# Information about each Ceph cluster (FSID), see sub-URLs + +urlpatterns = patterns( + '', + + # About the host calamari server is running on + # url(r'^grains', calamari_rest.views.v2.grains), + + # This has to come after /user/me to make sure that special case is handled + url(r'^', include(router.urls)), + + # About ongoing operations in cthulhu + url(r'^request/(?P[a-zA-Z0-9-]+)/cancel$', + calamari_rest.views.v2.RequestViewSet.as_view({'post': 'cancel'}), + 
name='request-cancel'), + url(r'^request/(?P[a-zA-Z0-9-]+)$', + calamari_rest.views.v2.RequestViewSet.as_view({'get': 'retrieve'}), + name='request-detail'), + url(r'^request$', + calamari_rest.views.v2.RequestViewSet.as_view({'get': 'list'}), + name='request-list'), + url(r'^cluster/request/(?P[a-zA-Z0-9-]+)$', + calamari_rest.views.v2.RequestViewSet.as_view({'get': 'retrieve'}), + name='cluster-request-detail'), + url(r'^cluster/request$', + calamari_rest.views.v2.RequestViewSet.as_view({'get': 'list'}), + name='cluster-request-list'), + + # OSDs, Pools, CRUSH + url(r'^cluster/crush_rule_set$', + calamari_rest.views.v2.CrushRuleSetViewSet.as_view({'get': 'list'}), + name='cluster-crush_rule_set-list'), + url(r'^cluster/crush_rule$', + calamari_rest.views.v2.CrushRuleViewSet.as_view({'get': 'list'}), + name='cluster-crush_rule-list'), + url(r'^cluster/pool$', calamari_rest.views.v2.PoolViewSet.as_view( + {'get': 'list', 'post': 'create'}), + name='cluster-pool-list'), + url(r'^cluster/pool/(?P\d+)$', + calamari_rest.views.v2.PoolViewSet.as_view({ + 'get': 'retrieve', + 'patch': 'update', + 'delete': 'destroy'}), + name='cluster-pool-detail'), + + url(r'^cluster/osd$', + calamari_rest.views.v2.OsdViewSet.as_view({'get': 'list'}), + name='cluster-osd-list'), + url(r'^cluster/osd/(?P\d+)$', + calamari_rest.views.v2.OsdViewSet.as_view( + {'get': 'retrieve', 'patch': 'update'}), + name='cluster-osd-detail'), + url(r'^cluster/osd/command$', calamari_rest.views.v2.OsdViewSet.as_view( + {'get': 'get_implemented_commands'})), + url(r'^cluster/osd/(?P\d+)/command$', + calamari_rest.views.v2.OsdViewSet.as_view( + {'get': 'get_valid_commands'})), + + url(r'^cluster/osd/(?P\d+)/command/(?P[a-zA-Z_]+)$', + calamari_rest.views.v2.OsdViewSet.as_view( + {'get': 'validate_command', 'post': 'apply'})), + url(r'^cluster/osd_config$', + calamari_rest.views.v2.OsdConfigViewSet.as_view( + {'get': 'osd_config', 'patch': 'update'})), + + url(r'^cluster/mon$', + 
calamari_rest.views.v2.MonViewSet.as_view({'get': 'list'}), + name='cluster-mon-list'), + url(r'^cluster/mon/(?P[a-zA-Z0-9-\.]+)$', + calamari_rest.views.v2.MonViewSet.as_view( + {'get': 'retrieve'}), name='cluster-mon-detail'), + + # Direct access to SyncObjects, mainly for debugging + url(r'^cluster/sync_object$', + calamari_rest.views.v2.SyncObject.as_view({'get': 'describe'}), + name='cluster-sync-object-describe'), + url(r'^cluster/sync_object/(?P[a-zA-Z0-9-_]+)$', + calamari_rest.views.v2.SyncObject.as_view({'get': 'retrieve'}), + name='cluster-sync-object'), + url(r'^server/(?P[a-zA-Z0-9-\.]+)/debug_job', + calamari_rest.views.v2.DebugJob.as_view({'post': 'create'}), + name='server-debug-job'), + + url(r'^cluster/server$', + calamari_rest.views.v2.ServerViewSet.as_view({'get': 'list'}), + name='cluster-server-list'), + url(r'^cluster/server/(?P[a-zA-Z0-9-\.]+)$', + calamari_rest.views.v2.ServerViewSet.as_view( + {'get': 'retrieve'}), name='cluster-server-detail'), + + # Ceph configuration settings + url(r'^cluster/config$', + calamari_rest.views.v2.ConfigViewSet.as_view({'get': 'list'})), + url(r'^cluster/config/(?P[a-zA-Z0-9_]+)$', + calamari_rest.views.v2.ConfigViewSet.as_view({'get': 'retrieve'})), + + # Events + # url(r'^event$', calamari_rest.views.v2.EventViewSet.as_view({'get': 'list'})), + # url(r'^cluster/event$', calamari_rest.views.v2.EventViewSet.as_view({'get': 'list_cluster'})), + # url(r'^server/(?P[a-zA-Z0-9-\.]+)/event$', calamari_rest.views.v2.EventViewSet.as_view({'get': 'list_server'})), + + # Log tail + # url(r'^cluster/log$', + # calamari_rest.views.v2.LogTailViewSet.as_view({'get': 'get_cluster_log'})), + # url(r'^server/(?P[a-zA-Z0-9-\.]+)/log$', + # calamari_rest.views.v2.LogTailViewSet.as_view({'get': 'list_server_logs'})), + # url(r'^server/(?P[a-zA-Z0-9-\.]+)/log/(?P.+)$', + # calamari_rest.views.v2.LogTailViewSet.as_view({'get': 'get_server_log'})), + + # Ceph CLI access + # url(r'^cluster/cli$', + # 
def memoize(function):
    """
    Cache the results of a method on the instance it is called on.

    The cache is a ``_memo`` dict created lazily on ``args[0]`` (the
    instance), so cached values are discarded together with the instance.

    Entries are keyed by the wrapped function as well as by the call
    arguments.  Keying on the arguments alone let two memoized methods of
    the same object return each other's cached results whenever they were
    called with equal arguments.

    Only positional arguments are supported, and all of them (including the
    instance itself) must be hashable.
    """
    # Local import: the enclosing module does not import functools.
    import functools

    @functools.wraps(function)
    def wrapper(*args):
        self = args[0]
        if not hasattr(self, "_memo"):
            self._memo = {}

        memo = self._memo
        key = (function, args)
        if key not in memo:
            memo[key] = function(*args)
        return memo[key]
    return wrapper
class PaginatedMixin(object):
    """
    Mixin for views that return a single page of a larger result set.

    The host class must provide ``serializer_class``; each page is rendered
    through a ``PaginationSerializer`` subclass that wraps it.
    """
    default_page_size = 10

    @property
    def _pagination_serializer(self):
        """
        The PaginationSerializer subclass for this view, built on first use
        and cached on the instance.
        """
        # The cache attribute uses a single leading underscore on purpose.
        # A double-underscore attribute assigned inside the class is
        # name-mangled to _PaginatedMixin__..., so a hasattr() test against
        # the unmangled string never succeeds and the serializer class would
        # be rebuilt on every access.
        if not hasattr(self, '_cached_pagination_serializer'):
            class LocalPaginationSerializer(PaginationSerializer):
                class Meta:
                    object_serializer_class = self.serializer_class

            self._cached_pagination_serializer = LocalPaginationSerializer
        return self._cached_pagination_serializer

    def _paginate(self, request, objects):
        """
        Return the serialized page of ``objects`` selected by the request.

        The ``page`` GET parameter defaults to 1 and ``page_size`` to
        ``default_page_size``.

        :raises ParseError: if the paginator rejects ``page`` or ``page_size``
        """
        # Pagination is, of course, separate to databaseyness, so you might think
        # to put this in a different mixin.  However, the *way* you do pagination
        # with LIMIT et al is rather coupled to the database, so here we are.

        page_number = request.GET.get('page', 1)
        page_size = request.GET.get('page_size', self.default_page_size)

        # The django paginator conveniently works for sqlalchemy querysets because
        # they both have .count() and support array slicing
        try:
            paginator = Paginator(objects, page_size)
            page = paginator.page(page_number)
        except (ValueError, EmptyPage, PageNotAnInteger) as e:
            # Raise 400 if 'page' or 'page_size' were bad
            raise ParseError(str(e))
        ps = self._pagination_serializer(instance=page, context={'request': request})
        return ps.data
def list_requests(self, filter_args):
    """
    Return every known UserRequest matching ``filter_args`` in its
    JSON-serializable form, most recently requested first.

    :param filter_args: dict whose optional ``state`` and ``fsid`` entries
                        each restrict the result to requests whose attribute
                        of the same name is equal.  A missing or None entry
                        disables that filter.
    """
    state = filter_args.get('state', None)
    fsid = filter_args.get('fsid', None)

    matching = [
        self._dump_request(r)
        for r in rest_plugin().requests.get_all()
        if (state is None or r.state == state) and (fsid is None or r.fsid == fsid)
    ]

    # Use a key function rather than a positional cmp comparator, which only
    # Python 2 accepts.  reverse=True keeps the sort stable, so requests with
    # equal timestamps stay in their original relative order, exactly as
    # cmp(b, a) left them.
    return sorted(matching, key=lambda dumped: dumped['requested_at'], reverse=True)
def list(self, object_type, list_filter):
    """
    Get many objects

    :param object_type: one of OSD, POOL or CRUSH_RULE
    :param list_filter: dict of filters, only honoured for OSD:
                        ``id__in`` keeps OSDs whose id is in the given
                        collection, ``pool`` keeps OSDs that belong to the
                        given pool id
    :return: list of dicts taken from the OSD map, or an empty list while
             no OSD map data is available
    :raises NotFound: if the ``pool`` filter names a pool that does not exist
    :raises NotImplementedError: for any other ``object_type``
    """
    # Fetch the map once so that the OSD list and the pool membership used
    # to filter it come from the same snapshot.
    osd_map_object = self.get_sync_object(OsdMap)
    osd_map = osd_map_object.data
    if osd_map is None:
        return []

    if object_type == OSD:
        result = osd_map['osds']
        if 'id__in' in list_filter:
            result = [o for o in result if o['osd'] in list_filter['id__in']]
        if 'pool' in list_filter:
            try:
                osds_in_pool = osd_map_object.osds_by_pool[list_filter['pool']]
            except KeyError:
                # Same (object_type, object_id) form as every other NotFound
                # raised by this class, e.g. in _pool_resolve.
                raise NotFound(POOL, list_filter['pool'])
            result = [o for o in result if o['osd'] in osds_in_pool]

        return result
    elif object_type == POOL:
        return osd_map['pools']
    elif object_type == CRUSH_RULE:
        return osd_map['crush']['rules']
    else:
        raise NotImplementedError(object_type)
class IsRoleAllowed(BasePermission):
    """
    Permission check meant to enforce per-role (read-only vs. read/write)
    access.  The role logic is disabled for now, so every request is allowed.
    """
    def has_permission(self, request, view):
        # TODO: reinstate read vs. read/write limitations on API keys
        #
        # The intended logic is kept below for reference.  It used to sit
        # after the return statement together with a live, unreachable
        # assignment that rebound the name of this method.
        #
        # has_permission = False
        # if request.user.groups.filter(name='readonly').exists():
        #     has_permission = request.method in SAFE_METHODS
        #     view.headers['Allow'] = ', '.join(SAFE_METHODS)
        # elif request.user.groups.filter(name='read/write').exists():
        #     has_permission = True
        # elif request.user.is_superuser:
        #     has_permission = True
        #
        # return has_permission
        return True
on a pool + # id is forbidden during creation and update + # pg_num is required during create and optional during update + # pgp_num is optional during create or update + # nothing is required during update + if hasattr(self, 'update'): + if self.serializer_class: + actions['PATCH'] = self.serializer_class().metadata() + if hasattr(self, 'create'): + if self.serializer_class: + actions['POST'] = self.serializer_class().metadata() + ret['actions'] = actions + + return ret + + +class RPCViewSet(viewsets.ViewSetMixin, RPCView): + pass diff --git a/src/pybind/mgr/calamari_rest/views/v2.py b/src/pybind/mgr/calamari_rest/views/v2.py new file mode 100644 index 000000000000..50060335d892 --- /dev/null +++ b/src/pybind/mgr/calamari_rest/views/v2.py @@ -0,0 +1,770 @@ +from collections import defaultdict +import json +import logging +import shlex + +from django.http import Http404 +from rest_framework.exceptions import ParseError, APIException, PermissionDenied +from rest_framework.response import Response +from rest_framework.decorators import api_view +from rest_framework import status +from django.contrib.auth.decorators import login_required + + +from calamari_rest.serializers.v2 import PoolSerializer, CrushRuleSetSerializer, CrushRuleSerializer, \ + ServerSerializer, SaltKeySerializer, RequestSerializer, \ + ClusterSerializer, EventSerializer, LogTailSerializer, OsdSerializer, ConfigSettingSerializer, MonSerializer, OsdConfigSerializer, \ + CliSerializer +#from calamari_rest.views.database_view_set import DatabaseViewSet +from calamari_rest.views.exceptions import ServiceUnavailable +#from calamari_rest.views.paginated_mixin import PaginatedMixin +#from calamari_rest.views.remote_view_set import RemoteViewSet +from calamari_rest.views.rpc_view import RPCViewSet, DataObject +from calamari_rest.types import CRUSH_RULE, POOL, OSD, USER_REQUEST_COMPLETE, USER_REQUEST_SUBMITTED, \ + OSD_IMPLEMENTED_COMMANDS, MON, OSD_MAP, SYNC_OBJECT_TYPES, ServiceId, severity_from_str, 
#class RequestViewSet(RPCViewSet, PaginatedMixin):
class RequestViewSet(RPCViewSet):
    """
Calamari server requests, tracking long-running operations on the Calamari server.  Some
API resources return a ``202 ACCEPTED`` response with a request ID, which you can use with
this resource to learn about progress and completion of an operation.  Pagination of this
resource is currently disabled: all matching requests are returned in one response.

May optionally filter by state by passing a ``?state=<state>`` GET parameter, where
state is one of 'complete', 'submitted'.

The returned records are ordered by the 'requested_at' attribute, in descending order (i.e.
the most recent requests come first).

To cancel a request while it is running, send an empty POST to ``request/<request id>/cancel``.
    """
    serializer_class = RequestSerializer

    def cancel(self, request, request_id):
        """Cancel the request and return its serialized state."""
        user_request = DataObject(self.client.cancel_request(request_id))
        return Response(self.serializer_class(user_request).data)

    def retrieve(self, request, **kwargs):
        """Return the serialized request named by the ``request_id`` URL argument."""
        request_id = kwargs['request_id']
        user_request = DataObject(self.client.get_request(request_id))
        return Response(self.serializer_class(user_request).data)

    def list(self, request, **kwargs):
        """
        Return all requests, optionally filtered by the ``state`` GET
        parameter and by an ``fsid`` URL argument.

        :raises ParseError: if ``state`` is not a recognised request state
        """
        fsid = kwargs.get('fsid', None)
        filter_state = request.GET.get('state', None)
        valid_states = [USER_REQUEST_COMPLETE, USER_REQUEST_SUBMITTED]
        if filter_state is not None and filter_state not in valid_states:
            raise ParseError("State must be one of %s" % ", ".join(valid_states))

        requests = self.client.list_requests({'state': filter_state, 'fsid': fsid})
        # FIXME reinstate pagination, broke in DRF 2.x -> 3.x
        # (previously: Response(self._paginate(request, requests)), which
        # needs PaginatedMixin back in the base classes)
        return Response(requests)
class CrushRuleSetViewSet(RPCViewSet):
    """
A CRUSH ruleset is a collection of CRUSH rules which are applied
together to a pool.
    """
    # The class docstring is served as API help through RPCView.help, so it
    # has to describe rulesets rather than individual rules.
    serializer_class = CrushRuleSetSerializer

    def list(self, request):
        """
        Return every ruleset with its member rules; each rule is annotated
        with ``osd_count``, the number of OSDs listed for it in the OSD map's
        ``osds_by_rule_id`` index.
        """
        rules = self.client.list(CRUSH_RULE, {})
        osds_by_rule_id = self.client.get_sync_object(OsdMap, ['osds_by_rule_id'])

        # Group the rules by the id of the ruleset they belong to
        rulesets_data = defaultdict(list)
        for rule in rules:
            rule['osd_count'] = len(osds_by_rule_id[rule['rule_id']])
            rulesets_data[rule['ruleset']].append(rule)

        rulesets = [
            DataObject({
                'id': rd_id,
                'rules': [DataObject(r) for r in rd_rules]
            })
            for (rd_id, rd_rules) in rulesets_data.items()
        ]

        return Response(CrushRuleSetSerializer(rulesets, many=True).data)
+ """ + serializer_class = SaltKeySerializer + + def list(self, request): + return Response(self.serializer_class(self.client.minion_status(None), many=True).data) + + def partial_update(self, request, minion_id): + serializer = self.serializer_class(data=request.DATA) + if serializer.is_valid(request.method): + self._partial_update(minion_id, serializer.get_data()) + return Response(status=status.HTTP_204_NO_CONTENT) + else: + return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) + + def _partial_update(self, minion_id, data): + valid_status = ['accepted', 'rejected'] + if 'status' not in data: + raise ParseError({'status': "This field is mandatory"}) + elif data['status'] not in valid_status: + raise ParseError({'status': "Must be one of %s" % ",".join(valid_status)}) + else: + key = self.client.minion_get(minion_id) + transition = [key['status'], data['status']] + if transition == ['pre', 'accepted']: + self.client.minion_accept(minion_id) + elif transition == ['pre', 'rejected']: + self.client.minion_reject(minion_id) + else: + raise ParseError({'status': ["Transition {0}->{1} is invalid".format( + transition[0], transition[1] + )]}) + + def _validate_list(self, request): + keys = request.DATA + if not isinstance(keys, list): + raise ParseError("Bulk PATCH must send a list") + for key in keys: + if 'id' not in key: + raise ParseError("Items in bulk PATCH must have 'id' attribute") + + def list_partial_update(self, request): + self._validate_list(request) + + keys = request.DATA + log.debug("KEYS %s" % keys) + for key in keys: + self._partial_update(key['id'], key) + + return Response(status=status.HTTP_204_NO_CONTENT) + + def destroy(self, request, minion_id): + self.client.minion_delete(minion_id) + return Response(status=status.HTTP_204_NO_CONTENT) + + def list_destroy(self, request): + self._validate_list(request) + keys = request.DATA + for key in keys: + self.client.minion_delete(key['id']) + + return 
class PoolViewSet(RPCViewSet, RequestReturner):
    """
Manage Ceph storage pools.

To get the default values which will be used for any fields omitted from a POST, do
a GET with the ?defaults argument.  The returned pool object will contain all attributes,
but those without static defaults will be set to null.

    """
    serializer_class = PoolSerializer

    def _defaults(self):
        """
        Build the response for ``GET ?defaults``: the pool attributes that
        would be used for fields omitted from a POST.

        Returns a 503 response when the cluster configuration is unavailable
        or no CRUSH rules exist.
        """
        # Issue overlapped RPCs first
        # The settings dict lives in the sync object's .data, as in
        # ConfigViewSet and _check_pg_num_inside_config_bounds.
        ceph_config = self.client.get_sync_object(Config).data
        rules = self.client.list(CRUSH_RULE, {})

        if not ceph_config:
            return Response("Cluster configuration unavailable", status=status.HTTP_503_SERVICE_UNAVAILABLE)

        if not rules:
            return Response("No CRUSH rules exist, pool creation is impossible",
                            status=status.HTTP_503_SERVICE_UNAVAILABLE)

        # Ceph does not reliably inform us of a default ruleset that exists, so we check
        # what it tells us against the rulesets we know about.
        ruleset_ids = sorted(set(r['ruleset'] for r in rules))
        # Either setting may be absent depending on the ceph version, so both
        # are looked up with a default that can never match a ruleset id.
        if int(ceph_config.get('osd_pool_default_crush_rule', -1)) in ruleset_ids:
            # This is the ceph<0.80 setting
            default_ruleset = ceph_config['osd_pool_default_crush_rule']
        elif int(ceph_config.get('osd_pool_default_crush_replicated_ruleset', -1)) in ruleset_ids:
            # This is the ceph>=0.80
            default_ruleset = ceph_config['osd_pool_default_crush_replicated_ruleset']
        else:
            # Ceph may have an invalid default set which
            # would cause undefined behaviour in pool creation (#8373)
            # In this case, pick lowest numbered ruleset as default
            default_ruleset = ruleset_ids[0]

        defaults = NullableDataObject({
            'size': int(ceph_config['osd_pool_default_size']),
            'crush_ruleset': int(default_ruleset),
            'min_size': int(ceph_config['osd_pool_default_min_size']),
            'hashpspool': _config_to_bool(ceph_config['osd_pool_default_flag_hashpspool']),
            # Crash replay interval is zero by default when you create a pool, but when ceph creates
            # its own data pool it applies 'osd_default_data_pool_replay_window'.  If we add UI for adding
            # pools to a filesystem, we should check that those data pools have this set.
            'crash_replay_interval': 0,
            'quota_max_objects': 0,
            'quota_max_bytes': 0
        })

        return Response(PoolSerializer(defaults).data)

    def list(self, request):
        """List all pools, or the creation defaults when ``?defaults`` is passed."""
        if 'defaults' in request.GET:
            return self._defaults()

        pools = [PoolDataObject(p) for p in self.client.list(POOL, {})]
        return Response(PoolSerializer(pools, many=True).data)

    def retrieve(self, request, pool_id):
        """Return a single pool by id."""
        pool = PoolDataObject(self.client.get(POOL, int(pool_id)))
        return Response(PoolSerializer(pool).data)

    def create(self, request):
        """
        Validate the posted attributes and submit a pool creation request.

        Returns 202 with the request handle, 400/409 on validation errors.
        """
        serializer = self.serializer_class(data=request.DATA)
        if serializer.is_valid(request.method):
            response = self._validate_semantics(None, serializer.get_data())
            if response is not None:
                return response

            # MgrClient exposes creation as request_create(); it defines no
            # create() method.
            create_response = self.client.request_create(POOL, serializer.get_data())

            # TODO: handle case where the creation is rejected for some reason (should
            # be passed an errors dict for a clean failure, or a zerorpc exception
            # for a dirty failure)
            assert 'request_id' in create_response
            return Response(create_response, status=status.HTTP_202_ACCEPTED)
        else:
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

    def update(self, request, pool_id):
        """
        Validate the submitted attributes and submit a pool update request.

        Returns 202 with the request handle, 304 if nothing needs changing,
        400/409 on validation errors.
        """
        serializer = self.serializer_class(data=request.DATA)
        if serializer.is_valid(request.method):
            response = self._validate_semantics(pool_id, serializer.get_data())
            if response is not None:
                return response

            return self._return_request(self.client.update(POOL, int(pool_id), serializer.get_data()))
        else:
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

    def destroy(self, request, pool_id):
        """Submit a pool deletion request and return 202 with its handle."""
        # MgrClient exposes deletion as request_delete(obj_type, obj_id); it
        # defines no delete() method and takes no HTTP status argument.
        delete_response = self.client.request_delete(POOL, int(pool_id))
        return Response(delete_response, status=status.HTTP_202_ACCEPTED)

    def _validate_semantics(self, pool_id, data):
        """
        Run every cross-field and cluster-state check on ``data``.

        :param pool_id: id of the pool being updated, or None on creation
        :return: an error Response (409 if the name is taken, otherwise 400)
                 or None when all checks pass
        """
        errors = defaultdict(list)
        self._check_name_unique(data, errors)
        self._check_crush_ruleset(data, errors)
        self._check_pgp_less_than_pg_num(data, errors)
        self._check_pg_nums_dont_decrease(pool_id, data, errors)
        self._check_pg_num_inside_config_bounds(data, errors)

        if errors:
            if 'name' in errors:
                return Response(errors, status=status.HTTP_409_CONFLICT)
            else:
                return Response(errors, status=status.HTTP_400_BAD_REQUEST)

    def _check_pg_nums_dont_decrease(self, pool_id, data, errors):
        """On update, reject pg_num/pgp_num values below the pool's current ones."""
        if pool_id is not None:
            detail = self.client.get(POOL, int(pool_id))
            for field in ['pg_num', 'pgp_num']:
                # The OSD map calls pgp_num 'pg_placement_num'
                expanded_field = 'pg_placement_num' if field == 'pgp_num' else 'pg_num'
                if field in data and data[field] < detail[expanded_field]:
                    errors[field].append('must be >= than current {field}'.format(field=field))

    def _check_crush_ruleset(self, data, errors):
        """Reject a crush_ruleset that no existing CRUSH rule belongs to."""
        if 'crush_ruleset' in data:
            rules = self.client.list(CRUSH_RULE, {})
            rulesets = set(r['ruleset'] for r in rules)
            if data['crush_ruleset'] not in rulesets:
                errors['crush_ruleset'].append("CRUSH ruleset {0} not found".format(data['crush_ruleset']))

    def _check_pg_num_inside_config_bounds(self, data, errors):
        """
        Reject a pg_num above the cluster's ``mon_max_pool_pg_num`` setting.

        :raises ServiceUnavailable: if the cluster configuration is unavailable
        """
        ceph_config = self.client.get_sync_object(Config).data
        if not ceph_config:
            # Raise instead of returning a Response: _validate_semantics
            # ignores this method's return value, so a returned 503 would
            # never reach the client.
            raise ServiceUnavailable("Cluster configuration unavailable")
        if 'pg_num' in data and data['pg_num'] > int(ceph_config['mon_max_pool_pg_num']):
            errors['pg_num'].append('requested pg_num must be <= than current limit of {max}'.format(max=ceph_config['mon_max_pool_pg_num']))

    def _check_pgp_less_than_pg_num(self, data, errors):
        """Reject a pgp_num that is greater than the pg_num sent with it."""
        if 'pgp_num' in data and 'pg_num' in data and data['pg_num'] < data['pgp_num']:
            # The error is attached to pgp_num, which is the value that is
            # too large.
            errors['pgp_num'].append('must be <= pg_num')

    def _check_name_unique(self, data, errors):
        """Reject a name that is already used by an existing pool."""
        # NOTE(review): on update this also flags a pool's own unchanged
        # name, since the pool being updated is not excluded - confirm intent.
        if 'name' in data:
            existing_names = [PoolDataObject(p).pool_name for p in self.client.list(POOL, {})]
            if data['name'] in existing_names:
                errors['name'].append('Pool with name {name} already exists'.format(name=data['name']))
def apply(self, request, osd_id, command):
    """
    Run ``command`` on the OSD if it is currently valid for it.

    :return: 202 with the submitted request handle, or 403 when the command
             is not among the OSD's valid commands
    """
    osd_id_int = int(osd_id)
    valid_commands = self.client.get_valid_commands(
        OSD, [osd_id_int]).get(osd_id_int).get('valid_commands')

    if command in valid_commands:
        # MgrClient exposes this as request_apply(obj_type, obj_id, command);
        # it defines no apply() method.
        return Response(self.client.request_apply(OSD, osd_id_int, command), status=202)
    else:
        return Response('{0} not valid on {1}'.format(command, osd_id), status=403)
class DebugJob(RPCViewSet, RequestReturner):
    """
For debugging and automated testing only.
    """
    def create(self, request, fqdn):
        """
        Submit the self-test command named in the request body.

        The body must carry ``cmd`` and ``args``.

        :raises PermissionDenied: if ``cmd`` is not a ``ceph.selftest`` command
        """
        cmd = request.DATA['cmd']
        args = request.DATA['args']

        # Avoid this debug interface being an arbitrary execution mechanism.
        if not cmd.startswith("ceph.selftest"):
            # A '%s' placeholder combined with str.format() is never filled
            # in, so use a str.format() placeholder to report the command.
            raise PermissionDenied("Command '{0}' is not a self test command".format(cmd))

        # NOTE(review): the client class visible in this patch (MgrClient)
        # defines no debug_job() - confirm where it is provided.
        return self._return_request(self.client.debug_job(fqdn, cmd, args))
# NOTE: this view set is deliberately disabled by the `if False:` guard; the
# class below is never defined at import time.
if False:
    class EventViewSet(DatabaseViewSet, PaginatedMixin):
        """
        Events generated by Calamari server in response to messages from
        servers and Ceph clusters.  This resource is paginated.

        Note that events are not visible synchronously with respect to
        all other API resources.  For example, you might read the OSD
        map, see an OSD is down, then quickly read the events and find
        that the event about the OSD going down is not visible yet (though
        it would appear very soon after).

        The ``severity`` attribute mainly follows a typical INFO, WARN, ERROR
        hierarchy.  However, we have an additional level between INFO and WARN
        called RECOVERY.  Where something going bad in the system is usually
        a WARN message, the opposite state transition is usually a RECOVERY
        message.

        This resource supports "more severe than" filtering on the severity
        attribute.  Pass the desired severity threshold as a URL parameter
        in a GET, such as ``?severity=RECOVERY`` to show everything but INFO.

        """
        serializer_class = EventSerializer

        @property
        def queryset(self):
            # All events, most recent first.
            return self.session.query(Event).order_by(Event.when.desc())

        def _filter_by_severity(self, request, queryset=None):
            # Restrict `queryset` (default: self.queryset) to events whose
            # severity value is <= the threshold named by the ``severity``
            # GET parameter (default "INFO").  An unknown severity name is
            # reported to the client as a ParseError.
            if queryset is None:
                queryset = self.queryset
            severity_str = request.GET.get("severity", "INFO")
            try:
                severity = severity_from_str(severity_str)
            except KeyError:
                raise ParseError(
                    "Invalid severity '%s', must be one of %s" % (
                        severity_str, ",".join(SEVERITIES.values())))

            return queryset.filter(Event.severity <= severity)

        def list(self, request):
            return Response(self._paginate(request, self._filter_by_severity(request)))

        def list_cluster(self, request, fsid=None):
            # `fsid` used to be read from an undefined name, so any call
            # raised NameError.  It is now an optional argument; without it
            # no per-cluster restriction is applied.
            # NOTE(review): confirm the URL routing for this view is meant
            # to supply an fsid before re-enabling the class.
            queryset = self.queryset
            if fsid is not None:
                queryset = queryset.filter_by(fsid=fsid)
            return Response(self._paginate(request, self._filter_by_severity(request, queryset)))

        def list_server(self, request, fqdn):
            return Response(self._paginate(request, self._filter_by_severity(request, self.queryset.filter_by(fqdn=fqdn))))
class MonViewSet(RPCViewSet):
    """
Ceph monitor services.

Note that the ID used to retrieve a specific mon using this API resource is
the monitor *name* as opposed to the monitor *rank*.

The quorum status reported here is based on the last mon status reported by
the Ceph cluster, and also the status of each mon daemon queried by Calamari.

For debugging mons which are failing to join the cluster, it may be
useful to show users data from the /status sub-url, which returns the
"mon_status" output from the daemon.

    """
    serializer_class = MonSerializer

    def _get_mons(self):
        # Returns the 'mons' list of the MonMap with three fields added to
        # each entry: 'in_quorum', 'server' (hostname from the mon's
        # metadata) and 'leader'.
        monmap_mons = self.client.get_sync_object(MonMap).data['mons']
        mon_status = self.client.get_sync_object(MonStatus).data

        quorum = mon_status['quorum']
        # The first rank listed in the quorum is treated as the leader.
        # With an empty quorum list nobody is the leader; indexing [0]
        # unconditionally would raise IndexError in that case.
        leader_rank = quorum[0] if quorum else None

        for mon in monmap_mons:
            mon['in_quorum'] = mon['rank'] in quorum
            mon['server'] = self.client.get_metadata("mon", mon['name'])['hostname']
            mon['leader'] = leader_rank is not None and mon['rank'] == leader_rank

        return monmap_mons

    def retrieve(self, request, mon_id):
        # `mon_id` is matched against the mon *name*; the first match wins.
        mons = self._get_mons()
        try:
            mon = [m for m in mons if m['name'] == mon_id][0]
        except IndexError:
            raise Http404("Mon '%s' not found" % mon_id)

        return Response(self.serializer_class(DataObject(mon)).data)

    def list(self, request):
        mons = self._get_mons()
        return Response(
            self.serializer_class([DataObject(m) for m in mons],
                                  many=True).data)
def create(self, request):
    """
    Run the posted ``command`` through the "ceph.ceph_command" mon job and
    return the serialized result.

    ``command`` may be a string (tokenized with ``shlex.split``) or a list.
    A missing or wrongly typed ``command`` raises ParseError; a job result
    that is not a dict raises APIException.
    """
    # Validate
    try:
        command = request.DATA['command']
    except KeyError:
        raise ParseError("'command' field is required")

    if isinstance(command, basestring):
        # Parse string commands to list
        command = shlex.split(command)
    elif not isinstance(command, list):
        raise ParseError("'command' must be a string or list")

    # NOTE(review): `fsid` is not defined anywhere in this method; this line
    # looks like it would raise NameError if the enclosing (currently
    # disabled) view set were re-enabled -- confirm where fsid should come
    # from.
    cluster_name = self.client.get_cluster(fsid)['name']
    result = self.run_mon_job("ceph.ceph_command", [cluster_name, command])
    log.debug("CliViewSet: result = '%s'" % result)

    if isinstance(result, dict):
        return Response(self.serializer_class(DataObject(result)).data)

    # Errors from salt like "module not available" come back as strings
    raise APIException("Remote error: %s" % str(result))
# We must share a global reference to the plugin instance, because it is the
# gatekeeper to all accesses to data from the C++ side (e.g. the REST API
# request handlers need to see it).
_global_instance = {'plugin': None}


def global_instance():
    """
    Return the plugin instance stored in ``_global_instance['plugin']``.

    :raises AssertionError: if no instance has been stored yet
    """
    plugin = _global_instance['plugin']
    if plugin is None:
        # Raised explicitly instead of using an ``assert`` statement so the
        # check is still performed when Python runs with -O; the exception
        # type is the same as before.
        raise AssertionError("rest plugin instance has not been created yet")
    return plugin
def get_sync_object(self, object_type, path=None):
    """
    Build the sync object of type `object_type` from data obtained through
    ``self.get()``, optionally descending into it along `path`.

    :param object_type: one of OsdMap, Config, MonMap, FsMap, PgSummary,
                        Health or MonStatus
    :param path: optional sequence of parts; each part is used as a key
                 when the current value is a dict and as an attribute name
                 otherwise
    :return: the constructed object, or the value reached by following
             `path`
    :raises NotImplementedError: for any other `object_type`
    :raises NotFound: if a part of `path` cannot be resolved
    """
    if object_type == OsdMap:
        data = self.get("osd_map")

        assert data is not None

        # The OSD map is augmented with the separately fetched tree, CRUSH
        # and OSD metadata before being wrapped.
        data['tree'] = self.get("osd_map_tree")
        data['crush'] = self.get("osd_map_crush")
        data['crush_map_text'] = self.get("osd_map_crush_map_text")
        data['osd_metadata'] = self.get("osd_metadata")
        obj = OsdMap(data['epoch'], data)
    elif object_type == Config:
        data = self.get("config")
        obj = Config(0, data)
    elif object_type == MonMap:
        data = self.get("mon_map")
        obj = MonMap(data['epoch'], data)
    elif object_type == FsMap:
        data = self.get("fs_map")
        obj = FsMap(data['epoch'], data)
    elif object_type == PgSummary:
        # This and the two following types arrive JSON-encoded under 'json'.
        data = self.get("pg_summary")
        self.log.debug("JSON: {0}".format(data['json']))
        obj = PgSummary(0, json.loads(data['json']))
    elif object_type == Health:
        data = self.get("health")
        obj = Health(0, json.loads(data['json']))
    elif object_type == MonStatus:
        data = self.get("mon_status")
        obj = MonStatus(0, json.loads(data['json']))
    else:
        raise NotImplementedError(object_type)

    # TODO: move 'path' handling up into C++ land so that we only
    # Pythonize the part we're interested in
    if path:
        try:
            for part in path:
                if isinstance(obj, dict):
                    obj = obj[part]
                else:
                    obj = getattr(obj, part)
        except (AttributeError, KeyError, TypeError):
            # TypeError covers parts that cannot be used for the lookup at
            # all (e.g. a non-string part handed to getattr, or an
            # unhashable dict key); it is reported as NotFound like the
            # other lookup failures instead of escaping to the caller.
            raise NotFound(object_type, path)

    return obj
def handle_command(self, cmd):
    """
    Execute one of the module's commands, selected by ``cmd['prefix']``.

    Handles "enable_auth", "auth_key_create", "auth_key_delete" and
    "auth_key_list".

    :param cmd: dict with a 'prefix' entry plus that command's arguments
                ('val' or 'key_name')
    :return: a ``(return code, output string, error string)`` tuple; an
             unknown prefix yields ``-errno.EINVAL`` and an error message
    """
    self.log.info("handle_command: {0}".format(json.dumps(cmd, indent=2)))
    prefix = cmd['prefix']

    if prefix == "enable_auth":
        enabled = (cmd['val'] == "true")
        # Persist the setting, then apply it to the running instance.
        self.set_config_json("enable_auth", enabled)
        self.enable_auth = enabled
        return 0, "", ""

    if prefix == "auth_key_create":
        key_name = cmd['key_name']
        # An existing key is returned unchanged; only a new name triggers
        # key generation and a save.
        if key_name not in self.keys:
            self.keys[key_name] = self._generate_key()
            self._save_keys()
        return 0, self.keys[key_name], ""

    if prefix == "auth_key_delete":
        key_name = cmd['key_name']
        # Deleting an unknown key is not an error.
        if key_name in self.keys:
            del self.keys[key_name]
            self._save_keys()
        return 0, "", ""

    if prefix == "auth_key_list":
        return 0, json.dumps(self._load_keys(), indent=2), ""

    return -errno.EINVAL, "", "Command not found '{0}'".format(prefix)