pybind/mgr/localpool: module to automagically create localized pools
author Sage Weil <sage@redhat.com>
Wed, 6 Sep 2017 19:34:50 +0000 (15:34 -0400)
committer Sage Weil <sage@redhat.com>
Mon, 25 Sep 2017 17:30:37 +0000 (13:30 -0400)
By default, this will create a pool per rack, 3x replication, with a host
failure domain.  Those parameters can be customized via mgr config-key
options.

Signed-off-by: Sage Weil <sage@redhat.com>
(cherry picked from commit 1a0f42b70a4c9fa68dc47f2f521d0f1e8f5bb220)
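
As a rough illustration (not part of this commit), the defaults described above
combine into per-subtree pool names roughly as follows; the authoritative logic
is handle_osd_map() in module.py below, and pool_name_for() here is just a
hypothetical helper for this sketch:

    # Hypothetical sketch of the naming convention; the option names mirror
    # the mgr config-key options documented in doc/mgr/localpool.rst below.
    def pool_name_for(subtree_name, subtree_type='rack', prefix=None):
        # The default prefix is derived from the subtree type, e.g. 'by-rack-'.
        prefix = prefix or 'by-' + subtree_type + '-'
        return prefix + subtree_name

    print(pool_name_for('rack1'))                     # by-rack-rack1
    print(pool_name_for('row3', subtree_type='row'))  # by-row-row3 (if subtree were set to 'row')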

doc/mgr/localpool.rst [new file with mode: 0644]
src/pybind/mgr/localpool/__init__.py [new file with mode: 0644]
src/pybind/mgr/localpool/module.py [new file with mode: 0644]

diff --git a/doc/mgr/localpool.rst b/doc/mgr/localpool.rst
new file mode 100644
index 0000000..e0f31ce
--- /dev/null
+++ b/doc/mgr/localpool.rst
@@ -0,0 +1,34 @@
+Local pool plugin
+=================
+
+The *localpool* plugin can automatically create RADOS pools that are
+localized to a subset of the overall cluster.  For example, by default, it will
+create a pool for each distinct rack in the cluster.  This can be useful for
+deployments that want to distribute some data locally as well as globally across the cluster.
+
+Enabling
+--------
+
+The *localpool* module is enabled with::
+
+  ceph mgr module enable localpool
+
+Configuring
+-----------
+
+The *localpool* module understands the following options:
+
+* **subtree** (default: `rack`): which CRUSH subtree type the module
+  should create a pool for.
+* **failure_domain** (default: `host`): which failure domain to separate
+  data replicas across.
+* **pg_num** (default: `128`): number of PGs to create for each pool.
+* **num_rep** (default: `3`): number of replicas for each pool.
+  (Currently, pools are always replicated.)
+* **prefix** (default: `by-$subtreetype-`): prefix for the pool name.
+
+These options are set via the config-key interface.  For example, to
+change the replication level to 2x with only 64 PGs, ::
+
+  ceph config-key set mgr/localpool/num_rep 2
+  ceph config-key set mgr/localpool/pg_num 64
diff --git a/src/pybind/mgr/localpool/__init__.py b/src/pybind/mgr/localpool/__init__.py
new file mode 100644
index 0000000..79f5b86
--- /dev/null
+++ b/src/pybind/mgr/localpool/__init__.py
@@ -0,0 +1,2 @@
+
+from module import *  # NOQA
diff --git a/src/pybind/mgr/localpool/module.py b/src/pybind/mgr/localpool/module.py
new file mode 100644
index 0000000..18e19bb
--- /dev/null
+++ b/src/pybind/mgr/localpool/module.py
@@ -0,0 +1,79 @@
+from mgr_module import MgrModule, CommandResult
+import json
+import threading
+
+class Module(MgrModule):
+    def __init__(self, *args, **kwargs):
+        super(Module, self).__init__(*args, **kwargs)
+        self.serve_event = threading.Event()
+
+    def notify(self, notify_type, notify_id):
+        if notify_type == 'osd_map':
+            self.handle_osd_map()
+
+    def handle_osd_map(self):
+        """
+        Check pools on each OSDMap change
+        """
+        subtree_type = self.get_config('subtree') or 'rack'
+        failure_domain = self.get_config('failure_domain') or 'host'
+        pg_num = self.get_config('pg_num') or '128'
+        num_rep = self.get_config('num_rep') or '3'
+        prefix = self.get_config('prefix') or 'by-' + subtree_type + '-'
+
+        osdmap = self.get("osd_map")
+        lpools = []
+        for pool in osdmap['pools']:
+            if pool['pool_name'].startswith(prefix):
+                lpools.append(pool['pool_name'])
+
+        self.log.debug('localized pools = %s', lpools)
+        subtrees = []
+        tree = self.get('osd_map_tree')
+        for node in tree['nodes']:
+            if node['type'] == subtree_type:
+                subtrees.append(node['name'])
+                pool_name = prefix + node['name']
+                if pool_name not in lpools:
+                    self.log.info('Creating localized pool %s', pool_name)
+                    # create a CRUSH rule rooted at this subtree, separating replicas across the failure domain
+                    result = CommandResult("")
+                    self.send_command(result, "mon", "", json.dumps({
+                        "prefix": "osd crush rule create-replicated",
+                        "format": "json",
+                        "name": pool_name,
+                        "root": node['name'],
+                        "type": failure_domain,
+                    }), "")
+                    r, outb, outs = result.wait()
+
+                    result = CommandResult("")
+                    self.send_command(result, "mon", "", json.dumps({
+                        "prefix": "osd pool create",
+                        "format": "json",
+                        "pool": pool_name,
+                        "rule": pool_name,
+                        "pool_type": "replicated",
+                        "pg_num": str(pg_num),
+                    }), "")
+                    r, outb, outs = result.wait()
+
+                    result = CommandResult("")
+                    self.send_command(result, "mon", "", json.dumps({
+                        "prefix": "osd pool set",
+                        "format": "json",
+                        "pool": pool_name,
+                        "var": "size",
+                        "val": str(num_rep),
+                    }), "")
+                    r, outb, outs = result.wait()
+
+        # TODO: remove pools for subtrees that no longer exist?
+
+    def serve(self):
+        self.handle_osd_map()
+        self.serve_event.wait()
+        self.serve_event.clear()
+
+    def shutdown(self):
+        self.serve_event.set()
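
For reference, here is a minimal standalone sketch (not part of this commit) of
the subtree-to-pool mapping that handle_osd_map() implements above, using
simplified stand-ins for the pool list from get('osd_map') and the nodes from
get('osd_map_tree'); missing_localized_pools() is a hypothetical helper, not
part of the module:

    # Hypothetical re-statement of the loop in handle_osd_map(): given the
    # existing pool names and the CRUSH tree nodes, work out which localized
    # pools are missing and would still need to be created.
    def missing_localized_pools(pool_names, tree_nodes,
                                subtree_type='rack', prefix=None):
        prefix = prefix or 'by-' + subtree_type + '-'
        existing = [name for name in pool_names if name.startswith(prefix)]
        wanted = [prefix + node['name']
                  for node in tree_nodes
                  if node['type'] == subtree_type]
        return [name for name in wanted if name not in existing]

    # Toy inputs mirroring the fields the module reads from the OSD map tree.
    nodes = [
        {'type': 'rack', 'name': 'rack1'},
        {'type': 'rack', 'name': 'rack2'},
        {'type': 'host', 'name': 'host-a'},
    ]
    print(missing_localized_pools(['by-rack-rack1'], nodes))  # ['by-rack-rack2']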