]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/vstart_runner: add unsharing network namespace support
authorXiubo Li <xiubli@redhat.com>
Tue, 3 Mar 2020 13:31:29 +0000 (08:31 -0500)
committerXiubo Li <xiubli@redhat.com>
Tue, 14 Apr 2020 11:47:03 +0000 (07:47 -0400)
This will isolate the network namespace for each mount point with
a private ip address and iptables, etc.

For kill(), it will just bring DOWN the veth interface instead of
sending an IPMI request (kernel mount) or killing the fuse processes
(fuse mount). This avoids sending the socket FIN to the ceph
cluster.

Fixes: https://tracker.ceph.com/issues/44044
Signed-off-by: Xiubo Li <xiubli@redhat.com>
doc/dev/developer_guide/running-tests-locally.rst
qa/cephfs/begin.yaml
qa/tasks/ceph_fuse.py
qa/tasks/cephfs/fuse_mount.py
qa/tasks/cephfs/kernel_mount.py
qa/tasks/cephfs/mount.py
qa/tasks/kclient.py
qa/tasks/vstart_runner.py

index f80a6f52b339c62f166051a48ebee471bf241a53..6a1f6e09758a3d405c266de3529e6f15befd2b24 100644 (file)
@@ -102,6 +102,8 @@ vstart_runner.py can take the following options -
 --teardown                  tears Ceph cluster down after test(s) has finished
                             runnng
 --kclient                   use the kernel cephfs client instead of FUSE
+--brxnet=<net/mask>         specify a new net/mask for the mount clients' network
+                            namespace container (Default: 192.168.0.0/16)
 
 .. note:: If using the FUSE client, ensure that the fuse package is installed
           and enabled on the system and that ``user_allow_other`` is added
index 0f3beb8fbd03e0f057e976bc44167b40ed1d9d3a..b06a4b27a58ac29eeeee2473654dca99796f0715 100644 (file)
@@ -13,6 +13,9 @@ tasks:
         - flex
         - libelf-dev
         - libssl-dev
+        - network-manager
+        - iproute2
+        - util-linux
         # for xfstests-dev
         - dump
         - indent
@@ -21,6 +24,9 @@ tasks:
         - flex
         - elfutils-libelf-devel
         - openssl-devel
+        - NetworkManager
+        - iproute
+        - util-linux
         # for xfstests-dev
         - libacl-devel
         - libaio-devel
index 1439ccffd566921ed3ae189d647f8d06abb14188..34dd36da10a67b5554bef2a7767130423c313894 100644 (file)
@@ -42,12 +42,16 @@ def task(ctx, config):
     this operation on. This lets you e.g. set up one client with
     ``ceph-fuse`` and another with ``kclient``.
 
+    ``brxnet`` should be a Private IPv4 Address range, default range is
+    [192.168.0.0/16]
+
     Example that mounts all clients::
 
         tasks:
         - ceph:
         - ceph-fuse:
         - interactive:
+        - brxnet: [192.168.0.0/16]
 
     Example that uses both ``kclient` and ``ceph-fuse``::
 
@@ -106,6 +110,8 @@ def task(ctx, config):
     mounted_by_me = {}
     skipped = {}
 
+    brxnet = config.get("brxnet", None)
+
     # Construct any new FuseMount instances
     for id_, remote in clients:
         client_config = config.get("client.%s" % id_)
@@ -120,7 +126,7 @@ def task(ctx, config):
             continue
 
         if id_ not in all_mounts:
-            fuse_mount = FuseMount(ctx, client_config, testdir, auth_id, remote)
+            fuse_mount = FuseMount(ctx, client_config, testdir, auth_id, remote, brxnet)
             all_mounts[id_] = fuse_mount
         else:
             # Catch bad configs where someone has e.g. tried to use ceph-fuse and kcephfs for the same client
index c23e1f54caf291a40fd8f2414db16b05c36750b5..ac4da5b360d40acdbb3413646d04491c06f22902 100644 (file)
@@ -2,7 +2,7 @@ from io import BytesIO
 import json
 import time
 import logging
-
+import re
 import six
 
 from textwrap import dedent
@@ -17,8 +17,8 @@ log = logging.getLogger(__name__)
 
 
 class FuseMount(CephFSMount):
-    def __init__(self, ctx, client_config, test_dir, client_id, client_remote):
-        super(FuseMount, self).__init__(ctx, test_dir, client_id, client_remote)
+    def __init__(self, ctx, client_config, test_dir, client_id, client_remote, brxnet):
+        super(FuseMount, self).__init__(ctx, test_dir, client_id, client_remote, brxnet)
 
         self.client_config = client_config if client_config else {}
         self.fuse_daemon = None
@@ -31,6 +31,7 @@ class FuseMount(CephFSMount):
         if mountpoint is not None:
             self.mountpoint = mountpoint
         self.setupfs(name=mount_fs_name)
+        self.setup_netns()
 
         try:
             return self._mount(mount_path, mount_fs_name, mount_options)
@@ -91,6 +92,10 @@ class FuseMount(CephFSMount):
             )
             cwd = None # misc.get_valgrind_args chdir for us
 
+        netns_prefix = ['sudo', 'nsenter',
+                        '--net=/var/run/netns/{0}'.format(self.netns_name)]
+        run_cmd = netns_prefix + run_cmd
+
         run_cmd.extend(fuse_cmd)
 
         def list_connections():
@@ -161,6 +166,8 @@ class FuseMount(CephFSMount):
 
         self.gather_mount_info()
 
+        self.mounted = True
+
     def gather_mount_info(self):
         status = self.admin_socket(['status'])
         self.id = status['id']
@@ -247,7 +254,7 @@ class FuseMount(CephFSMount):
         try:
             log.info('Running fusermount -u on {name}...'.format(name=self.client_remote.name))
             self.client_remote.run(
-                args=[
+                args = [
                     'sudo',
                     'fusermount',
                     '-u',
@@ -329,6 +336,7 @@ class FuseMount(CephFSMount):
         try:
             # Permit a timeout, so that we do not block forever
             run.wait([self.fuse_daemon], timeout)
+
         except MaxWhileTries:
             log.error("process failed to terminate after unmount. This probably"
                       " indicates a bug within ceph-fuse.")
@@ -337,50 +345,8 @@ class FuseMount(CephFSMount):
             if require_clean:
                 raise
 
-        self.cleanup()
-
-    def cleanup(self):
-        """
-        Remove the mount point.
-
-        Prerequisite: the client is not mounted.
-        """
-        stderr = BytesIO()
-        try:
-            self.client_remote.run(
-                args=[
-                    'rmdir',
-                    '--',
-                    self.mountpoint,
-                ],
-                cwd=self.test_dir,
-                stderr=stderr,
-                timeout=(60*5),
-                check_status=False,
-            )
-        except CommandFailedError:
-            if b"No such file or directory" in stderr.getvalue():
-                pass
-            else:
-                raise
-
-    def kill(self):
-        """
-        Terminate the client without removing the mount point.
-        """
-        log.info('Killing ceph-fuse connection on {name}...'.format(name=self.client_remote.name))
-        self.fuse_daemon.stdin.close()
-        try:
-            self.fuse_daemon.wait()
-        except CommandFailedError:
-            pass
-
-    def kill_cleanup(self):
-        """
-        Follow up ``kill`` to get to a clean unmounted state.
-        """
-        log.info('Cleaning up killed ceph-fuse connection')
-        self.umount()
+        self.cleanup_netns()
+        self.mounted = False
         self.cleanup()
 
     def teardown(self):
@@ -398,6 +364,9 @@ class FuseMount(CephFSMount):
             except CommandFailedError:
                 pass
 
+        self.cleanup_netns()
+        self.mounted = False
+
         # Indiscriminate, unlike the touchier cleanup()
         self.client_remote.run(
             args=[
index 38d44ab04b99f4086e04f645c5798a8605a8bc9d..2dc07ca973c31bafe5eef8951d60cc0f387dfa7c 100644 (file)
@@ -17,19 +17,16 @@ UMOUNT_TIMEOUT = 300
 
 
 class KernelMount(CephFSMount):
-    def __init__(self, ctx, test_dir, client_id, client_remote,
-                 ipmi_user, ipmi_password, ipmi_domain):
-        super(KernelMount, self).__init__(ctx, test_dir, client_id, client_remote)
+    def __init__(self, ctx, test_dir, client_id, client_remote, brxnet):
+        super(KernelMount, self).__init__(ctx, test_dir, client_id, client_remote, brxnet)
 
         self.mounted = False
-        self.ipmi_user = ipmi_user
-        self.ipmi_password = ipmi_password
-        self.ipmi_domain = ipmi_domain
 
     def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None, mount_options=[]):
         if mountpoint is not None:
             self.mountpoint = mountpoint
         self.setupfs(name=mount_fs_name)
+        self.setup_netns()
 
         log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format(
             id=self.client_id, remote=self.client_remote, mnt=self.mountpoint))
@@ -55,6 +52,8 @@ class KernelMount(CephFSMount):
                 'adjust-ulimits',
                 'ceph-coverage',
                 '{tdir}/archive/coverage'.format(tdir=self.test_dir),
+                'nsenter',
+                '--net=/var/run/netns/{0}'.format(self.netns_name),
                 '/bin/mount',
                 '-t',
                 'ceph',
@@ -91,19 +90,9 @@ class KernelMount(CephFSMount):
             ], timeout=(15*60))
             raise e
 
-        rproc = self.client_remote.run(
-            args=[
-                'rmdir',
-                '--',
-                self.mountpoint,
-            ],
-            wait=False
-        )
-        run.wait([rproc], UMOUNT_TIMEOUT)
         self.mounted = False
-
-    def cleanup(self):
-        pass
+        self.cleanup_netns()
+        self.cleanup()
 
     def umount_wait(self, force=False, require_clean=False, timeout=900):
         """
@@ -118,10 +107,20 @@ class KernelMount(CephFSMount):
             if not force:
                 raise
 
-            self.kill()
-            self.kill_cleanup()
-
-        self.mounted = False
+            # force delete the netns and umount
+            self.cleanup_netns()
+            self.client_remote.run(
+                args=['sudo',
+                      'umount',
+                      '-f',
+                      '-l',
+                      self.mountpoint
+                ],
+                timeout=(15*60))
+
+            self.mounted = False
+            self.cleanup_netns()
+            self.cleanup()
 
     def is_mounted(self):
         return self.mounted
@@ -138,57 +137,6 @@ class KernelMount(CephFSMount):
         if self.mounted:
             self.umount()
 
-    def kill(self):
-        """
-        The Ceph kernel client doesn't have a mechanism to kill itself (doing
-        that in side the kernel would be weird anyway), so we reboot the whole node
-        to get the same effect.
-
-        We use IPMI to reboot, because we don't want the client to send any
-        releases of capabilities.
-        """
-
-        con = orchestra_remote.getRemoteConsole(self.client_remote.hostname,
-                                                self.ipmi_user,
-                                                self.ipmi_password,
-                                                self.ipmi_domain)
-        con.hard_reset(wait_for_login=False)
-
-        self.mounted = False
-
-    def kill_cleanup(self):
-        assert not self.mounted
-
-        # We need to do a sleep here because we don't know how long it will
-        # take for a hard_reset to be effected.
-        time.sleep(30)
-
-        try:
-            # Wait for node to come back up after reboot
-            misc.reconnect(None, 300, [self.client_remote])
-        except:
-            # attempt to get some useful debug output:
-            con = orchestra_remote.getRemoteConsole(self.client_remote.hostname,
-                                                    self.ipmi_user,
-                                                    self.ipmi_password,
-                                                    self.ipmi_domain)
-            con.check_status(timeout=60)
-            raise
-
-        # Remove mount directory
-        self.client_remote.run(args=['uptime'], timeout=10)
-
-        # Remove mount directory
-        self.client_remote.run(
-            args=[
-                'rmdir',
-                '--',
-                self.mountpoint,
-            ],
-            timeout=(5*60),
-            check_status=False,
-        )
-
     def _find_debug_dir(self):
         """
         Find the debugfs folder for this mount
index d3cb0a429edd3fb0cdda33b961ab9015eb15a240..d913dcbcb64fdba3a9ad47511d98caca10d92cbf 100644 (file)
@@ -8,15 +8,18 @@ import time
 from six import StringIO
 from textwrap import dedent
 import os
+import re
+from IPy import IP
 from teuthology.orchestra import run
 from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
 from tasks.cephfs.filesystem import Filesystem
+import platform
 
 log = logging.getLogger(__name__)
 
 
 class CephFSMount(object):
-    def __init__(self, ctx, test_dir, client_id, client_remote):
+    def __init__(self, ctx, test_dir, client_id, client_remote, brxnet):
         """
         :param test_dir: Global teuthology test dir
         :param client_id: Client ID, the 'foo' in client.foo
@@ -30,11 +33,32 @@ class CephFSMount(object):
         self.mountpoint_dir_name = 'mnt.{id}'.format(id=self.client_id)
         self._mountpoint = None
         self.fs = None
+        self._netns_name = None
+        self.nsid = -1
+        if brxnet is None:
+            self.ceph_brx_net = '192.168.0.0/16'
+        else:
+            self.ceph_brx_net = brxnet
 
         self.test_files = ['a', 'b', 'c']
 
         self.background_procs = []
 
+        # On Centos/Redhat 8 the 'brctl' has been deprecated. But the
+        # 'nmcli' in ubuntu 18.04 is buggy to setup the network bridge
+        # and there is no workaround, will continue to use the 'brctl'
+        args = ["bash", "-c",
+                "cat /etc/os-release"]
+        p = self.client_remote.run(args=args, stderr=BytesIO(),
+                                   stdout=BytesIO(), timeout=(5*60))
+        distro = re.findall(r'NAME="Ubuntu"', p.stdout.getvalue())
+        version = re.findall(r'VERSION_ID="18.04"', p.stdout.getvalue())
+        self.use_brctl = len(distro) is not 0 and len(version) is not 0
+        
+    def _parse_netns_name(self):
+        self._netns_name = '-'.join(["ceph-ns",
+                                     re.sub(r'/+', "-", self.mountpoint)])
+
     @property
     def mountpoint(self):
         if self._mountpoint == None:
@@ -47,6 +71,19 @@ class CephFSMount(object):
         if not isinstance(path, str):
             raise RuntimeError('path should be of str type.')
         self._mountpoint = path
+        self._parse_netns_name()
+
+    @property
+    def netns_name(self):
+        if self._netns_name == None:
+            self._parse_netns_name()
+        return self._netns_name
+
+    @netns_name.setter
+    def netns_name(self, name):
+        if not isinstance(name, str):
+            raise RuntimeError('name should be of str type.')
+        self._netns_name = name
 
     def is_mounted(self):
         raise NotImplementedError()
@@ -60,6 +97,320 @@ class CephFSMount(object):
         self.fs.wait_for_daemons()
         log.info('Ready to start {}...'.format(type(self).__name__))
 
+    def _bringup_network_manager_service(self):
+        args = ["sudo", "bash", "-c",
+                "systemctl start NetworkManager"]
+        self.client_remote.run(args=args, timeout=(5*60))
+
+    def _setup_brx_and_nat(self):
+        # The ip for ceph-brx should be the last valid IP of the brx net
+        ip = IP(self.ceph_brx_net)[-2]
+        mask = self.ceph_brx_net.split('/')[1]
+        brd = IP(self.ceph_brx_net).broadcast()
+
+        brx = self.client_remote.run(args=['ip', 'addr'], stderr=BytesIO(),
+                                     stdout=BytesIO(), timeout=(5*60))
+        brx = re.findall(r'inet .* ceph-brx', brx.stdout.getvalue())
+        if brx:
+            # If the 'ceph-brx' already exists, then check whether
+            # the new net is conflicting with it
+            _ip, _mask = brx[0].split()[1].split('/', 1)
+            if _ip != "{}".format(ip) or _mask != mask:
+                raise RuntimeError("Conflict with existing ceph-brx {0}, new {1}/{2}".format(brx[0].split()[1], ip, mask))
+            return
+
+        log.info("Setuping the 'ceph-brx' with {0}/{1}".format(ip, mask))
+
+        # Setup the ceph-brx and always use the last valid IP
+        if self.use_brctl:
+            args = ["sudo", "bash", "-c", "brctl addbr ceph-brx"]
+            self.client_remote.run(args=args, timeout=(5*60))
+            args = ["sudo", "bash", "-c", "ip link set ceph-brx up"]
+            self.client_remote.run(args=args, timeout=(5*60))
+            args = ["sudo", "bash", "-c",
+                    "ip addr add {0}/{1} brd {2} dev ceph-brx".format(ip, mask, brd)]
+            self.client_remote.run(args=args, timeout=(5*60))
+        else:
+            self._bringup_network_manager_service()
+            args = ["sudo", "bash", "-c",
+                    "nmcli connection add type bridge con-name ceph-brx ifname ceph-brx stp no"]
+            self.client_remote.run(args=args, timeout=(5*60))
+            args = ["sudo", "bash", "-c",
+                    "nmcli connection modify ceph-brx ipv4.addresses {0}/{1} ipv4.method manual".format(ip, mask)]
+            self.client_remote.run(args=args, timeout=(5*60))
+            args = ["sudo", "bash", "-c", "nmcli connection up ceph-brx"]
+            self.client_remote.run(args=args, timeout=(5*60))
+
+        # Save the ip_forward so that the cleanup can restore it later
+        self.client_remote.run(args=['touch', '/tmp/python-ceph-brx'],
+                               timeout=(5*60))
+        p = self.client_remote.run(args=['cat', '/proc/sys/net/ipv4/ip_forward'],
+                                   stderr=BytesIO(), stdout=BytesIO(),
+                                   timeout=(5*60))
+        val = p.stdout.getvalue().strip()
+        args = ["sudo", "bash", "-c",
+                "echo {} > /tmp/python-ceph-brx".format(val)]
+        self.client_remote.run(args=args, timeout=(5*60))
+        args = ["sudo", "bash", "-c",
+                "echo 1 > /proc/sys/net/ipv4/ip_forward"]
+        self.client_remote.run(args=args, timeout=(5*60))
+
+        # Setup the NAT; findall() returns a list, empty if no default route
+        p = self.client_remote.run(args=['route'], stderr=BytesIO(),
+                                   stdout=BytesIO(), timeout=(5*60))
+        p = re.findall(r'default .*', p.stdout.getvalue())
+        if not p:
+            raise RuntimeError("No default gw found")
+        gw = p[0].split()[7]
+        args = ["sudo", "bash", "-c",
+                "iptables -A FORWARD -o {0} -i ceph-brx -j ACCEPT".format(gw)]
+        self.client_remote.run(args=args, timeout=(5*60))
+        args = ["sudo", "bash", "-c",
+                "iptables -A FORWARD -i {0} -o ceph-brx -j ACCEPT".format(gw)]
+        self.client_remote.run(args=args, timeout=(5*60))
+        args = ["sudo", "bash", "-c",
+                "iptables -t nat -A POSTROUTING -s {0}/{1} -o {2} -j MASQUERADE".format(ip, mask, gw)]
+        self.client_remote.run(args=args, timeout=(5*60))
+
+    def _setup_netns(self):
+        p = self.client_remote.run(args=['ip', 'netns', 'list'],
+                                   stderr=BytesIO(), stdout=BytesIO(),
+                                   timeout=(5*60))
+        p = p.stdout.getvalue().strip()
+        if re.search(r'^{0}(\s|$)'.format(re.escape(self.netns_name)), p, re.M) is not None:
+            raise RuntimeError("the netns '{}' already exists!".format(self.netns_name))
+
+        # Get the netns name list
+        netns_list = re.findall(r'[^()\s][-.\w]+[^():\s]', p)
+
+        # Get an uniq netns id
+        nsid = 0
+        while True:
+            p = self.client_remote.run(args=['ip', 'netns', 'list-id'],
+                                       stderr=BytesIO(), stdout=BytesIO(),
+                                       timeout=(5*60))
+            p = re.search(r"nsid {} ".format(nsid), p.stdout.getvalue())
+            if p is None:
+                break
+
+            nsid += 1
+
+        self.nsid = nsid
+
+        # Add one new netns and set it id
+        args = ["sudo", "bash", "-c",
+                "ip netns add {0}".format(self.netns_name)]
+        self.client_remote.run(args=args, timeout=(5*60))
+        args = ["sudo", "bash", "-c",
+                "ip netns set {0} {1}".format(self.netns_name, nsid)]
+        self.client_remote.run(args=args, timeout=(5*60))
+
+        # Get one ip address for netns
+        ips = IP(self.ceph_brx_net)
+        for ip in ips:
+            found = False
+            if ip == ips[0]:
+                continue
+            if ip == ips[-2]:
+                raise RuntimeError("we have ran out of the ip addresses")
+
+            for ns in netns_list:
+                ns_name = ns.split()[0]
+                args = ["sudo", "bash", "-c",
+                        "ip netns exec {0} ip addr".format(ns_name)]
+                p = self.client_remote.run(args=args, stderr=BytesIO(),
+                                           stdout=BytesIO(), timeout=(5*60))
+                q = re.search("{0}".format(ip), p.stdout.getvalue())
+                if q is not None:
+                    found = True
+                    break
+
+            if not found:
+                break
+
+        mask = self.ceph_brx_net.split('/')[1]
+        brd = IP(self.ceph_brx_net).broadcast()
+
+        log.info("Setuping the netns '{0}' with {1}/{2}".format(self.netns_name, ip, mask))
+
+        # Setup the veth interfaces
+        args = ["sudo", "bash", "-c",
+                "ip link add veth0 netns {0} type veth peer name brx.{1}".format(self.netns_name, nsid)]
+        self.client_remote.run(args=args, timeout=(5*60))
+        args = ["sudo", "bash", "-c",
+                "ip netns exec {0} ip addr add {1}/{2} brd {3} dev veth0".format(self.netns_name, ip, mask, brd)]
+        self.client_remote.run(args=args, timeout=(5*60))
+        args = ["sudo", "bash", "-c",
+                "ip netns exec {0} ip link set veth0 up".format(self.netns_name)]
+        self.client_remote.run(args=args, timeout=(5*60))
+        args = ["sudo", "bash", "-c",
+                "ip netns exec {0} ip link set lo up".format(self.netns_name)]
+        self.client_remote.run(args=args, timeout=(5*60))
+
+        brxip = IP(self.ceph_brx_net)[-2]
+        args = ["sudo", "bash", "-c",
+                "ip netns exec {0} ip route add default via {1}".format(self.netns_name, brxip)]
+        self.client_remote.run(args=args, timeout=(5*60))
+
+        # Bring up the brx interface and join it to 'ceph-brx'
+        if self.use_brctl:
+            args = ["sudo", "bash", "-c",
+                    "ip link set brx.{0} up".format(nsid)]
+            self.client_remote.run(args=args, timeout=(5*60))
+            args = ["sudo", "bash", "-c",
+                    "brctl addif ceph-brx brx.{0}".format(nsid)]
+            self.client_remote.run(args=args, timeout=(5*60))
+        else:
+            self._bringup_network_manager_service()
+            args = ["sudo", "bash", "-c",
+                    "nmcli connection add type bridge-slave con-name brx.{0} ifname brx.{0} master ceph-brx".format(nsid)]
+            self.client_remote.run(args=args, timeout=(5*60))
+            args = ["sudo", "bash", "-c",
+                    "nmcli connection up brx.{0}".format(self.nsid)]
+            self.client_remote.run(args=args, timeout=(5*60))
+
+    def _cleanup_netns(self):
+        if self.nsid == -1:
+            return
+        log.info("Removing the netns '{0}'".format(self.netns_name))
+
+        # Delete the netns and the peer veth interface
+        if self.use_brctl == True:
+            args = ["sudo", "bash", "-c",
+                    "ip link set brx.{0} down".format(self.nsid)]
+            self.client_remote.run(args=args, timeout=(5*60))
+            args = ["sudo", "bash", "-c",
+                    "brctl delif ceph-brx brx.{0}".format(self.nsid)]
+            self.client_remote.run(args=args, timeout=(5*60))
+            args = ["sudo", "bash", "-c",
+                    "ip link delete brx.{0}".format(self.nsid)]
+            self.client_remote.run(args=args, timeout=(5*60))
+        else:
+            self._bringup_network_manager_service()
+            args = ["sudo", "bash", "-c",
+                    "nmcli connection down brx.{0}".format(self.nsid)]
+            self.client_remote.run(args=args, timeout=(5*60))
+            args = ["sudo", "bash", "-c",
+                    "nmcli connection delete brx.{0}".format(self.nsid)]
+            self.client_remote.run(args=args, timeout=(5*60))
+
+        args = ["sudo", "bash", "-c",
+                "ip netns delete {0}".format(self.netns_name)]
+        self.client_remote.run(args=args, timeout=(5*60))
+
+        self.nsid = -1
+
+    def _cleanup_brx_and_nat(self):
+        brx = self.client_remote.run(args=['ip', 'addr'], stderr=BytesIO(),
+                                     stdout=BytesIO(), timeout=(5*60))
+        brx = re.findall(r'inet .* ceph-brx', brx.stdout.getvalue())
+        if not brx:
+            return
+
+        # If we are the last netns, will delete the ceph-brx
+        if self.use_brctl:
+            p = self.client_remote.run(args=['brctl', 'show', 'ceph-brx'],
+                                       stderr=BytesIO(), stdout=BytesIO(),
+                                       timeout=(5*60))
+        else:
+            self._bringup_network_manager_service()
+            p = self.client_remote.run(args=['nmcli', 'connection', 'show'],
+                                       stderr=BytesIO(), stdout=BytesIO(),
+                                       timeout=(5*60))
+        _list = re.findall(r'brx\.', p.stdout.getvalue().strip())
+        if _list:
+            return
+
+        log.info("Removing the 'ceph-brx'")
+
+        if self.use_brctl:
+            args = ["sudo", "bash", "-c",
+                    "ip link set ceph-brx down"]
+            self.client_remote.run(args=args, timeout=(5*60))
+            args = ["sudo", "bash", "-c",
+                    "brctl delbr ceph-brx"]
+            self.client_remote.run(args=args, timeout=(5*60))
+        else:
+            args = ["sudo", "bash", "-c",
+                    "nmcli connection down ceph-brx"]
+            self.client_remote.run(args=args, timeout=(5*60))
+            args = ["sudo", "bash", "-c",
+                    "nmcli connection delete ceph-brx"]
+            self.client_remote.run(args=args, timeout=(5*60))
+
+        # Drop the iptables NAT rules
+        ip = IP(self.ceph_brx_net)[-2]
+        mask = self.ceph_brx_net.split('/')[1]
+
+        p = self.client_remote.run(args=['route'], stderr=BytesIO(),
+                                   stdout=BytesIO(), timeout=(5*60))
+        p = re.findall(r'default .*', p.stdout.getvalue())
+        if not p:
+            raise RuntimeError("No default gw found")
+        gw = p[0].split()[7]
+        args = ["sudo", "bash", "-c",
+                "iptables -D FORWARD -o {0} -i ceph-brx -j ACCEPT".format(gw)]
+        self.client_remote.run(args=args, timeout=(5*60))
+        args = ["sudo", "bash", "-c",
+                "iptables -D FORWARD -i {0} -o ceph-brx -j ACCEPT".format(gw)]
+        self.client_remote.run(args=args, timeout=(5*60))
+        args = ["sudo", "bash", "-c",
+                "iptables -t nat -D POSTROUTING -s {0}/{1} -o {2} -j MASQUERADE".format(ip, mask, gw)]
+        self.client_remote.run(args=args, timeout=(5*60))
+
+        # Restore the ip_forward
+        p = self.client_remote.run(args=['cat', '/tmp/python-ceph-brx'],
+                                   stderr=BytesIO(), stdout=BytesIO(),
+                                   timeout=(5*60))
+        val = p.stdout.getvalue().strip()
+        args = ["sudo", "bash", "-c",
+                "echo {} > /proc/sys/net/ipv4/ip_forward".format(val)]
+        self.client_remote.run(args=args, timeout=(5*60))
+        self.client_remote.run(args=['rm', '-f', '/tmp/python-ceph-brx'],
+                               timeout=(5*60))
+
+    def setup_netns(self):
+        """
+        Setup the netns for the mountpoint.
+        """
+        log.info("Setting the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint))
+        self._setup_brx_and_nat()
+        self._setup_netns()
+
+    def cleanup_netns(self):
+        """
+        Cleanup the netns for the mountpoint.
+        """
+        log.info("Cleaning the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint))
+        self._cleanup_netns()
+        self._cleanup_brx_and_nat()
+
+    def suspend_netns(self):
+        """
+        Suspend the netns veth interface.
+        """
+        if self.nsid == -1:
+            return
+
+        log.info("Suspending the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint))
+
+        args = ["sudo", "bash", "-c",
+                "ip link set brx.{0} down".format(self.nsid)]
+        self.client_remote.run(args=args, timeout=(5*60))
+
+    def resume_netns(self):
+        """
+        Resume the netns veth interface.
+        """
+        if self.nsid == -1:
+            return
+
+        log.info("Resuming the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint))
+
+        args = ["sudo", "bash", "-c",
+                "ip link set brx.{0} up".format(self.nsid)]
+        self.client_remote.run(args=args, timeout=(5*60))
+
     def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None, mount_options=[]):
         raise NotImplementedError()
 
@@ -78,14 +429,46 @@ class CephFSMount(object):
         """
         raise NotImplementedError()
 
-    def kill_cleanup(self):
-        raise NotImplementedError()
-
     def kill(self):
-        raise NotImplementedError()
+        """
+        Suspend the netns veth interface to make the client disconnected
+        from the ceph cluster
+        """
+        log.info('Killing connection on {0}...'.format(self.client_remote.name))
+        self.suspend_netns()
+
+    def kill_cleanup(self):
+        """
+        Follow up ``kill`` to get to a clean unmounted state.
+        """
+        log.info('Cleaning up killed connection on {0}'.format(self.client_remote.name))
+        self.umount_wait(force=True)
+        self.cleanup()
 
     def cleanup(self):
-        raise NotImplementedError()
+        """
+        Remove the mount point.
+
+        Prerequisite: the client is not mounted.
+        """
+        stderr = BytesIO()
+        try:
+            self.client_remote.run(
+                args=[
+                    'rmdir',
+                    '--',
+                    self.mountpoint,
+                ],
+                cwd=self.test_dir,
+                stderr=stderr,
+                timeout=(60*5),
+                check_status=False,
+            )
+        except CommandFailedError:
+            if "No such file or directory" in stderr.getvalue():
+                pass
+            else:
+                raise
 
     def wait_until_mounted(self):
         raise NotImplementedError()
index 50d557f3ce75523b8bf368e2fbb8fbf83eb68ac5..efc6cb47ff28f1608af0e0c82509b6aeeb6bade1 100644 (file)
@@ -22,12 +22,16 @@ def task(ctx, config):
     this operation on. This lets you e.g. set up one client with
     ``ceph-fuse`` and another with ``kclient``.
 
+    ``brxnet`` should be a Private IPv4 Address range, default range is
+    [192.168.0.0/16]
+
     Example that mounts all clients::
 
         tasks:
         - ceph:
         - kclient:
         - interactive:
+        - brxnet: [192.168.0.0/16]
 
     Example that uses both ``kclient` and ``ceph-fuse``::
 
@@ -86,9 +90,7 @@ def task(ctx, config):
             test_dir,
             id_,
             remote,
-            ctx.teuthology_config.get('ipmi_user', None),
-            ctx.teuthology_config.get('ipmi_password', None),
-            ctx.teuthology_config.get('ipmi_domain', None)
+            ctx.teuthology_config.get('brxnet', None),
         )
 
         mounts[id_] = kernel_mount
index 919ac92c58b4fe47cd44e58d68041a22ad3980c9..42e09f188f72b5b37853dfa90ddcdb39b1bbfd6b 100644 (file)
@@ -43,6 +43,7 @@ import os
 import time
 import sys
 import errno
+from IPy import IP
 from unittest import suite, loader
 import unittest
 import platform
@@ -514,8 +515,8 @@ def safe_kill(pid):
 
 
 class LocalKernelMount(KernelMount):
-    def __init__(self, ctx, test_dir, client_id):
-        super(LocalKernelMount, self).__init__(ctx, test_dir, client_id, LocalRemote(), None, None, None)
+    def __init__(self, ctx, test_dir, client_id, brxnet):
+        super(LocalKernelMount, self).__init__(ctx, test_dir, client_id, LocalRemote(), brxnet)
 
     @property
     def config_path(self):
@@ -656,6 +657,7 @@ class LocalKernelMount(KernelMount):
 
     def mount(self, mount_path=None, mount_fs_name=None, mount_options=[]):
         self.setupfs(name=mount_fs_name)
+        self.setup_netns()
 
         log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format(
             id=self.client_id, remote=self.client_remote, mnt=self.mountpoint))
@@ -684,6 +686,8 @@ class LocalKernelMount(KernelMount):
         self.client_remote.run(
             args=[
                 'sudo',
+                'nsenter',
+                '--net=/var/run/netns/{0}'.format(self.netns_name),
                 './bin/mount.ceph',
                 ':{mount_path}'.format(mount_path=mount_path),
                 self.mountpoint,
@@ -709,8 +713,8 @@ class LocalKernelMount(KernelMount):
                                       wait=False)
 
 class LocalFuseMount(FuseMount):
-    def __init__(self, ctx, test_dir, client_id):
-        super(LocalFuseMount, self).__init__(ctx, None, test_dir, client_id, LocalRemote())
+    def __init__(self, ctx, test_dir, client_id, brxnet):
+        super(LocalFuseMount, self).__init__(ctx, None, test_dir, client_id, LocalRemote(), brxnet)
 
     @property
     def config_path(self):
@@ -823,6 +827,7 @@ class LocalFuseMount(FuseMount):
         if mountpoint is not None:
             self.mountpoint = mountpoint
         self.setupfs(name=mount_fs_name)
+        self.setup_netns()
 
         self.client_remote.run(args=['mkdir', '-p', self.mountpoint])
 
@@ -863,7 +868,9 @@ class LocalFuseMount(FuseMount):
         prefix += mount_options;
 
         self.fuse_daemon = self.client_remote.run(args=
-                                            prefix + [
+                                            ['nsenter',
+                                             '--net=/var/run/netns/{0}'.format(self.netns_name),
+                                            ] + prefix + [
                                                 "-f",
                                                 "--name",
                                                 "client.{0}".format(self.client_id),
@@ -1201,6 +1208,7 @@ class InteractiveFailureResult(unittest.TextTestResult):
 def enumerate_methods(s):
     log.info("e: {0}".format(s))
     for t in s._tests:
+       print("t {0}, s._tests {1}".format(t, s._tests))
         if isinstance(t, suite.BaseTestSuite):
             for sub in enumerate_methods(t):
                 yield sub
@@ -1232,7 +1240,10 @@ def scan_tests(modules):
     max_required_mgr = 0
     require_memstore = False
 
+    print("module = {}".format(overall_suite))
     for suite_, case in enumerate_methods(overall_suite):
+       print("suite {0}".format(suite_))
+       print("case {0}".format(case))
         max_required_mds = max(max_required_mds,
                                getattr(case, "MDSS_REQUIRED", 0))
         max_required_clients = max(max_required_clients,
@@ -1310,6 +1321,7 @@ def exec_test():
     global opt_log_ps_output
     opt_log_ps_output = False
     use_kernel_client = False
+    opt_brxnet= None
 
     args = sys.argv[1:]
     flags = [a for a in args if a.startswith("-")]
@@ -1331,6 +1343,18 @@ def exec_test():
             clear_old_log()
         elif f == "--kclient":
             use_kernel_client = True
+        elif '--brxnet' in f:
+            if re.search(r'=[0-9./]+', f) is None:
+                log.error("--brxnet=<ip/mask> option needs one argument: '{0}'".format(f))
+                sys.exit(-1)
+            opt_brxnet=f.split('=')[1]
+            try:  
+                IP(opt_brxnet)  
+                if IP(opt_brxnet).iptype() is 'PUBLIC':
+                    raise RuntimeError('is public')
+            except Exception as  e:  
+                log.error("Invalid ip '{0}' {1}".format(opt_brxnet, e))
+                sys.exit(-1)
         else:
             log.error("Unknown option '{0}'".format(f))
             sys.exit(-1)
@@ -1415,9 +1439,9 @@ def exec_test():
             open("./keyring", "a").write(p.stdout.getvalue())
 
         if use_kernel_client:
-            mount = LocalKernelMount(ctx, test_dir, client_id)
+            mount = LocalKernelMount(ctx, test_dir, client_id, opt_brxnet)
         else:
-            mount = LocalFuseMount(ctx, test_dir, client_id)
+            mount = LocalFuseMount(ctx, test_dir, client_id, opt_brxnet)
 
         mounts.append(mount)
         if os.path.exists(mount.mountpoint):