import time
import logging
from io import BytesIO, StringIO
+import yaml
from tasks.mgr.mgr_test_case import MgrTestCase
from teuthology import contextutil
}
}))
+ def apply_ganesha_spec(self, spec):
+ """
+ apply spec and wait for redeploy otherwise it will reset any conf changes
+ :param spec: ganesha daemon spec (YAML)
+ """
+ ganesha_daemon_pid_init = (self.ctx.cluster.run(args=["sudo", "pgrep", "ganesha.nfsd"],
+ stdout=StringIO(),
+ stderr=StringIO()))[0].stdout.getvalue().strip()
+ self.ctx.cluster.run(args=['ceph', 'orch', 'apply', '-i', '-'],
+ stdin=spec)
+ with contextutil.safe_while(sleep=4, tries=15) as proceed:
+ while proceed():
+ try:
+ ganesha_daemon_pid = (self.ctx.cluster.run(args=["sudo", "pgrep", "ganesha.nfsd"],
+ stdout=StringIO(),
+ stderr=StringIO()))[0].stdout.getvalue().strip()
+ if ganesha_daemon_pid != ganesha_daemon_pid_init:
+ # new pid i.e. redeployment done
+ break
+ except CommandFailedError:
+ # no pid if the redeployment is in progress
+ log.info('waiting for ganesha daemon redeployment')
+
+ def enable_libcephfs_logging(self, cluster_name):
+ """
+ enable ceph client logs by adding a volume mount to ganesha daemon's
+ unit.run using `ceph orch apply -i <spec>` and adding client log path
+ to /var/lib/ceph/{fsid}/{ganesha_daemon}/config
+ :param cluster_name: nfs cluster name
+ """
+ fsid = self._cmd("fsid").strip()
+
+ # add volume mount for ceph client logging from /var/log/ceph/$fsid:/var/log/ceph:z
+ ganesha_spec = self._cmd("orch", "ls", "--service-name",
+ f"nfs.{cluster_name}", "--export").strip()
+ parsed_ganesha_spec = yaml.safe_load(ganesha_spec)
+ original_ganesha_spec = yaml.dump(parsed_ganesha_spec)
+ parsed_ganesha_spec["extra_container_args"] = ["-v",
+ f"/var/log/ceph/{fsid}:/var/log/ceph:z"]
+ debug_enabled_ganesha_spec = yaml.dump(parsed_ganesha_spec).replace("- -v", '- "-v"').replace(
+ f"- /var/log/ceph/{fsid}:/var/log/ceph:z", f'- "/var/log/ceph/{fsid}:/var/log/ceph:z"')
+ log.debug(f"debug enabled ganesha spec: {debug_enabled_ganesha_spec}")
+
+ self.apply_ganesha_spec(debug_enabled_ganesha_spec)
+
+ # add client debug to /var/lib/ceph/$fsid/$ganesha_daemon/config
+ ganesha_daemon = ((self._orch_cmd("ps", "--daemon-type", "nfs")).split("\n")[1].split(' ')[0]).strip()
+ GANESHA_CONF_FILE_PATH = f"/var/lib/ceph/{fsid}/{ganesha_daemon}/config"
+
+ original_ganesha_conf = (self.ctx.cluster.run(args=["sudo", "cat", GANESHA_CONF_FILE_PATH],
+ stdout=StringIO(),
+ stderr=StringIO()))[0].stdout.getvalue().strip()
+ if "[client]" not in original_ganesha_conf:
+ s = f"[client]\n\tdebug client = 20\n\tlog file = /var/log/ceph/ceph-client.nfs.{cluster_name}.log"
+ self._sys_cmd(["echo", Raw(f'"{s}"'), Raw("|"), "sudo", "tee", Raw("-a"), GANESHA_CONF_FILE_PATH])
+ # restart ganesha daemon for the changes to take effect
+ self._orch_cmd("restart", f"nfs.{cluster_name}")
+
+ # ensure log level and file path exists
+ ganesha_conf_debug_enabled = (self.ctx.cluster.run(args=["sudo", "cat", GANESHA_CONF_FILE_PATH],
+ stdout=StringIO(),
+ stderr=StringIO()))[0].stdout.getvalue().strip()
+ self.assertIn("[client]", ganesha_conf_debug_enabled)
+ self.assertIn("debug client = 20", ganesha_conf_debug_enabled)
+ self.assertIn(f"log file = /var/log/ceph/ceph-client.nfs.{cluster_name}.log",
+ ganesha_conf_debug_enabled)
+
+ def check_libcephfs_log():
+ LIBCEPHFS_LOG_FILE_PATH = f"/var/log/ceph/{fsid}/ceph-client.nfs.{cluster_name}.log"
+ libcephfs_log = (self.ctx.cluster.run(args=["sudo", "cat",
+ LIBCEPHFS_LOG_FILE_PATH,
+ Raw("|"), "tail", "-n", "2"],
+ check_status=False,
+ stdout=StringIO(),
+ stderr=StringIO()))
+ if libcephfs_log[0].returncode != 0:
+ log.debug(f"failed to read {LIBCEPHFS_LOG_FILE_PATH}, retrying")
+ return False
+ if len(libcephfs_log[0].stdout.getvalue().strip()) == 0:
+ log.debug(f"log file {LIBCEPHFS_LOG_FILE_PATH} empty, retrying")
+ return False
+ return True
+
+ # usually appears in no time, sometimes might take a second or two for the log file to appear
+ self.wait_until_true(check_libcephfs_log, timeout=60)
+
+ return original_ganesha_spec, GANESHA_CONF_FILE_PATH, original_ganesha_conf
+
+ def disable_libcephfs_logging(self, cluster_name, ganesha_spec, conf_path, ganesha_conf):
+ """
+ disable ceph client logs by reverting back to the primary ganesha spec and removing debug level
+ and file path from /var/lib/ceph/{fsid}/{ganesha_daemon}/config
+ :param cluster_name: nfs cluster name
+ :param ganesha_spec: primary spec (spec prior to adding debug volume mount)
+ :param conf_path: ganesha conf file path
+ :param ganesha_conf: primary ganesha conf (conf prior to adding debug level and path)
+ """
+ self.apply_ganesha_spec(ganesha_spec)
+
+ # remove ceph client debug info from ganesha conf
+ conf_content = (self.ctx.cluster.run(args=["sudo", "cat", conf_path],
+ stdout=StringIO(),
+ stderr=StringIO()))[0].stdout.getvalue().strip()
+ if "[client]" in conf_content:
+ self.ctx.cluster.run(args=['sudo', 'truncate', Raw("-s"), "0", conf_path])
+ self._sys_cmd(["echo", Raw(f'"{ganesha_conf}"'), Raw("|"), "sudo", "tee", conf_path])
+ default_conf = (self.ctx.cluster.run(args=["sudo", "cat", conf_path],
+ stdout=StringIO(),
+ stderr=StringIO()))[0].stdout.getvalue().strip()
+ self.assertNotIn("[client]", default_conf)
+ self.assertNotIn("debug client = 20", default_conf)
+ self.assertNotIn(f"log file = /var/log/ceph/ceph-client.nfs.{cluster_name}.log", default_conf)
+ # restart ganesha daemon for the changes to take effect
+ self._orch_cmd("restart", f"nfs.{cluster_name}")
+
def test_create_and_delete_cluster(self):
'''
Test successful creation and deletion of the nfs cluster.
Test async io using fio. Expect completion without hang or crash
'''
self._test_create_cluster()
+ ganesha_spec, conf_path, conf = self.enable_libcephfs_logging(self.cluster_id)
self._create_export(export_id='1', create_fs=True,
extra_cmd=['--pseudo-path', self.pseudo_path])
port, ip = self._get_port_ip_info()
self._check_nfs_cluster_status('running', 'NFS Ganesha cluster restart failed')
self._test_fio(self.pseudo_path, port, ip)
+ self.disable_libcephfs_logging(self.cluster_id, ganesha_spec, conf_path, conf)
self._test_delete_cluster()
def test_cluster_info(self):