from tasks.cephfs.caps_helper import CapTester
log = logging.getLogger(__name__)
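+# time (in seconds) to wait for a failed MDS rank to return to up:active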
+MDS_RESTART_GRACE = 60
class TestLabeledPerfCounters(CephFSTestCase):
    CLIENTS_REQUIRED = 2
    MDSS_REQUIRED = 1
-    def test_per_client_labeled_perf_counters(self):
+    def _get_counters_for(self, filesystem, client_id):
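+        """
+        Return the labelled counters for `client_id` from rank 0's
+        `counter dump` output under the 'mds_client_metrics-<filesystem>' key.
+
+        Illustrative shape of one dumped entry (only the fields used by these
+        tests are shown):
+            {"labels": {"fs_name": "...", "client": "client.<gid>"},
+             "counters": {"total_write_ops": ..., "cap_hits": ..., ...}}
+        """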
+        dump = self.fs.rank_tell(["counter", "dump"])
+        per_client_metrics_key = f'mds_client_metrics-{filesystem}'
+        counters = [c["counters"] for c in dump[per_client_metrics_key]
+                    if c["labels"]["client"] == client_id]
+        return counters[0]
+
+    def test_per_client_labeled_perf_counters_on_client_disconnect(self):
+        """
+        That per-client labelled metrics are no longer reported for a client
+        after it disconnects.
+        """
+        mount_a_id = f'client.{self.mount_a.get_global_id()}'
+        self.mount_a.teardown()
+        with safe_while(sleep=1, tries=30, action=f'wait for counters - {mount_a_id}') as proceed:
+            while proceed():
+                dump = self.fs.rank_tell(["counter", "dump"])
+                per_client_metrics_key = f"mds_client_metrics-{dump['mds_client_metrics'][0]['labels']['fs_name']}"
+                clients = [c["labels"]["client"] for c in dump.get(per_client_metrics_key, {})]
+                if clients and mount_a_id not in clients:
+                    # success, no metrics.
+                    return True
+
+    def test_per_client_labeled_perf_counters_on_client_reconnect(self):
        """
-        That the per-client labelled perf counters depict the clients
-        performaing IO.
+        That per-client labelled metrics are reported again once the clients
+        reconnect after an MDS restart.
        """
-        def get_counters_for(filesystem, client_id):
-            dump = self.fs.rank_tell(["counter", "dump"])
-            per_client_metrics_key = f'mds_client_metrics-{filesystem}'
-            counters = [c["counters"] for \
-                        c in dump[per_client_metrics_key] if c["labels"]["client"] == client_id]
-            return counters[0]
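+        # the MDS is failed below; once the rank is back up:active the clients
+        # reconnect, and their labelled counters should reappear in the dump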
+        # fail active mds and wait for reconnect
+        mds = self.fs.get_active_names()[0]
+        self.mds_cluster.mds_fail(mds)
+        self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE)
+        mount_a_id = f'client.{self.mount_a.get_global_id()}'
+        mount_b_id = f'client.{self.mount_b.get_global_id()}'
+        fs_suffix = ""
+
+        with safe_while(sleep=1, tries=30, action='wait for counters') as proceed:
+            while proceed():
+                dump = self.fs.rank_tell(["counter", "dump"])
+                fs_suffix = dump['mds_client_metrics'][0]['labels']['fs_name']
+                per_client_metrics_key = f"mds_client_metrics-{fs_suffix}"
+                clients = [c["labels"]["client"] for c in dump.get(per_client_metrics_key, {})]
+                if mount_a_id in clients and mount_b_id in clients:
+                    # success, got metrics.
+                    break  # break to continue the test
+
+        # Post reconnecting, validate the io perf counters
+        # write workload
+        self.mount_a.create_n_files("test_dir/test_file", 1000, sync=True)
+        with safe_while(sleep=1, tries=30, action=f'wait for counters - {mount_a_id}') as proceed:
+            while proceed():
+                counters_dump_a = self._get_counters_for(fs_suffix, mount_a_id)
+                if counters_dump_a["total_write_ops"] > 0 and counters_dump_a["total_write_size"] > 0 and \
+                   counters_dump_a["avg_write_latency"] >= 0 and counters_dump_a["avg_metadata_latency"] >= 0 and \
+                   counters_dump_a["opened_files"] >= 0 and counters_dump_a["opened_inodes"] > 0 and \
+                   counters_dump_a["cap_hits"] > 0 and counters_dump_a["dentry_lease_hits"] > 0 and \
+                   counters_dump_a["pinned_icaps"] > 0:
+                    break  # break to continue the test
+
+        # read from the other client
+        for i in range(100):
+            self.mount_b.open_background(basename=f'test_dir/test_file_{i}', write=False)
+        with safe_while(sleep=1, tries=30, action=f'wait for counters - {mount_b_id}') as proceed:
+            while proceed():
+                counters_dump_b = self._get_counters_for(fs_suffix, mount_b_id)
+                if counters_dump_b["total_read_ops"] >= 0 and counters_dump_b["total_read_size"] >= 0 and \
+                   counters_dump_b["avg_read_latency"] >= 0 and counters_dump_b["avg_metadata_latency"] >= 0 and \
+                   counters_dump_b["opened_files"] >= 0 and counters_dump_b["opened_inodes"] >= 0 and \
+                   counters_dump_b["cap_hits"] > 0 and counters_dump_b["dentry_lease_hits"] > 0 and \
+                   counters_dump_b["pinned_icaps"] > 0:
+                    break  # break to continue the test
+
+        self.mount_a.teardown()
+        self.mount_b.teardown()
+
+    def test_per_client_labeled_perf_counters_io(self):
+        """
+        That the per-client labelled perf counters depict the clients performing IO.
+        """
        # sleep a bit so that we get updated clients...
        sleep(10)
+
+        # lookout for the client ids and the filesystem label in the counter dump
+        dump = self.fs.rank_tell(["counter", "dump"])
+        fs_suffix = dump['mds_client_metrics'][0]['labels']['fs_name']
+        mount_a_id = f'client.{self.mount_a.get_global_id()}'
+        mount_b_id = f'client.{self.mount_b.get_global_id()}'
+
        # write workload
        self.mount_a.create_n_files("test_dir/test_file", 1000, sync=True)
        with safe_while(sleep=1, tries=30, action=f'wait for counters - {mount_a_id}') as proceed:
-            counters_dump_a = get_counters_for(fs_suffix, mount_a_id)
            while proceed():
-                if counters_dump_a["total_write_ops"] > 0 and counters_dump_a["total_write_size"] > 0:
-                    return True
+                counters_dump_a = self._get_counters_for(fs_suffix, mount_a_id)
+                if counters_dump_a["total_write_ops"] > 0 and counters_dump_a["total_write_size"] > 0 and \
+                   counters_dump_a["avg_write_latency"] >= 0 and counters_dump_a["avg_metadata_latency"] >= 0 and \
+                   counters_dump_a["opened_files"] >= 0 and counters_dump_a["opened_inodes"] > 0 and \
+                   counters_dump_a["cap_hits"] > 0 and counters_dump_a["dentry_lease_hits"] > 0 and \
+                   counters_dump_a["pinned_icaps"] > 0:
+                    break  # break to continue the test
        # read from the other client
        for i in range(100):
            self.mount_b.open_background(basename=f'test_dir/test_file_{i}', write=False)
        with safe_while(sleep=1, tries=30, action=f'wait for counters - {mount_b_id}') as proceed:
-            counters_dump_b = get_counters_for(fs_suffix, mount_b_id)
            while proceed():
-                if counters_dump_b["total_read_ops"] > 0 and counters_dump_b["total_read_size"] > 0:
-                    return True
-
-        self.fs.teardown()
+                counters_dump_b = self._get_counters_for(fs_suffix, mount_b_id)
+                if counters_dump_b["total_read_ops"] >= 0 and counters_dump_b["total_read_size"] >= 0 and \
+                   counters_dump_b["avg_read_latency"] >= 0 and counters_dump_b["avg_metadata_latency"] >= 0 and \
+                   counters_dump_b["opened_files"] >= 0 and counters_dump_b["opened_inodes"] >= 0 and \
+                   counters_dump_b["cap_hits"] > 0 and counters_dump_b["dentry_lease_hits"] > 0 and \
+                   counters_dump_b["pinned_icaps"] > 0:
+                    break  # break to continue the test
+
+        self.mount_a.teardown()
+        self.mount_b.teardown()
class TestAdminCommands(CephFSTestCase):
"""