]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/tasks: add thrash test for persistent write log cache
authorYin Congmin <congmin.yin@intel.com>
Fri, 7 Jan 2022 07:03:44 +0000 (15:03 +0800)
committerYin Congmin <congmin.yin@intel.com>
Wed, 13 Jul 2022 05:31:02 +0000 (13:31 +0800)
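Add a thrash test for the persistent write log cache. The task repeatedly
runs rbd bench on the persistent write log cache, kills (thrashes) the
rbd bench process mid-run, and checks the resulting cache state in order
to test the recovery function of the persistent write log cache.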

Signed-off-by: Yin Congmin <congmin.yin@intel.com>
qa/suites/rbd/pwl-cache/home/7-workloads/recovery.yaml
qa/tasks/persistent_write_log_cache_thrash.py [new file with mode: 0644]

index 63a0c9dcf25585d4905126c3231ca8bea9f5eeed..a991308c69174189f2372640668ce7c803039b33 100644 (file)
@@ -2,9 +2,8 @@ tasks:
 - rbd.create_image:
     client.0:
       image_name: testimage
-      image_size: 10
+      image_size: 10240
       image_format: 2
-- exec:
+- persistent_write_log_cache_thrash:
     client.0:
-      - "timeout 10s rbd bench --io-pattern rand --io-type write testimage || true"
-      - "rbd bench --io-type write --io-pattern rand --io-total 32M testimage"
+      image_name: testimage
diff --git a/qa/tasks/persistent_write_log_cache_thrash.py b/qa/tasks/persistent_write_log_cache_thrash.py
new file mode 100644 (file)
index 0000000..d487c76
--- /dev/null
@@ -0,0 +1,96 @@
+"""
+persistent write log cache thrash task
+"""
+import contextlib
+import logging
+import random
+import json
+import time
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+
+# Number of bench/kill cycles to run when the client config does not
+# provide 'num_iterations'.
+DEFAULT_NUM_ITERATIONS = 20
+# Candidate values for `rbd bench --io-pattern`; one is picked at random
+# for every iteration.
+IO_PATTERNS = ("full-seq", "rand")
+# Candidate values for `rbd bench --io-size`; one is picked at random
+# for every iteration.
+IO_SIZES = ('4K', '16K', '128K', '1024K')
+
+# Module-level logger used by every function in this task.
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def thrashes_rbd_bench_on_persistent_cache(ctx, config):
+    """
+    thrashes rbd bench on persistent write log cache.
+    It can test recovery feature of persistent write log cache.
+    """
+    # Overview: for each iteration this starts `rbd bench` in the
+    # background, lets it write for a random period, kills it with
+    # SIGKILL, waits, and then asserts via `rbd status` that the image has
+    # no watchers and that a non-empty, dirty persistent cache is present.
+    #
+    # Parameters:
+    #   ctx    -- teuthology context; only ctx.cluster is used here.
+    #   config -- dict mapping a client role to its settings (dict or None).
+    #             Recognised settings: 'image_name' (default 'testimage')
+    #             and 'num_iterations' (default DEFAULT_NUM_ITERATIONS).
+    #
+    # All of the thrashing runs BEFORE the `yield`, i.e. while the context
+    # manager is being entered; nothing but a log message happens on exit.
+    log.info("thrashes rbd bench on persistent write log cache")
+
+    # Only the first (role, settings) entry of config is used; any further
+    # entries are ignored.
+    client, client_config = list(config.items())[0]
+    # The one-element unpacking fails unless the role maps to exactly one
+    # remote.
+    (remote,) = ctx.cluster.only(client).remotes.keys()
+    # A role listed without settings (None) falls back to the defaults.
+    client_config = client_config if client_config is not None else dict()
+    image_name = client_config.get('image_name', 'testimage')
+    num_iterations = client_config.get('num_iterations', DEFAULT_NUM_ITERATIONS)
+
+    # NOTE(review): the loop variable `i` is never used in the body.
+    for i in range(num_iterations):
+        log.info("start rbd bench")
+        # rbd bench could not specify the run time so set a large enough test size.
+        # NOTE(review): the value returned by run() is discarded, so this
+        # function keeps no handle on the background process; it is only
+        # stopped by the `killall` below. Presumably wait=False makes run()
+        # return without waiting for the command -- confirm against
+        # teuthology's Remote.run.
+        remote.run(
+            args=[
+                'rbd', 'bench',
+                '--io-type', 'write',
+                '--io-pattern', random.choice(IO_PATTERNS),
+                '--io-size', random.choice(IO_SIZES),
+                '--io-total', '100G',
+                image_name,
+                ],
+            wait=False,
+        )
+        # Wait a few seconds for the rbd bench process to run
+        # and complete the pwl cache initialization
+        time.sleep(10)
+        log.info("dump cache state when rbd bench running.")
+        # The output of this call is not inspected; it is not parsed or
+        # asserted on, unlike the post-crash status below.
+        remote.sh(['rbd', 'status', image_name, '--format=json'])
+        log.info("sleep...")
+        # Let the bench write for a random 10-60 seconds before killing it.
+        time.sleep(random.randint(10, 60))
+        log.info("rbd bench crash.")
+        # SIGKILL every process named `rbd` on the remote (not only the one
+        # started above). check_status=False presumably keeps a non-zero
+        # exit status from failing the task -- TODO confirm.
+        remote.run(
+            args=[
+                'killall', '-9', 'rbd',
+                ],
+            check_status=False,
+        )
+        log.info("wait for watch timeout.")
+        # Fixed 40 s wait; assumes the image watch expires within that
+        # time -- TODO confirm against the cluster's watch timeout setting.
+        time.sleep(40)
+        log.info("check cache state after crash.")
+        out = remote.sh(['rbd', 'status', image_name, '--format=json'])
+        rbd_status = json.loads(out)
+        # After the crash: no client may still be watching the image ...
+        assert len(rbd_status['watchers']) == 0
+        # ... and the persistent cache must exist, hold data, and be dirty.
+        # A missing 'persistent_cache' key raises KeyError rather than
+        # AssertionError.
+        assert rbd_status['persistent_cache']['present'] == True
+        assert rbd_status['persistent_cache']['empty'] == False
+        assert rbd_status['persistent_cache']['clean'] == False
+        log.info("check dirty cache file.")
+        # Check that the cache file reported by `rbd status` exists on the
+        # remote. Presumably run() raises when `test -e` exits non-zero --
+        # confirm against teuthology's Remote.run.
+        remote.run(
+            args=[
+                'test', '-e', rbd_status['persistent_cache']['path'],
+                ]
+        )
+    try:
+        yield
+    finally:
+        # No actual cleanup is performed here; this only logs.
+        log.info("cleanup")
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    This is task for testing persistent write log cache thrash.
+    """
+    # Parameters:
+    #   ctx    -- teuthology context; ctx.cluster is used here and ctx is
+    #             passed on to the thrash context manager.
+    #   config -- must be a dict (asserted below); its entries are looked
+    #             up by the thrash context manager as role -> settings.
+    assert isinstance(config, dict), \
+            "task persistent_write_log_cache_thrash only supports a dictionary for configuration"
+
+    managers = []
+    # Presumably expands an 'all' entry into one entry per client role --
+    # confirm against teuthology.misc.replace_all_with_clients.
+    config = teuthology.replace_all_with_clients(ctx.cluster, config)
+    # The lambda defers creating the context manager until
+    # contextutil.nested() invokes it. Only this one manager is registered,
+    # and it uses just the first entry of config, so additional clients in
+    # config are not thrashed.
+    managers.append(
+        lambda: thrashes_rbd_bench_on_persistent_cache(ctx=ctx, config=config)
+        )
+
+    # The task body (the `yield`) runs with the manager entered.
+    with contextutil.nested(*managers):
+        yield