git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: add reproducer for bug with concurrent versioned object deletes
author: Cory Snyder <csnyder@1111systems.com>
Fri, 12 Jan 2024 14:41:31 +0000 (14:41 +0000)
committer: Konstantin Shalygin <k0ste@k0ste.ru>
Tue, 20 Aug 2024 12:56:41 +0000 (19:56 +0700)
Adds a test case to reproduce a scenario where concurrent versioned
object deletes can leave OLH entries behind.

Signed-off-by: Cory Snyder <csnyder@1111systems.com>
(cherry picked from commit b65046e37f62f2c65b17ba6f3434a19d3d68c983)

qa/workunits/rgw/test_rgw_versioning.py
src/common/options/rgw.yaml.in
src/rgw/rgw_rados.cc

index fc69e138d41fa9bb045eaaf38a088f95bc1115e8..f175203ea0bf385b8c31056d2b0d7909caadca50 100755 (executable)
@@ -5,6 +5,7 @@ import json
 import uuid
 import botocore
 import time
+import threading
 from common import exec_cmd, create_user, boto_connect
 from botocore.config import Config
 
@@ -100,7 +101,33 @@ def main():
         exec_cmd('ceph config rm client rgw_debug_inject_set_olh_err')
     get_resp = bucket.Object(key).get()
     assert put_resp.e_tag == get_resp['ETag'], 'get did not return null version with correct etag'
-        
+
+    # TESTCASE 'verify that concurrent delete requests do not leave behind olh entries'
+    log.debug('TEST: verify that concurrent delete requests do not leave behind olh entries\n')
+    bucket.object_versions.all().delete()
+    
+    key = 'concurrent-delete'
+    # create a delete marker
+    resp = bucket.Object(key).delete()
+    version_id = resp['ResponseMetadata']['HTTPHeaders']['x-amz-version-id']
+    try:
+        exec_cmd('ceph config set client rgw_debug_inject_latency_bi_unlink 2')
+        time.sleep(1)
+
+        def do_delete():
+            connection.ObjectVersion(bucket.name, key, version_id).delete()
+            
+        t2 = threading.Thread(target=do_delete)
+        t2.start()
+        do_delete()
+        t2.join()
+    finally:
+        exec_cmd('ceph config rm client rgw_debug_inject_latency_bi_unlink')
+    out = exec_cmd(f'radosgw-admin bucket check olh --bucket {bucket.name} --dump-keys')
+    num_leftover_olh_entries = len(json.loads(out))
+    assert num_leftover_olh_entries == 0, \
+      'Found leftover olh entries after concurrent deletes'
+
     # Clean up
     log.debug("Deleting bucket {}".format(BUCKET_NAME))
     bucket.object_versions.all().delete()
index ff0ae566e006bb883d0f9e3e3e439d2038e7e990..af81004c479e86207f299e3c08392e228ded508f 100644 (file)
@@ -2510,6 +2510,16 @@ options:
   - rgw
   - rgw
   min: 30
+- name: rgw_debug_inject_latency_bi_unlink
+  type: uint
+  level: dev
+  desc: Latency (in seconds) injected before rgw bucket index unlink op calls to simulate
+    queueing latency and validate behavior of simultaneous delete requests which
+    target the same object.
+  default: 0
+  with_legacy: true
+  services:
+  - rgw
 - name: rgw_debug_inject_set_olh_err
   type: uint
   level: dev
index 157c5c993d23e9da6bfea537b117c41a3d7467e3..586a53cda3636e491fefb7f0f9ad4309f15f3819 100644 (file)
@@ -7860,6 +7860,12 @@ int RGWRados::unlink_obj_instance(const DoutPrefixProvider *dpp, RGWObjectCtx& o
     }
 
     string olh_tag(state->olh_tag.c_str(), state->olh_tag.length());
+    
+    if (cct->_conf->rgw_debug_inject_latency_bi_unlink) {
+      // simulates queue latency for unlink ops to validate behavior with
+      // concurrent delete requests for the same object version instance
+      std::this_thread::sleep_for(cct->_conf->rgw_debug_inject_latency_bi_unlink * std::chrono::seconds{1});
+    }
 
     ret = bucket_index_unlink_instance(dpp, bucket_info, target_obj, op_tag, olh_tag, olh_epoch, zones_trace);
     if (ret < 0) {