From: Cory Snyder Date: Fri, 12 Jan 2024 14:41:31 +0000 (+0000) Subject: rgw: add reproducer for bug with concurrent versioned object deletes X-Git-Tag: v17.2.8~137^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=52a79e2dfd34cba31b6e052723f81769d569da1c;p=ceph.git rgw: add reproducer for bug with concurrent versioned object deletes Adds a test case to reproduce a scenario where concurrent versioned object deletes can cause leftover OLH entries to be left behind. Signed-off-by: Cory Snyder (cherry picked from commit b65046e37f62f2c65b17ba6f3434a19d3d68c983) --- diff --git a/qa/workunits/rgw/test_rgw_versioning.py b/qa/workunits/rgw/test_rgw_versioning.py index fc69e138d41f..f175203ea0bf 100755 --- a/qa/workunits/rgw/test_rgw_versioning.py +++ b/qa/workunits/rgw/test_rgw_versioning.py @@ -5,6 +5,7 @@ import json import uuid import botocore import time +import threading from common import exec_cmd, create_user, boto_connect from botocore.config import Config @@ -100,7 +101,33 @@ def main(): exec_cmd('ceph config rm client rgw_debug_inject_set_olh_err') get_resp = bucket.Object(key).get() assert put_resp.e_tag == get_resp['ETag'], 'get did not return null version with correct etag' - + + # TESTCASE 'verify that concurrent delete requests do not leave behind olh entries' + log.debug('TEST: verify that concurrent delete requests do not leave behind olh entries\n') + bucket.object_versions.all().delete() + + key = 'concurrent-delete' + # create a delete marker + resp = bucket.Object(key).delete() + version_id = resp['ResponseMetadata']['HTTPHeaders']['x-amz-version-id'] + try: + exec_cmd('ceph config set client rgw_debug_inject_latency_bi_unlink 2') + time.sleep(1) + + def do_delete(): + connection.ObjectVersion(bucket.name, key, version_id).delete() + + t2 = threading.Thread(target=do_delete) + t2.start() + do_delete() + t2.join() + finally: + exec_cmd('ceph config rm client rgw_debug_inject_latency_bi_unlink') + out = 
exec_cmd(f'radosgw-admin bucket check olh --bucket {bucket.name} --dump-keys') + num_leftover_olh_entries = len(json.loads(out)) + assert num_leftover_olh_entries == 0, \ + 'Found leftover olh entries after concurrent deletes' + # Clean up log.debug("Deleting bucket {}".format(BUCKET_NAME)) bucket.object_versions.all().delete() diff --git a/src/common/options/rgw.yaml.in b/src/common/options/rgw.yaml.in index ff0ae566e006..af81004c479e 100644 --- a/src/common/options/rgw.yaml.in +++ b/src/common/options/rgw.yaml.in @@ -2510,6 +2510,16 @@ options: - rgw - rgw min: 30 +- name: rgw_debug_inject_latency_bi_unlink + type: uint + level: dev + desc: Latency (in seconds) injected before rgw bucket index unlink op calls to simulate + queueing latency and validate behavior of simultaneous delete requests which + target the same object. + default: 0 + with_legacy: true + services: + - rgw - name: rgw_debug_inject_set_olh_err type: uint level: dev diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 157c5c993d23..586a53cda363 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -7860,6 +7860,12 @@ int RGWRados::unlink_obj_instance(const DoutPrefixProvider *dpp, RGWObjectCtx& o } string olh_tag(state->olh_tag.c_str(), state->olh_tag.length()); + + if (cct->_conf->rgw_debug_inject_latency_bi_unlink) { + // simulates queue latency for unlink ops to validate behavior with + // concurrent delete requests for the same object version instance + std::this_thread::sleep_for(cct->_conf->rgw_debug_inject_latency_bi_unlink * std::chrono::seconds{1}); + } ret = bucket_index_unlink_instance(dpp, bucket_info, target_obj, op_tag, olh_tag, olh_epoch, zones_trace); if (ret < 0) {