From 1fa7163b97a259cc1db78f535305ad09844ba131 Mon Sep 17 00:00:00 2001 From: Shilpa Jagannath Date: Mon, 16 Dec 2024 15:28:36 -0500 Subject: [PATCH] rgw/trim: fix ENOENT return response from bucket sync status query. Only handle ENOENT when the bucket metadata is deleted. There is also a case where we get ENOENT because the status objects have not been created yet, for example when bucket metadata is created and synced but no data exists yet and bucket sync status won't be initialized. Such cases don't need special handling. Signed-off-by: Shilpa Jagannath --- src/rgw/driver/rados/rgw_trim_bilog.cc | 16 ++++++++++------ src/test/rgw/rgw_multi/tests.py | 21 +++++++++++---------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/src/rgw/driver/rados/rgw_trim_bilog.cc b/src/rgw/driver/rados/rgw_trim_bilog.cc index 056c868ae78..93dbf6d64d0 100644 --- a/src/rgw/driver/rados/rgw_trim_bilog.cc +++ b/src/rgw/driver/rados/rgw_trim_bilog.cc @@ -458,6 +458,7 @@ class RGWReadRemoteStatusShardsCR : public RGWCoroutine { rgw::sal::RadosStore* const store; CephContext *cct; RGWHTTPManager *http; + const RGWBucketInfo* bucket_info; std::string bucket_instance; const rgw_zone_id zid; const std::string& zone_id; @@ -468,12 +469,13 @@ public: rgw::sal::RadosStore* const store, CephContext *cct, RGWHTTPManager *http, + const RGWBucketInfo* bucket_info, std::string bucket_instance, const rgw_zone_id zid, const std::string& zone_id, StatusShards *p) : RGWCoroutine(cct), dpp(dpp), store(store), - cct(cct), http(http), bucket_instance(bucket_instance), + cct(cct), http(http), bucket_info(bucket_info), bucket_instance(bucket_instance), zid(zid), zone_id(zone_id), p(p) {} int operate(const DoutPrefixProvider *dpp) override { @@ -503,7 +505,7 @@ public: if (retcode < 0 && retcode != -ENOENT) { return set_cr_error(retcode); - } else if (retcode == -ENOENT) { + } else if (retcode == -ENOENT && bucket_info->layout.logs.front().layout.type == rgw::BucketLogType::Deleted) { p->generation = UINT64_MAX; 
ldpp_dout(dpp, 10) << "INFO: could not read shard status for bucket:" << bucket_instance << " from zone: " << zid.id << dendl; @@ -584,12 +586,14 @@ class BucketTrimInstanceCR : public RGWCoroutine { if (clean_info) return 0; - if (pbucket_info->layout.logs.front().gen < totrim.gen) { + bool deleted_type = (pbucket_info->layout.logs.back().layout.type == rgw::BucketLogType::Deleted); + if (pbucket_info->layout.logs.front().gen < totrim.gen || + (pbucket_info->layout.logs.front().gen <= totrim.gen && deleted_type)) { clean_info = {*pbucket_info, {}}; auto log = clean_info->first.layout.logs.cbegin(); clean_info->second = *log; - if (clean_info->first.layout.logs.size() == 1) { + if (clean_info->first.layout.logs.size() == 1 && !deleted_type) { ldpp_dout(dpp, -1) << "Critical error! Attempt to remove only log generation! " << "log.gen=" << log->gen << ", totrim.gen=" << totrim.gen @@ -635,7 +639,7 @@ int take_min_status( if (peer->shards.size() != status->size()) { ldpp_dout(dpp, 5) << __PRETTY_FUNCTION__ << ":" << "ERROR: shards don't match. 
peer shard:" << peer->shards.size() << " my shards:" << status->size() - << "for generation:" << peer->generation << dendl; + << "for generation:" << peer->generation << dendl; // all peers must agree on the number of shards return -EINVAL; } @@ -732,7 +736,7 @@ int BucketTrimInstanceCR::operate(const DoutPrefixProvider *dpp) auto p = peer_status.begin(); for (auto& zid : zids) { - spawn(new RGWReadRemoteStatusShardsCR(dpp, store, cct, http, bucket_instance, zid, zone_id, &*p), false); + spawn(new RGWReadRemoteStatusShardsCR(dpp, store, cct, http, pbucket_info, bucket_instance, zid, zone_id, &*p), false); ++p; } } diff --git a/src/test/rgw/rgw_multi/tests.py b/src/test/rgw/rgw_multi/tests.py index 2e09f9a5e7b..febdb3c3c60 100644 --- a/src/test/rgw/rgw_multi/tests.py +++ b/src/test/rgw/rgw_multi/tests.py @@ -111,8 +111,8 @@ def bilog_list(zone, bucket, args = None): return json.loads(bilog) def bilog_autotrim(zone, args = None): - cmd = ['bilog', 'autotrim'] + zone.zone_args() - zone.cluster.admin(cmd, read_only=True) + cmd = ['bilog', 'autotrim'] + (args or []) + zone.zone_args() + zone.cluster.admin(cmd, debug_rgw=20) def bucket_layout(zone, bucket, args = None): (bl_output,_) = zone.cluster.admin(['bucket', 'layout', '--bucket', bucket] + (args or [])) @@ -1626,6 +1626,7 @@ def test_encrypted_object_sync(): key = bucket2.get_key('testobj-sse-kms') eq(data, key.get_contents_as_string(encoding='ascii')) +@attr('bucket_trim') def test_bucket_index_log_trim(): zonegroup = realm.master_zonegroup() zonegroup_conns = ZonegroupConns(zonegroup) @@ -1810,10 +1811,10 @@ def test_bucket_log_trim_after_delete_bucket_primary_reshard(): primary.conn.delete_bucket(test_bucket.name) zonegroup_data_checkpoint(zonegroup_conns) - bilog_autotrim(primary.zone) + bilog_autotrim(primary.zone, ['--rgw-sync-log-trim-max-buckets', '50'],) time.sleep(config.checkpoint_delay) - bilog_autotrim(primary.zone) + bilog_autotrim(primary.zone, ['--rgw-sync-log-trim-max-buckets', '50'],) for 
zonegroup in realm.current_period.zonegroups: zonegroup_conns = ZonegroupConns(zonegroup) @@ -1821,11 +1822,11 @@ def test_bucket_log_trim_after_delete_bucket_primary_reshard(): for zone in zonegroup_conns.zones: log.info('trimming on zone=%s', zone.name) - bilog_autotrim(zone.zone) + bilog_autotrim(zone.zone, ['--rgw-sync-log-trim-max-buckets', '50'],) time.sleep(config.checkpoint_delay) # run bilog trim twice on primary zone where the bucket was resharded - bilog_autotrim(primary.zone) + bilog_autotrim(primary.zone, ['--rgw-sync-log-trim-max-buckets', '50'],) for zonegroup in realm.current_period.zonegroups: for zone in zonegroup_conns.zones: @@ -1872,10 +1873,10 @@ def test_bucket_log_trim_after_delete_bucket_secondary_reshard(): primary.conn.delete_bucket(test_bucket.name) zonegroup_data_checkpoint(zonegroup_conns) - bilog_autotrim(secondary.zone) + bilog_autotrim(secondary.zone, ['--rgw-sync-log-trim-max-buckets', '50'],) time.sleep(config.checkpoint_delay) - bilog_autotrim(secondary.zone) + bilog_autotrim(secondary.zone, ['--rgw-sync-log-trim-max-buckets', '50'],) for zonegroup in realm.current_period.zonegroups: zonegroup_conns = ZonegroupConns(zonegroup) @@ -1883,11 +1884,11 @@ def test_bucket_log_trim_after_delete_bucket_secondary_reshard(): for zone in zonegroup_conns.zones: log.info('trimming on zone=%s', zone.name) - bilog_autotrim(zone.zone) + bilog_autotrim(zone.zone, ['--rgw-sync-log-trim-max-buckets', '50'],) time.sleep(config.checkpoint_delay) # run bilog trim twice on primary zone where the bucket was resharded - bilog_autotrim(secondary.zone) + bilog_autotrim(secondary.zone, ['--rgw-sync-log-trim-max-buckets', '50'],) time.sleep(config.checkpoint_delay) for zonegroup in realm.current_period.zonegroups: -- 2.39.5