From 0521c2ae830d54c4fee37ef5f1ea77ed298c27a0 Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Tue, 26 Apr 2022 12:46:08 -0400 Subject: [PATCH] rgw: "bucket check --fix" should delete damaged multipart uploads from bi As one of the steps in `radosgw-admin bucket check --fix ...` it looks for bucket index entries for incomplete multipart uploads that do not have a corresponding ".meta" entry in the same bucket index. It then intends to delete those entries; however, the function that it calls to perform the bucket index deletions was flawed and did not direct the removals to the appropriate shard(s), but instead a nonexistent oid. This commit determines the appropriate shard for each of the entries to be removed and asynchronously issues a librados call to omap_rm_keys. Signed-off-by: J. Eric Ivancich --- src/rgw/rgw_admin.cc | 2 + src/rgw/rgw_bucket.cc | 2 + src/rgw/rgw_rados.cc | 86 ++++++++++++++++++++++++--------- src/rgw/rgw_rados.h | 7 ++- src/rgw/services/svc_bi_rados.h | 22 +++++++++ 5 files changed, 91 insertions(+), 28 deletions(-) diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc index a128f8be5f628..127a4b9b9b923 100644 --- a/src/rgw/rgw_admin.cc +++ b/src/rgw/rgw_admin.cc @@ -7728,6 +7728,8 @@ next: rgw_obj_index_key index_key; key.get_index_key(&index_key); oid_list.push_back(index_key); + + // note: under rados this removes directly from rados index objects ret = bucket->remove_objs_from_index(dpp(), oid_list); if (ret < 0) { cerr << "ERROR: remove_obj_from_index() returned error: " << cpp_strerror(-ret) << std::endl; diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc index 5a6eb77499279..7b915e0119088 100644 --- a/src/rgw/rgw_bucket.cc +++ b/src/rgw/rgw_bucket.cc @@ -557,6 +557,7 @@ int RGWBucket::check_bad_index_multipart(RGWBucketAdminOpState& op_state, if (objs_to_unlink.size() > listing_max_entries) { if (fix_index) { + // note: under rados this removes directly from rados index objects int r = 
bucket->remove_objs_from_index(dpp, objs_to_unlink); if (r < 0) { set_err_msg(err_msg, "ERROR: remove_obj_from_index() returned error: " + @@ -572,6 +573,7 @@ int RGWBucket::check_bad_index_multipart(RGWBucketAdminOpState& op_state, } if (fix_index) { + // note: under rados this removes directly from rados index objects int r = bucket->remove_objs_from_index(dpp, objs_to_unlink); if (r < 0) { set_err_msg(err_msg, "ERROR: remove_obj_from_index() returned error: " + diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index a9cd4d63a94c4..c1acd1bb30f11 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -9096,43 +9096,81 @@ int RGWRados::cls_obj_usage_log_clear(const DoutPrefixProvider *dpp, string& oid } -int RGWRados::remove_objs_from_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, list& oid_list) +// note: this removes entries from the rados bucket index objects +// without going through CLS; this is known to be called from +// "radosgw-admin unlink" and "radosgw-admin bucket check --fix" +int RGWRados::remove_objs_from_index(const DoutPrefixProvider *dpp, + RGWBucketInfo& bucket_info, + const std::list& entry_key_list) { const bool bitx = cct->_conf->rgw_bucket_index_transaction_instrumentation; - ldout_bitx(bitx, cct, 10) << "ENTERING " << __func__ << ": bucket=" << bucket_info.bucket << - " oid_list.size()=" << oid_list.size() << dendl_bitx; - ldout_bitx(bitx, cct, 25) << "BACKTRACE: " << __func__ << ": " << ClibBackTrace(0) << dendl_bitx; + ldout_bitx(bitx, dpp, 10) << "ENTERING " << __func__ << ": bucket=" << bucket_info.bucket << + " entry_key_list.size()=" << entry_key_list.size() << dendl_bitx; + ldout_bitx(bitx, dpp, 25) << "BACKTRACE: " << __func__ << ": " << ClibBackTrace(0) << dendl_bitx; + + const rgw::bucket_index_layout_generation& latest_layout = bucket_info.layout.current_index; + if (latest_layout.layout.type != rgw::BucketIndexType::Normal || + latest_layout.layout.normal.hash_type != rgw::BucketHashType::Mod) { 
+ ldout_bitx(bitx, dpp, 0) << "ERROR: " << __func__ << " index for bucket=" << + bucket_info.bucket << " is not a normal modulo index" << dendl_bitx; + return -EINVAL; + } + const uint32_t num_shards = latest_layout.layout.normal.num_shards; RGWSI_RADOS::Pool index_pool; - string dir_oid; + std::map index_oids; + int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, std::nullopt, &index_pool, + &index_oids, nullptr); + if (r < 0) { + ldout_bitx(bitx, dpp, 0) << "ERROR: " << __func__ << + " open_bucket_index returned " << r << dendl_bitx; + return r; + } - uint8_t suggest_flag = (svc.zone->get_zone().log_data ? CEPH_RGW_DIR_SUGGEST_LOG_OP : 0); + // split up removals by shard + std::map> sharded_removals; + for (const auto& entry_key : entry_key_list) { + const rgw_obj_key obj_key(entry_key); + const uint32_t shard = + RGWSI_BucketIndex_RADOS::bucket_shard_index(obj_key, num_shards); - int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, &index_pool, &dir_oid); - if (r < 0) - return r; + // entry_key already combines namespace and name, so we first have + // to break that apart before we can then combine with instance + std::string name; + std::string ns; // namespace + rgw_obj_key::parse_index_key(entry_key.name, &name, &ns); + rgw_obj_key full_key(name, entry_key.instance, ns); + std::string combined_key = full_key.get_oid(); - bufferlist updates; + sharded_removals[shard].insert(combined_key); - for (auto iter = oid_list.begin(); iter != oid_list.end(); ++iter) { - rgw_bucket_dir_entry entry; - entry.key = *iter; - ldout_bitx(bitx, cct, 5) << "INFO: " << __func__ << - ": encoding removal of bucket=" << bucket_info.bucket << - " entry=" << entry.key << " in updates" << dendl_bitx; - entry.ver.epoch = (uint64_t)-1; // ULLONG_MAX, needed to that objclass doesn't skip out request - updates.append(CEPH_RGW_REMOVE | suggest_flag); - encode(entry, updates); + ldout_bitx(bitx, dpp, 20) << "INFO: " << __func__ << + ": removal from bucket index, bucket=" << 
bucket_info.bucket << + " key=" << combined_key << " designated for shard " << shard << + dendl_bitx; } - ldout_bitx(bitx, cct, 10) << "INFO: " << __func__ << - ": calling dir_suggest on shards of dir=" << bucket_info.bucket << dendl_bitx; + for (const auto& removals : sharded_removals) { + const int shard = removals.first; + const std::string& oid = index_oids[shard]; - bufferlist out; - r = index_pool.ioctx().exec(dir_oid, RGW_CLASS, RGW_DIR_SUGGEST_CHANGES, updates, out); + ldout_bitx(bitx, dpp, 10) << "INFO: " << __func__ << + ": removal from bucket index, bucket=" << bucket_info.bucket << + ", shard=" << shard << ", oid=" << oid << ", num_keys=" << + removals.second.size() << dendl_bitx; + + r = index_pool.ioctx().omap_rm_keys(oid, removals.second); + if (r < 0) { + ldout_bitx(bitx, dpp, 0) << "ERROR: " << __func__ << + ": omap_rm_keys returned ret=" << r << + dendl_bitx; + return r; + } + } - ldout_bitx(bitx, cct, 10) << + ldout_bitx(bitx, dpp, 5) << "EXITING " << __func__ << " and returning " << r << dendl_bitx; + return r; } diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index f145c54da3d2e..0fd7c4ec7a08e 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -64,9 +64,6 @@ struct get_obj_data; #define PUT_OBJ_EXCL 0x02 #define PUT_OBJ_CREATE_EXCL (PUT_OBJ_CREATE | PUT_OBJ_EXCL) -#define RGW_OBJ_NS_MULTIPART "multipart" -#define RGW_OBJ_NS_SHADOW "shadow" - static inline void prepend_bucket_marker(const rgw_bucket& bucket, const std::string& orig_oid, std::string& oid) { if (bucket.marker.empty() || orig_oid.empty()) { @@ -1485,7 +1482,9 @@ public: std::map *calculated_stats); int bucket_rebuild_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info); int bucket_set_reshard(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const cls_rgw_bucket_instance_entry& entry); - int remove_objs_from_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, std::list& oid_list); + int remove_objs_from_index(const 
DoutPrefixProvider *dpp, + RGWBucketInfo& bucket_info, + const std::list& oid_list); int move_rados_obj(const DoutPrefixProvider *dpp, librados::IoCtx& src_ioctx, const std::string& src_oid, const std::string& src_locator, diff --git a/src/rgw/services/svc_bi_rados.h b/src/rgw/services/svc_bi_rados.h index 85eb3e5b7fede..337836203bcc0 100644 --- a/src/rgw/services/svc_bi_rados.h +++ b/src/rgw/services/svc_bi_rados.h @@ -22,6 +22,7 @@ #include "svc_bi.h" #include "svc_rados.h" +#include "svc_tier_rados.h" struct rgw_bucket_dir_header; @@ -32,6 +33,12 @@ class RGWSI_BILog_RADOS; #define RGW_SHARDS_PRIME_0 7877 #define RGW_SHARDS_PRIME_1 65521 +/* + * Defined Bucket Index Namespaces + */ +#define RGW_OBJ_NS_MULTIPART "multipart" +#define RGW_OBJ_NS_SHADOW "shadow" + class RGWSI_BucketIndex_RADOS : public RGWSI_BucketIndex { friend class RGWSI_BILog_RADOS; @@ -96,6 +103,21 @@ public: return rgw_shards_mod(sid2, num_shards); } + static uint32_t bucket_shard_index(const rgw_obj_key& obj_key, + int num_shards) + { + std::string sharding_key; + if (obj_key.ns == RGW_OBJ_NS_MULTIPART) { + RGWMPObj mp; + mp.from_meta(obj_key.name); + sharding_key = mp.get_key(); + } else { + sharding_key = obj_key.name; + } + + return bucket_shard_index(sharding_key, num_shards); + } + int init_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info); int clean_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info); -- 2.39.5