From: Harsimran Singh Date: Wed, 3 Sep 2025 14:09:59 +0000 (+0530) Subject: rgw: Addressing Review Comments about Object Count and extra Placeholders X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9a192a4b079a64ca0042d7eb0073debc224fc8e1;p=ceph-ci.git rgw: Addressing Review Comments about Object Count and extra Placeholders Signed-off-by: Harsimran Singh --- diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 9a1999441b9..06b3e02d266 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -1,6 +1,7 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- // vim: ts=8 sw=2 sts=2 expandtab ft=cpp +#include #include #include #include @@ -63,6 +64,7 @@ #endif #include "rgw_torrent.h" #include "rgw_usage_perf.h" +#include "rgw_user.h" #include "rgw_cksum_pipe.h" #include "rgw_lua_data_filter.h" #include "rgw_lua.h" @@ -2262,41 +2264,40 @@ void RGWOp::update_usage_stats_if_needed() { if (!usage_counters) { return; } - + // Only update for successful operations if (op_ret != 0) { return; } - + // Check if this is an operation that changes usage bool is_put_op = (dynamic_cast(this) != nullptr) || (dynamic_cast(this) != nullptr) || (dynamic_cast(this) != nullptr) || (dynamic_cast(this) != nullptr); - + bool is_delete_op = (dynamic_cast(this) != nullptr) || (dynamic_cast(this) != nullptr); - + if (!is_put_op && !is_delete_op) { - return; // Not an operation that changes usage + return; } - + // Update bucket statistics if we have bucket info if (s->bucket && usage_counters) { try { - // Use the bucket's sync_owner_stats to get current stats - // This updates the bucket's internal stats + // Use sync_owner_stats to get current bucket stats RGWBucketEnt ent; int ret = s->bucket->sync_owner_stats(this, null_yield, &ent); - + if (ret >= 0) { - // Update bucket usage statistics using the entry data + // Update bucket stats with accurate counts usage_counters->update_bucket_stats( s->bucket->get_name(), ent.size, // Total bytes used ent.count // Total number of objects ); - + ldout(s->cct, 20) << "Updated bucket stats for " << s->bucket->get_name() << ": bytes=" << ent.size << ", objects=" << ent.count << dendl; @@ -2309,29 +2310,72 @@ void RGWOp::update_usage_stats_if_needed() { ldout(s->cct, 5) << "Exception updating bucket stats: " << e.what() << dendl; } } - - // Update user statistics if we have user info + + // Update user statistics if (s->user && usage_counters) { try { - // For user stats, we'll use the bucket owner stats as a proxy - // since there's no direct get_user_stats method - if (s->bucket) { - RGWBucketEnt ent; - int ret = s->bucket->sync_owner_stats(this, null_yield, &ent); - - if (ret >= 0) { - // This gives us at least partial user stats - // In production, you might want to aggregate across all user's buckets - usage_counters->update_user_stats( - s->user->get_id().id, - ent.size, // Using bucket size as proxy - ent.count // Using bucket object count as proxy - ); - - ldout(s->cct, 20) << "Updated user stats for " << s->user->get_id().id - << " (based on bucket " << s->bucket->get_name() << ")" - << ": bytes=" << ent.size - << ", objects=" << ent.count << dendl; + // Initialize user stats + RGWStorageStats user_stats; + user_stats.size_utilized = 0; + user_stats.num_objects = 0; + + // Try to list buckets and sum up stats + rgw::sal::BucketList buckets; + rgw_owner owner(s->user->get_id()); + + // list_buckets signature: (dpp, owner, marker, end_marker, max, need_stats, buckets, y) + int ret = driver->list_buckets(this, owner, "", "", "", 1000, true, buckets, null_yield); + + if (ret >= 0) { + // Sum up stats from all buckets + // The buckets.buckets container holds RGWBucketEnt objects directly + for (const auto& bucket_ent : buckets.buckets) { + user_stats.size_utilized += bucket_ent.size; + user_stats.num_objects += bucket_ent.count; + } + + // Update user stats with the total + usage_counters->update_user_stats( + s->user->get_id().id, + user_stats.size_utilized, + user_stats.num_objects + ); + + ldout(s->cct, 20) << "Updated user stats for " << s->user->get_id().id + << " (from " << buckets.buckets.size() << " buckets)" + << ": bytes=" << user_stats.size_utilized + << ", objects=" << user_stats.num_objects << dendl; + } else { + // Fallback: Use current bucket stats as approximation + if (s->bucket) { + RGWBucketEnt ent; + ret = s->bucket->sync_owner_stats(this, null_yield, &ent); + + if (ret >= 0) { + // Get existing cached user stats + auto cached_stats = usage_counters->get_user_stats(s->user->get_id().id); + + uint64_t total_bytes = ent.size; + uint64_t total_objects = ent.count; + + // If we have cached stats and this is a different bucket, try to accumulate + if (cached_stats.has_value()) { + // Use the maximum of cached and current values + // This prevents losing data when we can't list all buckets + total_bytes = std::max(cached_stats->bytes_used, ent.size); + total_objects = std::max(cached_stats->num_objects, ent.count); + } + + usage_counters->update_user_stats( + s->user->get_id().id, + total_bytes, + total_objects + ); + + ldout(s->cct, 20) << "Updated user stats (partial) for " << s->user->get_id().id + << ": bytes=" << total_bytes + << ", objects=" << total_objects << dendl; + } } } } catch (const std::exception& e) { @@ -2340,7 +2384,6 @@ void RGWOp::update_usage_stats_if_needed() { } } - int RGWGetObj::handle_slo_manifest(bufferlist& bl, optional_yield y) { RGWSLOInfo slo_info; diff --git a/src/rgw/rgw_usage_perf.cc b/src/rgw/rgw_usage_perf.cc index 255ee4319d7..ccf36239711 100644 --- a/src/rgw/rgw_usage_perf.cc +++ b/src/rgw/rgw_usage_perf.cc @@ -62,39 +62,39 @@ void UsagePerfCounters::create_global_counters() { } PerfCounters* UsagePerfCounters::create_user_counters(const std::string& user_id) { + std::string name = "rgw_user_" + user_id; - + // Sanitize name for perf counters (replace non-alphanumeric with underscore) for (char& c : name) { if (!std::isalnum(c) && c != '_') { c = '_'; } } - - PerfCountersBuilder b(cct, name, l_rgw_usage_first, l_rgw_usage_last); - - // Add placeholder counters for unused indices - for (int i = l_rgw_usage_first + 1; i < l_rgw_user_used_bytes; ++i) { - b.add_u64(i, "placeholder", "placeholder", nullptr, 0, unit_t(0)); - } - - b.add_u64(l_rgw_user_used_bytes, "used_bytes", + + // Create a separate enum range for user-specific counters + enum { + l_rgw_user_first = 930000, // Different range from main counters + l_rgw_user_bytes, + l_rgw_user_objects, + l_rgw_user_last + }; + + PerfCountersBuilder b(cct, name, l_rgw_user_first, l_rgw_user_last); + + b.add_u64(l_rgw_user_bytes, "used_bytes", "Bytes used by user", nullptr, 0, unit_t(UNIT_BYTES)); - b.add_u64(l_rgw_user_num_objects, "num_objects", + b.add_u64(l_rgw_user_objects, "num_objects", "Number of objects owned by user", nullptr, 0, unit_t(0)); - - // Add remaining placeholder counters - for (int i = l_rgw_user_num_objects + 1; i < l_rgw_usage_last; ++i) { - b.add_u64(i, "placeholder", "placeholder", nullptr, 0, unit_t(0)); - } - + PerfCounters* counters = b.create_perf_counters(); cct->get_perfcounters_collection()->add(counters); - + return counters; } PerfCounters* UsagePerfCounters::create_bucket_counters(const std::string& bucket_name) { + std::string name = "rgw_bucket_" + bucket_name; // Sanitize name for perf counters @@ -103,27 +103,25 @@ PerfCounters* UsagePerfCounters::create_bucket_counters(const std::string& bucke c = '_'; } } - - PerfCountersBuilder b(cct, name, l_rgw_usage_first, l_rgw_usage_last); - - // Add placeholder counters for unused indices - for (int i = l_rgw_usage_first + 1; i < l_rgw_bucket_used_bytes; ++i) { - b.add_u64(i, "placeholder", "placeholder", nullptr, 0, unit_t(0)); - } - - b.add_u64(l_rgw_bucket_used_bytes, "used_bytes", + + // Create a separate enum range for bucket-specific counters + enum { + l_rgw_bucket_first = 940000, // Different range from main counters + l_rgw_bucket_bytes, + l_rgw_bucket_objects, + l_rgw_bucket_last + }; + + PerfCountersBuilder b(cct, name, l_rgw_bucket_first, l_rgw_bucket_last); + + b.add_u64(l_rgw_bucket_bytes, "used_bytes", "Bytes used in bucket", nullptr, 0, unit_t(UNIT_BYTES)); - b.add_u64(l_rgw_bucket_num_objects, "num_objects", + b.add_u64(l_rgw_bucket_objects, "num_objects", "Number of objects in bucket", nullptr, 0, unit_t(0)); - - // Add remaining placeholder counters - for (int i = l_rgw_bucket_num_objects + 1; i < l_rgw_usage_last; ++i) { - b.add_u64(i, "placeholder", "placeholder", nullptr, 0, unit_t(0)); - } - + PerfCounters* counters = b.create_perf_counters(); cct->get_perfcounters_collection()->add(counters); - + return counters; } @@ -227,19 +225,30 @@ void UsagePerfCounters::update_user_stats(const std::string& user_id, } } + // Define local enum for user-specific counter indices + // This avoids needing placeholders in the global enum + enum { + l_rgw_user_first = 930000, // Start at a high number to avoid conflicts + l_rgw_user_bytes, // 930001 + l_rgw_user_objects, // 930002 + l_rgw_user_last // 930003 + }; + // Update or create perf counters { std::unique_lock lock(counters_mutex); auto it = user_perf_counters.find(user_id); if (it == user_perf_counters.end()) { + // Counter doesn't exist, create it PerfCounters* counters = create_user_counters(user_id); user_perf_counters[user_id] = counters; it = user_perf_counters.find(user_id); } - it->second->set(l_rgw_user_used_bytes, bytes_used); - it->second->set(l_rgw_user_num_objects, num_objects); + // Set the values using the local enum indices + it->second->set(l_rgw_user_bytes, bytes_used); + it->second->set(l_rgw_user_objects, num_objects); } ldout(cct, 20) << "Updated user stats: " << user_id @@ -262,19 +271,30 @@ void UsagePerfCounters::update_bucket_stats(const std::string& bucket_name, } } + // Define local enum for bucket-specific counter indices + // This avoids needing placeholders in the global enum + enum { + l_rgw_bucket_first = 940000, // Different range from user counters + l_rgw_bucket_bytes, // 940001 + l_rgw_bucket_objects, // 940002 + l_rgw_bucket_last // 940003 + }; + // Update or create perf counters { std::unique_lock lock(counters_mutex); auto it = bucket_perf_counters.find(bucket_name); if (it == bucket_perf_counters.end()) { + // Counter doesn't exist, create it PerfCounters* counters = create_bucket_counters(bucket_name); bucket_perf_counters[bucket_name] = counters; it = bucket_perf_counters.find(bucket_name); } - it->second->set(l_rgw_bucket_used_bytes, bytes_used); - it->second->set(l_rgw_bucket_num_objects, num_objects); + // Set the values using the local enum indices + it->second->set(l_rgw_bucket_bytes, bytes_used); + it->second->set(l_rgw_bucket_objects, num_objects); } ldout(cct, 20) << "Updated bucket stats: " << bucket_name