]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
Addressing Review Comments about Object Count and extra Placeholders
authorHarsimran Singh <hsthukral51@gmail.com>
Wed, 3 Sep 2025 14:09:59 +0000 (19:39 +0530)
committerThomas Serlin <tserlin@redhat.com>
Mon, 22 Sep 2025 19:18:18 +0000 (15:18 -0400)
Resolves: rhbz#2036531

Signed-off-by: Harsimran Singh <hsthukral51@gmail.com>
(cherry picked from commit 1fa2992815f547b8013bc6c32162b9a7b04a0835)

src/rgw/rgw_op.cc
src/rgw/rgw_usage_perf.cc

index 9b34883589b6d5867513c3e65adad677ef51a6bd..c27e446df4c43ebc862b1669f6c1b463bf289bb6 100644 (file)
@@ -1,6 +1,7 @@
 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab ft=cpp
 
+#include <algorithm>
 #include <errno.h>
 #include <optional>
 #include <stdlib.h>
@@ -63,6 +64,7 @@
 #include "rgw_sal_rados.h"
 #include "rgw_torrent.h"
 #include "rgw_usage_perf.h"
+#include "rgw_user.h"
 #include "rgw_cksum_pipe.h"
 #include "rgw_lua_data_filter.h"
 #include "rgw_lua.h"
@@ -2249,41 +2251,40 @@ void RGWOp::update_usage_stats_if_needed() {
   if (!usage_counters) {
     return;
   }
-  
+
   // Only update for successful operations
   if (op_ret != 0) {
     return;
   }
-  
+
   // Check if this is an operation that changes usage
   bool is_put_op = (dynamic_cast<RGWPutObj*>(this) != nullptr) ||
                    (dynamic_cast<RGWPostObj*>(this) != nullptr) ||
                    (dynamic_cast<RGWCopyObj*>(this) != nullptr) ||
                    (dynamic_cast<RGWCompleteMultipart*>(this) != nullptr);
-  
+
   bool is_delete_op = (dynamic_cast<RGWDeleteObj*>(this) != nullptr) ||
                       (dynamic_cast<RGWDeleteMultiObj*>(this) != nullptr);
-  
+
   if (!is_put_op && !is_delete_op) {
-    return;  // Not an operation that changes usage
+    return;
   }
-  
+
   // Update bucket statistics if we have bucket info
   if (s->bucket && usage_counters) {
     try {
-      // Use the bucket's sync_owner_stats to get current stats
-      // This updates the bucket's internal stats
+      // Use sync_owner_stats to get current bucket stats
       RGWBucketEnt ent;
       int ret = s->bucket->sync_owner_stats(this, null_yield, &ent);
-      
+
       if (ret >= 0) {
-        // Update bucket usage statistics using the entry data
+        // Update bucket stats with accurate counts
         usage_counters->update_bucket_stats(
           s->bucket->get_name(),
           ent.size,          // Total bytes used
           ent.count          // Total number of objects
         );
-        
+  
         ldout(s->cct, 20) << "Updated bucket stats for " << s->bucket->get_name()
                          << ": bytes=" << ent.size
                          << ", objects=" << ent.count << dendl;
@@ -2296,29 +2297,72 @@ void RGWOp::update_usage_stats_if_needed() {
       ldout(s->cct, 5) << "Exception updating bucket stats: " << e.what() << dendl;
     }
   }
-  
-  // Update user statistics if we have user info
+
+  // Update user statistics
   if (s->user && usage_counters) {
     try {
-      // For user stats, we'll use the bucket owner stats as a proxy
-      // since there's no direct get_user_stats method
-      if (s->bucket) {
-        RGWBucketEnt ent;
-        int ret = s->bucket->sync_owner_stats(this, null_yield, &ent);
-        
-        if (ret >= 0) {
-          // This gives us at least partial user stats
-          // In production, you might want to aggregate across all user's buckets
-          usage_counters->update_user_stats(
-            s->user->get_id().id,
-            ent.size,          // Using bucket size as proxy
-            ent.count          // Using bucket object count as proxy
-          );
-          
-          ldout(s->cct, 20) << "Updated user stats for " << s->user->get_id().id
-                           << " (based on bucket " << s->bucket->get_name() << ")"
-                           << ": bytes=" << ent.size
-                           << ", objects=" << ent.count << dendl;
+      // Initialize user stats
+      RGWStorageStats user_stats;
+      user_stats.size_utilized = 0;
+      user_stats.num_objects = 0;
+
+      // Try to list buckets and sum up stats
+      rgw::sal::BucketList buckets;
+      rgw_owner owner(s->user->get_id());
+
+      // list_buckets signature: (dpp, owner, marker, end_marker, max, need_stats, buckets, y)
+      int ret = driver->list_buckets(this, owner, "", "", "", 1000, true, buckets, null_yield);
+
+      if (ret >= 0) {
+        // Sum up stats from all buckets
+        // The buckets.buckets container holds RGWBucketEnt objects directly
+        for (const auto& bucket_ent : buckets.buckets) {
+          user_stats.size_utilized += bucket_ent.size;
+          user_stats.num_objects += bucket_ent.count;
+        }
+
+        // Update user stats with the total
+        usage_counters->update_user_stats(
+          s->user->get_id().id,
+          user_stats.size_utilized,
+          user_stats.num_objects
+        );
+
+        ldout(s->cct, 20) << "Updated user stats for " << s->user->get_id().id
+                         << " (from " << buckets.buckets.size() << " buckets)"
+                         << ": bytes=" << user_stats.size_utilized
+                         << ", objects=" << user_stats.num_objects << dendl;
+      } else {
+        // Fallback: Use current bucket stats as approximation
+        if (s->bucket) {
+          RGWBucketEnt ent;
+          ret = s->bucket->sync_owner_stats(this, null_yield, &ent);
+
+          if (ret >= 0) {
+            // Get existing cached user stats
+            auto cached_stats = usage_counters->get_user_stats(s->user->get_id().id);
+
+            uint64_t total_bytes = ent.size;
+            uint64_t total_objects = ent.count;
+
+            // If we have cached stats and this is a different bucket, try to accumulate
+            if (cached_stats.has_value()) {
+              // Use the maximum of cached and current values
+              // This prevents losing data when we can't list all buckets
+              total_bytes = std::max(cached_stats->bytes_used, ent.size);
+              total_objects = std::max(cached_stats->num_objects, ent.count);
+            }
+
+            usage_counters->update_user_stats(
+              s->user->get_id().id,
+              total_bytes,
+              total_objects
+            );
+
+            ldout(s->cct, 20) << "Updated user stats (partial) for " << s->user->get_id().id
+                             << ": bytes=" << total_bytes
+                             << ", objects=" << total_objects << dendl;
+          }
         }
       }
     } catch (const std::exception& e) {
@@ -2327,7 +2371,6 @@ void RGWOp::update_usage_stats_if_needed() {
   }
 }
 
-
 int RGWGetObj::handle_slo_manifest(bufferlist& bl, optional_yield y)
 {
   RGWSLOInfo slo_info;
index 255ee4319d7336ad19661393af53446482e3397e..ccf36239711688980c342c2862b3702b3bf0acdd 100644 (file)
@@ -62,39 +62,39 @@ void UsagePerfCounters::create_global_counters() {
 }
 
 PerfCounters* UsagePerfCounters::create_user_counters(const std::string& user_id) {
+
   std::string name = "rgw_user_" + user_id;
-  
+
   // Sanitize name for perf counters (replace non-alphanumeric with underscore)
   for (char& c : name) {
     if (!std::isalnum(c) && c != '_') {
       c = '_';
     }
   }
-  
-  PerfCountersBuilder b(cct, name, l_rgw_usage_first, l_rgw_usage_last);
-  
-  // Add placeholder counters for unused indices
-  for (int i = l_rgw_usage_first + 1; i < l_rgw_user_used_bytes; ++i) {
-    b.add_u64(i, "placeholder", "placeholder", nullptr, 0, unit_t(0));
-  }
-  
-  b.add_u64(l_rgw_user_used_bytes, "used_bytes",
+
+  // Create a separate enum range for user-specific counters
+  enum {
+    l_rgw_user_first = 930000,  // Different range from main counters
+    l_rgw_user_bytes,
+    l_rgw_user_objects,
+    l_rgw_user_last
+  };
+
+  PerfCountersBuilder b(cct, name, l_rgw_user_first, l_rgw_user_last);
+
+  b.add_u64(l_rgw_user_bytes, "used_bytes",
            "Bytes used by user", nullptr, 0, unit_t(UNIT_BYTES));
-  b.add_u64(l_rgw_user_num_objects, "num_objects",
+  b.add_u64(l_rgw_user_objects, "num_objects",
            "Number of objects owned by user", nullptr, 0, unit_t(0));
-  
-  // Add remaining placeholder counters
-  for (int i = l_rgw_user_num_objects + 1; i < l_rgw_usage_last; ++i) {
-    b.add_u64(i, "placeholder", "placeholder", nullptr, 0, unit_t(0));
-  }
-  
+
   PerfCounters* counters = b.create_perf_counters();
   cct->get_perfcounters_collection()->add(counters);
-  
+
   return counters;
 }
 
 PerfCounters* UsagePerfCounters::create_bucket_counters(const std::string& bucket_name) {
+
   std::string name = "rgw_bucket_" + bucket_name;
   
   // Sanitize name for perf counters
@@ -103,27 +103,25 @@ PerfCounters* UsagePerfCounters::create_bucket_counters(const std::string& bucke
       c = '_';
     }
   }
-  
-  PerfCountersBuilder b(cct, name, l_rgw_usage_first, l_rgw_usage_last);
-  
-  // Add placeholder counters for unused indices
-  for (int i = l_rgw_usage_first + 1; i < l_rgw_bucket_used_bytes; ++i) {
-    b.add_u64(i, "placeholder", "placeholder", nullptr, 0, unit_t(0));
-  }
-  
-  b.add_u64(l_rgw_bucket_used_bytes, "used_bytes",
+
+  // Create a separate enum range for bucket-specific counters
+  enum {
+    l_rgw_bucket_first = 940000,  // Different range from main counters
+    l_rgw_bucket_bytes,
+    l_rgw_bucket_objects,
+    l_rgw_bucket_last
+  };
+
+  PerfCountersBuilder b(cct, name, l_rgw_bucket_first, l_rgw_bucket_last);
+
+  b.add_u64(l_rgw_bucket_bytes, "used_bytes",
            "Bytes used in bucket", nullptr, 0, unit_t(UNIT_BYTES));
-  b.add_u64(l_rgw_bucket_num_objects, "num_objects",
+  b.add_u64(l_rgw_bucket_objects, "num_objects",
            "Number of objects in bucket", nullptr, 0, unit_t(0));
-  
-  // Add remaining placeholder counters
-  for (int i = l_rgw_bucket_num_objects + 1; i < l_rgw_usage_last; ++i) {
-    b.add_u64(i, "placeholder", "placeholder", nullptr, 0, unit_t(0));
-  }
-  
+
   PerfCounters* counters = b.create_perf_counters();
   cct->get_perfcounters_collection()->add(counters);
-  
+
   return counters;
 }
 
@@ -227,19 +225,30 @@ void UsagePerfCounters::update_user_stats(const std::string& user_id,
     }
   }
   
+  // Define local enum for user-specific counter indices
+  // This avoids needing placeholders in the global enum
+  enum {
+    l_rgw_user_first = 930000,  // Start at a high number to avoid conflicts
+    l_rgw_user_bytes,           // 930001
+    l_rgw_user_objects,          // 930002
+    l_rgw_user_last              // 930003
+  };
+  
   // Update or create perf counters
   {
     std::unique_lock lock(counters_mutex);
     
     auto it = user_perf_counters.find(user_id);
     if (it == user_perf_counters.end()) {
+      // Counter doesn't exist, create it
       PerfCounters* counters = create_user_counters(user_id);
       user_perf_counters[user_id] = counters;
       it = user_perf_counters.find(user_id);
     }
     
-    it->second->set(l_rgw_user_used_bytes, bytes_used);
-    it->second->set(l_rgw_user_num_objects, num_objects);
+    // Set the values using the local enum indices
+    it->second->set(l_rgw_user_bytes, bytes_used);
+    it->second->set(l_rgw_user_objects, num_objects);
   }
   
   ldout(cct, 20) << "Updated user stats: " << user_id 
@@ -262,19 +271,30 @@ void UsagePerfCounters::update_bucket_stats(const std::string& bucket_name,
     }
   }
   
+  // Define local enum for bucket-specific counter indices
+  // This avoids needing placeholders in the global enum
+  enum {
+    l_rgw_bucket_first = 940000,  // Different range from user counters
+    l_rgw_bucket_bytes,            // 940001
+    l_rgw_bucket_objects,          // 940002
+    l_rgw_bucket_last              // 940003
+  };
+  
   // Update or create perf counters
   {
     std::unique_lock lock(counters_mutex);
     
     auto it = bucket_perf_counters.find(bucket_name);
     if (it == bucket_perf_counters.end()) {
+      // Counter doesn't exist, create it
       PerfCounters* counters = create_bucket_counters(bucket_name);
       bucket_perf_counters[bucket_name] = counters;
       it = bucket_perf_counters.find(bucket_name);
     }
     
-    it->second->set(l_rgw_bucket_used_bytes, bytes_used);
-    it->second->set(l_rgw_bucket_num_objects, num_objects);
+    // Set the values using the local enum indices
+    it->second->set(l_rgw_bucket_bytes, bytes_used);
+    it->second->set(l_rgw_bucket_objects, num_objects);
   }
   
   ldout(cct, 20) << "Updated bucket stats: " << bucket_name