From c3cc098236b43be9fc4f217ab79c726bcbb7f623 Mon Sep 17 00:00:00 2001 From: Kamoltat Date: Fri, 2 Jun 2023 20:06:52 +0000 Subject: [PATCH] pybind/mgr/pg_autoscaler: Use bytes_used for actual_raw_used Problem We realized that `stored` is not the correct value to represent `actual_raw_used` when it comes to pool(s) with `compression` enabled. https://github.com/ceph/ceph/pull/29986 was the PR that introduced the issue, since it simply changed `bytes_used` to `stored` because the authors wanted a per-pool value of bytes used without factoring in replication. However, they did not realize that doing so also caused pools with compression to inherit an incorrect value for `actual_raw_used`. This also caused an incorrect value for `capacity_ratio`, which matters because the autoscaler scales PGs according to the `capacity_ratio` of each pool. The existing issue causes pools with compression to have a higher `capacity_ratio` when in reality their actual utilization is lower than that of non-compressed pools, assuming we perform I/O with the same workload on each pool evenly. 
Solution Use `bytes_used` instead of `stored` when fetching `actual_raw_used`, and when calculating `pool_raw_used` use `max(actual_raw_used, target_bytes * raw_used_rate)`. Fixes: https://tracker.ceph.com/issues/54136 Signed-off-by: Kamoltat (cherry picked from commit 3d8ac80f61cd332b53aea1fa5799f8ccd3b01b66) --- src/pybind/mgr/pg_autoscaler/module.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/pybind/mgr/pg_autoscaler/module.py b/src/pybind/mgr/pg_autoscaler/module.py index 13e1cff9e88..305a3d58dd1 100644 --- a/src/pybind/mgr/pg_autoscaler/module.py +++ b/src/pybind/mgr/pg_autoscaler/module.py @@ -561,7 +561,6 @@ class PgAutoscaler(MgrModule): raw_used_rate = osdmap.pool_raw_used_rate(pool_id) - pool_logical_used = pool_stats[pool_id]['stored'] bias = p['options'].get('pg_autoscale_bias', 1.0) target_bytes = 0 # ratio takes precedence if both are set @@ -569,10 +568,10 @@ class PgAutoscaler(MgrModule): target_bytes = p['options'].get('target_size_bytes', 0) # What proportion of space are we using? - actual_raw_used = pool_logical_used * raw_used_rate + actual_raw_used = pool_stats[pool_id]['bytes_used'] actual_capacity_ratio = float(actual_raw_used) / capacity - pool_raw_used = max(pool_logical_used, target_bytes) * raw_used_rate + pool_raw_used = max(actual_raw_used, target_bytes * raw_used_rate) capacity_ratio = float(pool_raw_used) / capacity self.log.info("effective_target_ratio {0} {1} {2} {3}".format( @@ -616,7 +615,7 @@ class PgAutoscaler(MgrModule): 'crush_root_id': root_id, 'pg_autoscale_mode': p['pg_autoscale_mode'], 'pg_num_target': p['pg_num_target'], - 'logical_used': pool_logical_used, + 'logical_used': float(actual_raw_used)/raw_used_rate, 'target_bytes': target_bytes, 'raw_used_rate': raw_used_rate, 'subtree_capacity': capacity, -- 2.39.5