]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
rgw/admin: Add max-entries and marker to bucket list
authorTobias Urdin <tobias.urdin@binero.com>
Fri, 11 Apr 2025 08:13:57 +0000 (10:13 +0200)
committerTobias Urdin <tobias.urdin@binero.com>
Thu, 11 Sep 2025 13:41:57 +0000 (15:41 +0200)
This adds pagination to the /admin/bucket endpoint for the
Admin API.

If a user has a lot of buckets the /admin/bucket endpoint
that is listing buckets can be so long that the HTTP request
gets a timeout.

This adds the ``max-entries`` and ``marker`` query parameters
to the API to support pagination. If ``max-entries`` is given
we introduce a new format for the HTTP response body the same
way that metadata API does, if it's not given we return the
response with the same body as before and thus retaining the
backward compatibility of the API.

This adds a Python3 based test suite that tests all of this
functionality to verify the behaviour and the HTTP response
body itself.

This fixes the pagination mentioned in tracker [1] and thus
fixes (or atleast partially fixes) that.

[1] https://tracker.ceph.com/issues/22168

Fixes: https://tracker.ceph.com/issues/22168
Signed-off-by: Tobias Urdin <tobias.urdin@binero.com>
(cherry picked from commit 1d5523ec0bec916e0a87fdcb8d27b67753e477b6)

src/rgw/driver/rados/rgw_bucket.cc
src/rgw/driver/rados/rgw_bucket.h
src/rgw/driver/rados/rgw_rest_bucket.cc
src/test/rgw/test_rgw_admin_bucket.py [new file with mode: 0644]

index ebc452644a477a5f467946fcb0128eae20a5cddd..6fca78c2656c502115bf896453cddaac14c993b2 100644 (file)
@@ -1541,21 +1541,48 @@ static int list_owner_bucket_info(const DoutPrefixProvider* dpp,
                                   const rgw_owner& owner,
                                   const std::string& tenant,
                                   const std::string& marker,
+                                 uint32_t max_entries,
                                   bool show_stats,
                                   RGWFormatterFlusher& flusher)
 {
-  Formatter* formatter = flusher.get_formatter();
-  formatter->open_array_section("buckets");
+  bool max_entries_specified = (max_entries > 0);
 
   const std::string empty_end_marker;
-  const size_t max_entries = dpp->get_cct()->_conf->rgw_list_buckets_max_chunk;
+  const size_t list_buckets_max = dpp->get_cct()->_conf->rgw_list_buckets_max_chunk;
+
+  uint32_t max_items = (uint32_t)list_buckets_max;
+
+  if (max_entries_specified) {
+    /* we never want to allow max_items higher than rgw_list_buckets_max_chunk */
+    if (max_entries > list_buckets_max) {
+      max_items = list_buckets_max;
+    } else {
+      max_items = max_entries;
+    }
+  }
+
   constexpr bool no_need_stats = false; // set need_stats to false
 
   rgw::sal::BucketList listing;
   listing.next_marker = marker;
+  bool truncated = true, done = false;
+  uint64_t count = 0;
+
+  Formatter* formatter = flusher.get_formatter();
+
+  if (max_entries_specified) {
+    formatter->open_object_section("result");
+  }
+
+  formatter->open_array_section("buckets");
+
   do {
+    if (max_entries_specified && (max_entries - count) < max_items) {
+      max_items = (max_entries - count);
+    }
+
     int ret = driver->list_buckets(dpp, owner, tenant, listing.next_marker,
-                                   empty_end_marker, max_entries, no_need_stats,
+                                   empty_end_marker, max_items, no_need_stats,
                                    listing, y);
     if (ret < 0) {
       return ret;
@@ -1567,12 +1594,28 @@ static int list_owner_bucket_info(const DoutPrefixProvider* dpp,
       } else {
         formatter->dump_string("bucket", ent.bucket.name);
       }
+      count++;
+      if (max_entries_specified && count >= max_entries) {
+        done = true;
+        break;
+      }
     } // for loop
 
     flusher.flush();
-  } while (!listing.next_marker.empty());
 
-  formatter->close_section();
+    truncated = (!listing.next_marker.empty());
+  } while (truncated && !done);
+
+  formatter->close_section(); // buckets
+
+  if (max_entries_specified) {
+    encode_json("truncated", truncated, formatter);
+    encode_json("count", count, formatter);
+    if (truncated)
+      encode_json("marker", listing.next_marker, formatter);
+    formatter->close_section(); // result
+  }
+
   return 0;
 }
 
@@ -1615,10 +1658,10 @@ int RGWBucketAdminOp::info(rgw::sal::Driver* driver,
       ldpp_dout(dpp, 1) << "Listing buckets in user account "
           << info.account_id << dendl;
       ret = list_owner_bucket_info(dpp, y, driver, info.account_id, uid.tenant,
-                                   op_state.marker, show_stats, flusher);
+                                   op_state.marker, op_state.max_entries, show_stats, flusher);
     } else {
       ret = list_owner_bucket_info(dpp, y, driver, uid, uid.tenant,
-                                   op_state.marker, show_stats, flusher);
+                                   op_state.marker, op_state.max_entries, show_stats, flusher);
     }
     if (ret < 0) {
       return ret;
@@ -1637,7 +1680,7 @@ int RGWBucketAdminOp::info(rgw::sal::Driver* driver,
     }
 
     ret = list_owner_bucket_info(dpp, y, driver, account_id, info.tenant,
-                                 op_state.marker, show_stats, flusher);
+                                 op_state.marker, op_state.max_entries, show_stats, flusher);
     if (ret < 0) {
       return ret;
     }
index 82fc3b9448fa2510c9ab3bcaa763905aacc3fc32..07ba76a3209f0722fc21789c4ab65035609ac394 100644 (file)
@@ -233,6 +233,7 @@ struct RGWBucketAdminOpState {
   std::string object_name;
   std::string new_bucket_name;
   std::string marker;
+  uint32_t max_entries;
 
   bool list_buckets;
   bool stat_buckets;
index 3468aa0de7bc0cf9337d2ebd063d1a5d651df595..1870e3625f6ecd4439a6818bf55c9d1dfb884c0f 100644 (file)
@@ -46,6 +46,14 @@ void RGWOp_Bucket_Info::execute(optional_yield y)
   RESTArgs::get_string(s, "bucket", bucket, &bucket);
   RESTArgs::get_bool(s, "stats", false, &fetch_stats);
 
+  uint32_t max_entries;
+  std::string marker;
+  RESTArgs::get_uint32(s, "max-entries", 0, &max_entries);
+  RESTArgs::get_string(s, "marker", marker, &marker);
+
+  op_state.max_entries = max_entries;
+  op_state.marker = marker;
+
   op_state.set_user_id(uid);
   op_state.set_bucket_name(bucket);
   op_state.set_fetch_stats(fetch_stats);
diff --git a/src/test/rgw/test_rgw_admin_bucket.py b/src/test/rgw/test_rgw_admin_bucket.py
new file mode 100644 (file)
index 0000000..e5a3376
--- /dev/null
@@ -0,0 +1,355 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2025 Binero
+# Author: Tobias Urdin <tobias.urdin@binero.com>
+#
+# Source the src/test/detect-build-env-vars.sh script to set the
+# environment variables before running these tests.
+#
+# You need to install the requests and boto3 python3 modules to
+# run this test suite.
+
+import boto3
+from botocore.auth import HmacV1Auth
+from botocore.awsrequest import AWSRequest
+from botocore.compat import (parse_qsl, urlparse)
+import json
+import os
+import requests
+import subprocess
+import unittest
+from urllib.parse import urljoin
+
+import typing as ty
+
+
+class RGWAdminException(Exception):
+    def __init__(self, r: subprocess.CompletedProcess):
+        message = (
+            f"radosgw-admin command with args {str(r.args)} failed, "
+            f"return code: {r.returncode} stdout: "
+            f"{str(r.stdout)} stderr: {str(r.stderr)}"
+        )
+        super().__init__(message)
+
+
+class RGWUserNotFound(Exception):
+    pass
+
+
+class AWSAuth(requests.auth.AuthBase):
+    def __init__(self, session=None):
+        self.session = session or boto3.Session()
+        self.sig = HmacV1Auth(
+            credentials=self.session.get_credentials(),
+        )
+
+    def __call__(self, request):
+        url = urlparse(request.url)
+        awsrequest = AWSRequest(
+            method=request.method,
+            url=f'{url.scheme}://{url.netloc}{url.path}',
+            data=request.body,
+            params=dict(parse_qsl(url.query)),
+        )
+        self.sig.add_auth(awsrequest)
+        for key, val in request.headers.items():
+            if key not in awsrequest.headers:
+                awsrequest.headers[key] = val
+        return awsrequest.prepare()
+
+
+class TestRGWAdminHelper:
+    def __init__(self) -> None:
+        self.ceph_bin_dir = os.environ.get('CEPH_BIN', None)
+        if self.ceph_bin_dir is None:
+            raise RuntimeError(
+                "Could not find CEPH_BIN env var, you need to "
+                "source the detect-build-env-vars.sh script")
+
+        self.radosgw_admin = os.path.join(
+            self.ceph_bin_dir, 'radosgw-admin')
+
+    def _run_radosgw_admin(
+            self, args: ty.List[str]) -> subprocess.CompletedProcess:
+        """Run radosgw-admin command."""
+        cmd = [self.radosgw_admin, '--format=json']
+        cmd += args
+        return subprocess.run(cmd, capture_output=True)
+
+    def _json(self, r: subprocess.CompletedProcess) -> ty.Any:
+        """Decode and parse JSON data."""
+        data = r.stdout.decode('utf-8')
+        return json.loads(data)
+
+    def get_rgw_user(self, uid: str) -> ty.Dict:
+        """Get a RGW user using radosgw-admin."""
+        r = self._run_radosgw_admin(['user', 'info', f'--uid={uid}'])
+        if r.returncode == 22:
+            raise RGWUserNotFound()
+        if r.returncode != 0:
+            raise RGWAdminException(r)
+
+        return self._json(r)
+
+    def create_rgw_user(
+        self, uid: str, display_name: str, caps: str = ""
+    ) -> ty.Dict:
+        """Create a RGW user using radosgw-admin."""
+        args = [
+            'user', 'create', f'--uid={uid}',
+            f'--display-name={display_name}',
+        ]
+        if caps != "":
+            args += [f'--caps={caps}']
+
+        r = self._run_radosgw_admin(args)
+        if r.returncode != 0:
+            raise RGWAdminException(r)
+
+        return self._json(r)
+
+    def get_or_create_rgw_user(
+        self, uid: str, display_name: str, caps: str = ""
+    ) -> ty.Dict:
+        """Get or create RGW user using radosgw-admin."""
+        try:
+            return self.get_rgw_user(uid)
+        except RGWUserNotFound:
+            return self.create_rgw_user(uid, display_name, caps)
+
+    def set_user_max_buckets(self, uid: str, max_buckets: int) -> None:
+        """Set max-buckets for a RGW user by uid."""
+        args = [
+            'user', 'modify', f'--uid={uid}',
+            f'--max-buckets={max_buckets}'
+        ]
+        r = self._run_radosgw_admin(args)
+        if r.returncode != 0:
+            raise RGWAdminException(r)
+
+    def get_boto3_session(self, user: ty.Dict) -> boto3.session.Session:
+        """Get a boto3 session for a RGW user."""
+        assert 'keys' in user
+        assert len(user['keys']) == 1
+        assert 'access_key' in user['keys'][0]
+        assert 'secret_key' in user['keys'][0]
+
+        return boto3.session.Session(
+            aws_access_key_id=user['keys'][0]['access_key'],
+            aws_secret_access_key=user['keys'][0]['secret_key'])
+
+
+class TestRGWAdminBucket(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:
+        cls.helper = TestRGWAdminHelper()
+
+        admin_caps = "usage=read;metadata=read;users=read;buckets=read"
+        cls.admin_user = cls.helper.get_or_create_rgw_user(
+            'adminbucket_admin', 'Admin Bucket User', admin_caps)
+
+        cls.admin_session = cls.helper.get_boto3_session(
+            cls.admin_user)
+
+        cls.endpoint = 'http://localhost:8000'
+        cls.session = requests.Session()
+        cls.session.auth = AWSAuth(cls.admin_session)
+
+        test_user_uid = 'adminbucket_user'
+        cls.test_user = cls.helper.get_or_create_rgw_user(
+            test_user_uid, 'Admin Bucket Test User')
+        cls.helper.set_user_max_buckets(test_user_uid, 2000)
+
+        cls.test_session = cls.helper.get_boto3_session(
+            cls.test_user)
+        cls.test_client = cls.test_session.client(
+            's3', endpoint_url=cls.endpoint)
+
+        cls._populate_buckets()
+
+        cls.params = {
+            'uid': test_user_uid,
+            'stats': False,
+        }
+
+    @classmethod
+    def _populate_buckets(cls) -> None:
+        """
+        Populate 1500 buckets for the test user.
+        """
+        num_test_buckets = 1500
+
+        resp = cls.test_client.list_buckets()
+        num_buckets = len(resp['Buckets'])
+
+        print(f'Number of buckets: {num_buckets}')
+        assert num_buckets == 0 or num_buckets == num_test_buckets
+
+        if num_buckets == 0:
+            print(
+                f'Populating {num_test_buckets} buckets for test user...')
+
+            for i in range(1, num_test_buckets + 1):
+                bucket_name = f"test-bucket-{i}"
+                cls.test_client.create_bucket(Bucket=bucket_name)
+
+        # Populate a list of expected bucket
+        cls.expected_buckets = [f"test-bucket-{i}" for i in
+                                range(1, num_test_buckets + 1)]
+
+    def _get_url(
+        self, path: str = '', params: ty.Optional[ty.Dict] = None
+    ) -> ty.Dict:
+        """
+        Prepare HTTP URL and do a authenticated HTTP GET request
+        to the URL, raise exception based on HTTP status code and
+        if ok return data parsed from JSON response.
+        """
+        url = urljoin(self.endpoint, path)
+        r = self.session.get(url, params=params)
+        r.raise_for_status()
+        return r.json()
+
+    def _test_original_bucket_list(self, stats: bool = False) -> None:
+        # Expects original format without stats:
+        # [
+        #   "bucket-1",
+        #   "bucket-2"
+        #   ...
+        # ]
+        # Expects original format with stats:
+        # [
+        #   {"bucket": "bucket-1", ...},
+        #   {"bucket": "bucket-2", ...}
+        #   ...
+        # ]
+        params = self.params.copy()
+        params['stats'] = stats
+        r = self._get_url('/admin/bucket', params)
+        self.assertEqual(len(r), len(self.expected_buckets))
+
+    def test_original_bucket_list_without_stats(self) -> None:
+        self._test_original_bucket_list(stats=False)
+
+    def test_original_bucket_list_with_stats(self) -> None:
+        self._test_original_bucket_list(stats=True)
+
+    def _test_bucket_list_max_entries(self, stats: bool = False) -> None:
+        # Expects new format:
+        # {
+        #   "buckets": [
+        #     "bucket-1",
+        #     "bucket-2",
+        #     ...
+        #   ],
+        #   "count": 44,
+        #   "truncated": true,
+        #   "marker": "bucket-44"
+        # }
+        params = self.params.copy()
+        params['stats'] = stats
+        params['max-entries'] = 123
+        r = self._get_url('/admin/bucket', params)
+        for key in ['buckets', 'count', 'truncated', 'marker']:
+            self.assertIn(key, r)
+        self.assertEqual(len(r['buckets']), params['max-entries'])
+        self.assertEqual(r['count'], params['max-entries'])
+        self.assertTrue(r['truncated'])
+        if stats:
+            marker_bucket = r['buckets'][-1]['bucket']
+        else:
+            marker_bucket = r['buckets'][-1]
+        self.assertEqual(r['marker'], marker_bucket)
+
+    def test_bucket_list_max_entries_without_stats(self) -> None:
+        self._test_bucket_list_max_entries(stats=False)
+
+    def test_bucket_list_max_entries_with_stats(self) -> None:
+        self._test_bucket_list_max_entries(stats=True)
+
+    def test_bucket_list_max_entries_capped(self) -> None:
+        """
+        Test that max-entries > 1000 works when the RGW admin
+        API in the background will restrict max_items to
+        rgw_list_buckets_max_chunk (that defaults to 1000).
+        """
+        params = self.params.copy()
+        params['max-entries'] = 1200
+        r = self._get_url('/admin/bucket', params)
+        self.assertEqual(len(r['buckets']), 1200)
+        self.assertEqual(r['count'], 1200)
+        # Verify that truncated is indeed true since first
+        # iteration in backend would return 1000 buckets and
+        # the next iteration should only return 200 and say
+        # it's truncated and not read up the next 1000 buckets.
+        self.assertTrue(r['truncated'])
+        self.assertIn('marker', r)
+
+    def test_bucket_list_max_entries_negative(self) -> None:
+        """
+        Test with a negative max-entries should ignore max-entries
+        completely and return everything.
+        """
+        params = self.params.copy()
+        params['max-entries'] = -400
+        r = self._get_url('/admin/bucket', params)
+        self.assertEqual(len(r['buckets']), len(self.expected_buckets))
+        self.assertEqual(r['count'], len(self.expected_buckets))
+        self.assertFalse(r['truncated'])
+        self.assertNotIn('marker', r)
+
+    def test_bucket_list_marker_only(self) -> None:
+        """
+        Test with marker only.
+        """
+        params = self.params.copy()
+        sorted_buckets = self.expected_buckets.copy()
+        sorted_buckets.sort()
+        bucket_key = 100
+        params['marker'] = sorted_buckets[bucket_key-1]
+        r = self._get_url('/admin/bucket', params)
+        self.assertEqual(len(r), len(self.expected_buckets)-bucket_key)
+
+    def test_bucket_list_paginate_until_end(self) -> None:
+        """
+        Test to paginate through all buckets.
+        """
+        params = self.params.copy()
+        params['max-entries'] = 100
+
+        truncated = True
+        last_marker = None
+        num_buckets = 0
+        buckets = []
+
+        while truncated:
+            if last_marker is not None:
+                params['marker'] = last_marker
+
+            r = self._get_url('/admin/bucket', params)
+
+            self.assertEqual(len(r['buckets']), params['max-entries'])
+            self.assertEqual(r['count'], params['max-entries'])
+
+            buckets += r['buckets']
+            num_buckets += r['count']
+
+            if r['truncated']:
+                last_marker = r['marker']
+            else:
+                self.assertNotIn('marker', r)
+
+            truncated = r['truncated']
+
+        self.assertEqual(num_buckets, len(self.expected_buckets))
+        self.assertEqual(len(buckets), len(self.expected_buckets))
+        sorted_buckets = buckets.copy()
+        sorted_buckets.sort()
+        sorted_exp_buckets = self.expected_buckets.copy()
+        sorted_exp_buckets.sort()
+        self.assertEqual(sorted_buckets, sorted_exp_buckets)
+
+
+if __name__ == '__main__':
+    unittest.main()