From 36fecf6d1ec3a42f73effea31f5f139d1a0be4e8 Mon Sep 17 00:00:00 2001 From: Yuval Lifshitz Date: Mon, 16 Jun 2025 11:05:25 +0000 Subject: [PATCH] rgw/logging: make unique part of log file both random and incremental new format will be: 10 char incremental count (so 32bit uint fit in it). and 6 char alphanumeric random part. this should fix possible race conditions in case of multisite Fixes: https://tracker.ceph.com/issues/71608 Signed-off-by: Yuval Lifshitz (cherry picked from commit fa2bc49e2e8fb7a1d353a4b741e8d4f1edb9807d) --- doc/radosgw/bucket_logging.rst | 16 +++++++++++----- src/rgw/rgw_bucket_logging.cc | 34 +++++++++++++++++++++------------- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/doc/radosgw/bucket_logging.rst b/doc/radosgw/bucket_logging.rst index 58da7caa8e141..a82566d62e647 100644 --- a/doc/radosgw/bucket_logging.rst +++ b/doc/radosgw/bucket_logging.rst @@ -26,8 +26,9 @@ in different objects in the log bucket. - Source and log bucket must be in the same zonegroup - Source and log buckets may belong to different accounts (with proper bucket policy set) - The log bucket may have object lock enabled with default retention period - - The 16 bit unique ID part of the log object name is an incrementing counter, or a random, - alphanumeric string if the counter is not available + - The 16 byte unique ID part of the log object name is a lexicographically ordered random string, + that consists of a 10 bytes counter, and a 6 bytes random alphanumeric string. + or a random, alphanumeric string if the counter is not available .. toctree:: @@ -84,12 +85,17 @@ The following operation are supported in journal mode: | ``DeleteObjectTagging`` | ``REST.DELETE.OBJECT_TAGGING`` | No | +-------------------------------+-------------------------------------+-----------------+ +Multisite +````````` +In a multi-zone deployment, each zone will use its own log object before the log object is added to the log bucket. +After the log object is added to the log bucket (e.g after being flushed) it will be replicated to other zones. +This means that for a given time period there could be more than one log object holding relevant log records. Bucket Logging Policy --------------------- On the source bucket, only its owner is allowed to enable or disable bucket logging. For a bucket to be used as a log bucket, it must have bucket policy that allows that (even if the source bucket and the log bucket are owned by the same user or account). -The bucket policy must allow the `s3:PutObject` action for the log bucket, to be perfomed by the `logging.s3.amazonaws.com` service principal. +The bucket policy must allow the `s3:PutObject` action for the log bucket, to be performed by the `logging.s3.amazonaws.com` service principal. It should also specify the source bucket and account that are expected to write logs to it. For example: :: @@ -145,7 +151,7 @@ For example: :: - fish/2024-08-06-09-40-09-TI9ROKN05DD4HPQF + fish/2024-08-06-09-40-09-0000000002AGQ6W1 Partitioned ``````````` @@ -159,7 +165,7 @@ For example: :: - fish/testid/default/fish-bucket/2024/08/06/2024-08-06-10-11-18-0000000000000002 + fish/testid/default/fish-bucket/2024/08/06/2024-08-06-10-11-18-0000000011D1FGPA Log Records ~~~~~~~~~~~ diff --git a/src/rgw/rgw_bucket_logging.cc b/src/rgw/rgw_bucket_logging.cc index b1f1597624529..e96c71a808aa3 100644 --- a/src/rgw/rgw_bucket_logging.cc +++ b/src/rgw/rgw_bucket_logging.cc @@ -174,7 +174,7 @@ std::string configuration::to_json_str() const { // create a random string of N characters template -std::string unique_string() { +std::string random_string() { static const std::string possible_characters{"0123456789ABCDEFGHIJKLMNOPQRSTUVWXY"}; static const auto max_possible_value = possible_characters.length() - 1; std::random_device rd; @@ -185,28 +185,36 @@ std::string unique_string() { return str; } -// create an incremental string of N characters if possible -// fallback to a random string of N characters if not -template +// create a string that start with an incremenatl counter +// of INC charecters and ends with a random string of RND characters +// fallback to a random string of INC+RND characters +template std::string incremental_string(const DoutPrefixProvider *dpp, std::optional old_name) { - // for the fist time we create a string of zeros + static const auto format = fmt::format("{{:0>{}}}{{}}", INC); + uint32_t counter = 0; if (!old_name) { - return std::string(N, '0'); + const auto random_part = random_string(); + return fmt::vformat(format, fmt::make_format_args(counter, random_part)); } - const auto str_counter = old_name->substr(old_name->length() - N+1, N); + const auto str_counter = old_name->substr(old_name->length() - (INC+RND), INC); try { - auto counter = boost::lexical_cast(str_counter); + counter = boost::lexical_cast(str_counter); + // we are not concerned about overflow here, as the counter is only used to + // distinguish between different logging objects created in the same second ++counter; - static const auto format = fmt::format("{{:0>{}}}", N); - return fmt::vformat(format, fmt::make_format_args(counter)); + const auto random_part = random_string(); + return fmt::vformat(format, fmt::make_format_args(counter, random_part)); } catch (const boost::bad_lexical_cast& e) { ldpp_dout(dpp, 5) << "WARNING: failed to convert string '" << str_counter << - "' to counter. " <(); + "' to counter. " << e.what() << ". will create random temporary logging file name" << dendl; + return random_string(); } } constexpr size_t UniqueStringLength = 16; +// we need 10 characters for the counter (uint32_t) +constexpr size_t CounterStringLength = 10; +constexpr size_t RandomStringLength = UniqueStringLength - CounterStringLength; ceph::coarse_real_time time_from_name(const std::string& obj_name, const DoutPrefixProvider *dpp) { static const auto time_format_length = std::string{"YYYY-MM-DD-hh-mm-ss"}.length(); @@ -251,7 +259,7 @@ int new_logging_object(const configuration& conf, std::tm t{}; localtime_r(&tt, &t); - const auto unique = incremental_string(dpp, old_name); + const auto unique = incremental_string(dpp, old_name); switch (conf.obj_key_format) { case KeyFormat::Simple: -- 2.39.5