From: Oguzhan Ozmen Date: Fri, 13 Feb 2026 01:33:24 +0000 (+0000) Subject: rgw: handle plain-text object tags in RGWObjTags::decode() X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b2d9dbb81e9e6aa3cbb184488ccae16253586257;p=ceph.git rgw: handle plain-text object tags in RGWObjTags::decode() Objects created via multipart upload have had their tags stored as plain URL-encoded strings (e.g. "ttl_tag=23") since the tagging feature was introduced in 2017, because RGWInitMultipart::execute() never called encode_obj_tags_attr() (tracker #53016). While #53016 fixes the write path going forward, existing objects on disk still carry plain-text tags. RGWObjTags::decode() throws buffer::error on these, causing lifecycle tag matching to fail with -EIO and GetObjectTagging to return empty results. Fall back to set_from_string() when binary decoding fails, so both formats are handled transparently. Fixes: https://tracker.ceph.com/issues/74917 Signed-off-by: Oguzhan Ozmen --- diff --git a/src/rgw/rgw_tag.h b/src/rgw/rgw_tag.h index 51e2620bd42..4a1b47d3221 100644 --- a/src/rgw/rgw_tag.h +++ b/src/rgw/rgw_tag.h @@ -34,9 +34,35 @@ protected: } void decode(bufferlist::const_iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(1, 1, 1, bl); - decode(tag_map,bl); - DECODE_FINISH(bl); + // Some older objects may have stored tags as a plain URL-encoded + // string (e.g. "key=value") rather than binary ENCODE_START format. + // Try binary decode first, fall back to set_from_string() on failure. + auto start_pos = bl; + try { + DECODE_START_LEGACY_COMPAT_LEN(1, 1, 1, bl); + decode(tag_map, bl); + DECODE_FINISH(bl); + } catch (buffer::error& e) { + // Failed binary decode. Try plain-text URL-encoded tag string. + tag_map.clear(); + bl = start_pos; + std::string raw; + try { + auto remaining = bl.get_remaining(); + if (remaining > 0) { + bl.copy(remaining, raw); + // strip trailing null bytes + while (!raw.empty() && raw.back() == '\0') { + raw.pop_back(); + } + } + } catch (buffer::error&) { + throw e; + } + if (raw.empty() || set_from_string(raw) < 0) { + throw e; + } + } } void dump(Formatter *f) const; diff --git a/src/test/rgw/CMakeLists.txt b/src/test/rgw/CMakeLists.txt index 9b5ca191932..acc3ab36292 100644 --- a/src/test/rgw/CMakeLists.txt +++ b/src/test/rgw/CMakeLists.txt @@ -427,3 +427,7 @@ add_catch2_test_rgw(rgw_hex) add_executable(unittest_rgw_async_utils test_rgw_async_utils.cc) add_ceph_unittest(unittest_rgw_async_utils) target_link_libraries(unittest_rgw_async_utils ${rgw_libs} ${UNITTEST_LIBS}) + +add_executable(unittest_rgw_tag test_rgw_tag.cc) +add_ceph_unittest(unittest_rgw_tag) +target_link_libraries(unittest_rgw_tag ${rgw_libs} ${UNITTEST_LIBS}) diff --git a/src/test/rgw/test_rgw_tag.cc b/src/test/rgw/test_rgw_tag.cc new file mode 100644 index 00000000000..dec9866bb7d --- /dev/null +++ b/src/test/rgw/test_rgw_tag.cc @@ -0,0 +1,217 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "gtest/gtest.h" +#include "rgw_tag.h" + +using namespace std; + +// Helper: binary-encode an RGWObjTags into a bufferlist (the normal path) +static bufferlist encode_tags(const RGWObjTags& tags) { + bufferlist bl; + tags.encode(bl); + return bl; +} + +// Helper: store a raw plain-text string into a bufferlist the way +// rgw_get_request_metadata() does: bl.append(str.c_str(), str.size() + 1) +// (includes trailing null byte) +static bufferlist plain_text_bl(const string& s) { + bufferlist bl; + bl.append(s.c_str(), s.size() + 1); + return bl; +} + +// ---------- Binary-encoded tags (the normal path) ---------- + +TEST(RGWObjTagsDecode, BinaryEncodedSingleTag) +{ + RGWObjTags src; + src.add_tag("ttl_tag", "23"); + + bufferlist bl = encode_tags(src); + + RGWObjTags dst; + auto iter = bl.cbegin(); + dst.decode(iter); + + ASSERT_EQ(dst.count(), 1u); + auto& m = dst.get_tags(); + auto it = m.find("ttl_tag"); + ASSERT_NE(it, m.end()); + EXPECT_EQ(it->second, "23"); +} + +TEST(RGWObjTagsDecode, BinaryEncodedMultipleTags) +{ + RGWObjTags src; + src.add_tag("env", "production"); + src.add_tag("team", "storage"); + src.add_tag("ttl", "86400"); + + bufferlist bl = encode_tags(src); + + RGWObjTags dst; + auto iter = bl.cbegin(); + dst.decode(iter); + + ASSERT_EQ(dst.count(), 3u); + auto& m = dst.get_tags(); + EXPECT_EQ(m.find("env")->second, "production"); + EXPECT_EQ(m.find("team")->second, "storage"); + EXPECT_EQ(m.find("ttl")->second, "86400"); +} + +// ---------- Plain-text tags (before the fix for tracker #53016 used) ----- + +TEST(RGWObjTagsDecode, PlainTextSingleTag) +{ + // This is what rgw_get_request_metadata() stored for multipart uploads: + // the raw HTTP header value "key=value" plus a trailing null byte. + bufferlist bl = plain_text_bl("ttl_tag=23"); + + RGWObjTags dst; + auto iter = bl.cbegin(); + dst.decode(iter); + + ASSERT_EQ(dst.count(), 1u); + auto& m = dst.get_tags(); + auto it = m.find("ttl_tag"); + ASSERT_NE(it, m.end()); + EXPECT_EQ(it->second, "23"); +} + +TEST(RGWObjTagsDecode, PlainTextMultipleTags) +{ + // Multiple tags are &-separated, URL-encoded + bufferlist bl = plain_text_bl("env=production&team=storage&ttl=86400"); + + RGWObjTags dst; + auto iter = bl.cbegin(); + dst.decode(iter); + + ASSERT_EQ(dst.count(), 3u); + auto& m = dst.get_tags(); + EXPECT_EQ(m.find("env")->second, "production"); + EXPECT_EQ(m.find("team")->second, "storage"); + EXPECT_EQ(m.find("ttl")->second, "86400"); +} + +TEST(RGWObjTagsDecode, PlainTextUrlEncodedTag) +{ + // Keys/values with special characters get URL-encoded + bufferlist bl = plain_text_bl("my%20key=my%20value"); + + RGWObjTags dst; + auto iter = bl.cbegin(); + dst.decode(iter); + + ASSERT_EQ(dst.count(), 1u); + auto& m = dst.get_tags(); + auto it = m.find("my key"); + ASSERT_NE(it, m.end()); + EXPECT_EQ(it->second, "my value"); +} + +TEST(RGWObjTagsDecode, PlainTextNoValue) +{ + // Tag with key only, no '=' + bufferlist bl = plain_text_bl("orphan_key"); + + RGWObjTags dst; + auto iter = bl.cbegin(); + dst.decode(iter); + + ASSERT_EQ(dst.count(), 1u); + auto& m = dst.get_tags(); + auto it = m.find("orphan_key"); + ASSERT_NE(it, m.end()); + EXPECT_EQ(it->second, ""); +} + +// ---------- Edge cases ---------- + +TEST(RGWObjTagsDecode, PlainTextNoTrailingNull) +{ + // Plain text without trailing null (just raw bytes) + string s = "key1=val1&key2=val2"; + bufferlist bl; + bl.append(s.data(), s.size()); // no trailing '\0' + + RGWObjTags dst; + auto iter = bl.cbegin(); + dst.decode(iter); + + ASSERT_EQ(dst.count(), 2u); + auto& m = dst.get_tags(); + EXPECT_EQ(m.find("key1")->second, "val1"); + EXPECT_EQ(m.find("key2")->second, "val2"); +} + +TEST(RGWObjTagsDecode, BinaryRoundTrip) +{ + // Encode -> decode -> re-encode should produce identical bufferlist + RGWObjTags src; + src.add_tag("alpha", "1"); + src.add_tag("beta", "2"); + + bufferlist bl1 = encode_tags(src); + + RGWObjTags dst; + auto iter = bl1.cbegin(); + dst.decode(iter); + + bufferlist bl2 = encode_tags(dst); + + EXPECT_TRUE(bl1.contents_equal(bl2)); +} + +// ---------- Exception handling tests ---------- + +TEST(RGWObjTagsDecode, TagKeyTooLongThrowsOriginalException) +{ + // Create plain-text tag with key longer than max_tag_key_size (128) + // set_from_string() will return -ERR_INVALID_TAG + string long_key(200, 'k'); // 200 chars, exceeds 128 limit + string tag_string = long_key + "=value"; + bufferlist bl = plain_text_bl(tag_string); + + RGWObjTags dst; + auto iter = bl.cbegin(); + EXPECT_THROW(dst.decode(iter), buffer::error); +} + +TEST(RGWObjTagsDecode, EmptyTagKeyThrowsOriginalException) +{ + // Plain-text with empty key: "=value" - empty key is invalid + // set_from_string() returns -ERR_INVALID_TAG for empty key + bufferlist bl = plain_text_bl("=value"); + + RGWObjTags dst; + auto iter = bl.cbegin(); + EXPECT_THROW(dst.decode(iter), buffer::error); +} + +TEST(RGWObjTagsDecode, EmptyAfterNullStrippingThrowsOriginalException) +{ + // Buffer containing only null bytes: fails binary decode, then after + // stripping trailing nulls we have empty raw string -> throws original exception + bufferlist bl; + bl.append("\0\0\0", 3); // just null bytes + + RGWObjTags dst; + auto iter = bl.cbegin(); + EXPECT_THROW(dst.decode(iter), buffer::error); +} + +TEST(RGWObjTagsDecode, EmptyBufferlist) +{ + // Empty bufferlist should produce no tags and also raise exception + bufferlist bl; + + RGWObjTags dst; + auto iter = bl.cbegin(); + + EXPECT_THROW(dst.decode(iter), buffer::error); + EXPECT_EQ(dst.count(), 0u); +}