From ca9e0f40612bc2ed98f0c4cf3119f06f0beed812 Mon Sep 17 00:00:00 2001 From: Ali Masarwa Date: Sun, 1 Dec 2024 13:28:03 +0200 Subject: [PATCH] RGW/logging: add filtering for bucket logging Signed-off-by: Ali Masarwa --- doc/radosgw/bucket_logging.rst | 4 +- doc/radosgw/s3/bucketops.rst | 40 +++ examples/rgw/boto3/bucket_logging.py | 11 + examples/rgw/boto3/service-2.sdk-extras.json | 4 + src/rgw/CMakeLists.txt | 1 + src/rgw/rgw_bucket_logging.cc | 12 + src/rgw/rgw_bucket_logging.h | 28 ++ src/rgw/rgw_pubsub.cc | 208 -------------- src/rgw/rgw_pubsub.h | 86 +----- src/rgw/rgw_s3_filter.cc | 269 +++++++++++++++++++ src/rgw/rgw_s3_filter.h | 102 +++++++ 11 files changed, 470 insertions(+), 295 deletions(-) create mode 100644 src/rgw/rgw_s3_filter.cc create mode 100644 src/rgw/rgw_s3_filter.h diff --git a/doc/radosgw/bucket_logging.rst b/doc/radosgw/bucket_logging.rst index d96ffbe2758a2..3d04c616455f5 100644 --- a/doc/radosgw/bucket_logging.rst +++ b/doc/radosgw/bucket_logging.rst @@ -38,8 +38,8 @@ Journal If logging type is set to "Journal", the records are written to the log bucket before the bucket operation is completed. This means that if the logging action fails, the operation will not be executed, and an error will be returned to the client. An exception to the above are "multi/delete" log records: if writing these log records fail, the operation continues and may still be successful. -Note that it may happen that the log records were successfully written, but the bucket operation failed, since the logs are written -before such a failure, there will be no indication for that in the log records. +Journal mode supports filtering out records based on matches of the prefixes and suffixes of the logged object keys. Regular-expression matching can also be used on these to create filters. +Note that it may happen that the log records were successfully written, but the bucket operation failed. Since the logs are written before such a failure, there will be no indication for that in the log records. 
Bucket Logging REST API diff --git a/doc/radosgw/s3/bucketops.rst b/doc/radosgw/s3/bucketops.rst index 2e5cc48646dee..4f354f14dab74 100644 --- a/doc/radosgw/s3/bucketops.rst +++ b/doc/radosgw/s3/bucketops.rst @@ -751,6 +751,26 @@ Parameters are XML encoded in the body of the request, in the following format: string Standard|Journal integer + + + + suffix/prefix/regex + + + + + + + + + + + + + + + + @@ -881,6 +901,26 @@ Response is XML encoded in the body of the request, in the following format: string Standard|Journal integer + + + + suffix/prefix/regex + + + + + + + + + + + + + + + + diff --git a/examples/rgw/boto3/bucket_logging.py b/examples/rgw/boto3/bucket_logging.py index fdc219c57650a..7a972dac8bca4 100644 --- a/examples/rgw/boto3/bucket_logging.py +++ b/examples/rgw/boto3/bucket_logging.py @@ -39,6 +39,17 @@ bucket_logging_conf = {'LoggingEnabled': { }, 'ObjectRollTime': 60, 'LoggingType': 'Journal', + "Filter": { + "Key": { + "FilterRules": + [ + { + "Name": "prefix", + "Value": "myfile" + } + ] + } + } } } diff --git a/examples/rgw/boto3/service-2.sdk-extras.json b/examples/rgw/boto3/service-2.sdk-extras.json index 5c22ee9f2487b..15aa6bc853803 100644 --- a/examples/rgw/boto3/service-2.sdk-extras.json +++ b/examples/rgw/boto3/service-2.sdk-extras.json @@ -287,6 +287,10 @@ "RecordsBatchSize":{ "shape":"RecordsBatchSize", "documentation":"indicates how many records to batch in memory before writing to the object. if set to zero, records are written syncronously to the object. if ObjectRollTimee is reached, the batch of records will be written to the object regardless of the number of records.

" + }, + "Filter":{ + "shape":"NotificationConfigurationFilter", + "documentation":"

A filter for all log objects. An object can be filtered by its attributes, tags and key (prefix, suffix and regex).

" } }, "documentation":"

Describes where logs are stored the prefix assigned to all log object keys for a bucket, and their format. also, the level the delivery guarantee of the records.

" diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt index 96f3237b896e8..3727c525ce779 100644 --- a/src/rgw/CMakeLists.txt +++ b/src/rgw/CMakeLists.txt @@ -90,6 +90,7 @@ set(librgw_common_srcs rgw_notify_event_type.cc rgw_period_history.cc rgw_period_puller.cc + rgw_s3_filter.cc rgw_pubsub.cc rgw_coroutine.cc rgw_cr_rest.cc diff --git a/src/rgw/rgw_bucket_logging.cc b/src/rgw/rgw_bucket_logging.cc index 87a242d9952b2..f63efeee9408d 100644 --- a/src/rgw/rgw_bucket_logging.cc +++ b/src/rgw/rgw_bucket_logging.cc @@ -31,6 +31,7 @@ bool configuration::decode_xml(XMLObj* obj) { logging_type = LoggingType::Standard; } else if (type == "Journal") { logging_type = LoggingType::Journal; + RGWXMLDecoder::decode_xml("Filter", s3_filter, o); } else { // we don't allow for type "Any" in the configuration throw RGWXMLDecoder::err("invalid bucket logging record type: '" + type + "'"); @@ -73,6 +74,9 @@ void configuration::dump_xml(Formatter *f) const { break; case LoggingType::Journal: ::encode_xml("LoggingType", "Journal", f); + if (s3_filter.has_content()) { + ::encode_xml("Filter", s3_filter, f); + } break; case LoggingType::Any: ::encode_xml("LoggingType", "", f); @@ -118,6 +122,9 @@ void configuration::dump(Formatter *f) const { break; case LoggingType::Journal: encode_json("loggingType", "Journal", f); + if (s3_filter.has_content()) { + encode_json("Filter", s3_filter, f); + } break; case LoggingType::Any: encode_json("loggingType", "", f); @@ -526,6 +533,11 @@ int log_record(rgw::sal::Driver* driver, if (type != LoggingType::Any && configuration.logging_type != type) { return 0; } + if (configuration.s3_filter.has_content()) { + if (!match(configuration.s3_filter, obj)) { + return 0; + } + } ldpp_dout(dpp, 20) << "INFO: found matching logging configuration of bucket '" << s->bucket->get_name() << "' configuration: " << configuration.to_json_str() << dendl; if (auto ret = log_record(driver, obj, s, op_name, etag, size, configuration, dpp, y, async_completion, 
log_source_bucket); ret < 0) { diff --git a/src/rgw/rgw_bucket_logging.h b/src/rgw/rgw_bucket_logging.h index d380cf0ef0927..bedd5d3e0bd02 100644 --- a/src/rgw/rgw_bucket_logging.h +++ b/src/rgw/rgw_bucket_logging.h @@ -10,6 +10,7 @@ #include "include/buffer.h" #include "include/encoding.h" #include "common/async/yield_context.h" +#include "rgw_s3_filter.h" class XMLObj; namespace ceph { class Formatter; } @@ -48,6 +49,26 @@ namespace rgw::bucketlogging { Standard|Journal integer integer + + + + suffix/prefix/regex + + + + + + + + + + + + + + + + */ @@ -78,6 +99,7 @@ struct configuration { PartitionDateSource date_source = PartitionDateSource::DeliveryTime; // EventTime: use only year, month, and day. The hour, minutes and seconds are set to 00 in the key // DeliveryTime: the time the log object was created + rgw_s3_filter s3_filter; bool decode_xml(XMLObj *obj); void dump_xml(Formatter *f) const; void dump(Formatter *f) const; // json @@ -92,6 +114,9 @@ struct configuration { encode(static_cast(logging_type), bl); encode(records_batch_size, bl); encode(static_cast(date_source), bl); + if (logging_type == LoggingType::Journal) { + encode(s3_filter, bl); + } ENCODE_FINISH(bl); } @@ -108,6 +133,9 @@ struct configuration { decode(records_batch_size, bl); decode(type, bl); date_source = static_cast(type); + if (logging_type == LoggingType::Journal) { + decode(s3_filter, bl); + } DECODE_FINISH(bl); } }; diff --git a/src/rgw/rgw_pubsub.cc b/src/rgw/rgw_pubsub.cc index cb68d72d7da59..87a46bd61a6eb 100644 --- a/src/rgw/rgw_pubsub.cc +++ b/src/rgw/rgw_pubsub.cc @@ -62,214 +62,6 @@ void set_event_id(std::string& id, const std::string& hash, const utime_t& ts) { } } -void rgw_s3_key_filter::dump(Formatter *f) const { - if (!has_content()) { - return; - } - f->open_array_section("FilterRules"); - if (!prefix_rule.empty()) { - f->open_object_section(""); - ::encode_json("Name", "prefix", f); - ::encode_json("Value", prefix_rule, f); - f->close_section(); - } - if 
(!suffix_rule.empty()) { - f->open_object_section(""); - ::encode_json("Name", "suffix", f); - ::encode_json("Value", suffix_rule, f); - f->close_section(); - } - if (!regex_rule.empty()) { - f->open_object_section(""); - ::encode_json("Name", "regex", f); - ::encode_json("Value", regex_rule, f); - f->close_section(); - } - f->close_section(); -} - -bool rgw_s3_key_filter::decode_xml(XMLObj* obj) { - XMLObjIter iter = obj->find("FilterRule"); - XMLObj *o; - - const auto throw_if_missing = true; - auto prefix_not_set = true; - auto suffix_not_set = true; - auto regex_not_set = true; - std::string name; - - while ((o = iter.get_next())) { - RGWXMLDecoder::decode_xml("Name", name, o, throw_if_missing); - if (name == "prefix" && prefix_not_set) { - prefix_not_set = false; - RGWXMLDecoder::decode_xml("Value", prefix_rule, o, throw_if_missing); - } else if (name == "suffix" && suffix_not_set) { - suffix_not_set = false; - RGWXMLDecoder::decode_xml("Value", suffix_rule, o, throw_if_missing); - } else if (name == "regex" && regex_not_set) { - regex_not_set = false; - RGWXMLDecoder::decode_xml("Value", regex_rule, o, throw_if_missing); - } else { - throw RGWXMLDecoder::err("invalid/duplicate S3Key filter rule name: '" + name + "'"); - } - } - return true; -} - -void rgw_s3_key_filter::dump_xml(Formatter *f) const { - if (!prefix_rule.empty()) { - f->open_object_section("FilterRule"); - ::encode_xml("Name", "prefix", f); - ::encode_xml("Value", prefix_rule, f); - f->close_section(); - } - if (!suffix_rule.empty()) { - f->open_object_section("FilterRule"); - ::encode_xml("Name", "suffix", f); - ::encode_xml("Value", suffix_rule, f); - f->close_section(); - } - if (!regex_rule.empty()) { - f->open_object_section("FilterRule"); - ::encode_xml("Name", "regex", f); - ::encode_xml("Value", regex_rule, f); - f->close_section(); - } -} - -bool rgw_s3_key_filter::has_content() const { - return !(prefix_rule.empty() && suffix_rule.empty() && regex_rule.empty()); -} - -void 
rgw_s3_key_value_filter::dump(Formatter *f) const { - if (!has_content()) { - return; - } - f->open_array_section("FilterRules"); - for (const auto& key_value : kv) { - f->open_object_section(""); - ::encode_json("Name", key_value.first, f); - ::encode_json("Value", key_value.second, f); - f->close_section(); - } - f->close_section(); -} - -bool rgw_s3_key_value_filter::decode_xml(XMLObj* obj) { - kv.clear(); - XMLObjIter iter = obj->find("FilterRule"); - XMLObj *o; - - const auto throw_if_missing = true; - - std::string key; - std::string value; - - while ((o = iter.get_next())) { - RGWXMLDecoder::decode_xml("Name", key, o, throw_if_missing); - RGWXMLDecoder::decode_xml("Value", value, o, throw_if_missing); - kv.emplace(key, value); - } - return true; -} - -void rgw_s3_key_value_filter::dump_xml(Formatter *f) const { - for (const auto& key_value : kv) { - f->open_object_section("FilterRule"); - ::encode_xml("Name", key_value.first, f); - ::encode_xml("Value", key_value.second, f); - f->close_section(); - } -} - -bool rgw_s3_key_value_filter::has_content() const { - return !kv.empty(); -} - -void rgw_s3_filter::dump(Formatter *f) const { - encode_json("S3Key", key_filter, f); - encode_json("S3Metadata", metadata_filter, f); - encode_json("S3Tags", tag_filter, f); -} - -bool rgw_s3_filter::decode_xml(XMLObj* obj) { - RGWXMLDecoder::decode_xml("S3Key", key_filter, obj); - RGWXMLDecoder::decode_xml("S3Metadata", metadata_filter, obj); - RGWXMLDecoder::decode_xml("S3Tags", tag_filter, obj); - return true; -} - -void rgw_s3_filter::dump_xml(Formatter *f) const { - if (key_filter.has_content()) { - ::encode_xml("S3Key", key_filter, f); - } - if (metadata_filter.has_content()) { - ::encode_xml("S3Metadata", metadata_filter, f); - } - if (tag_filter.has_content()) { - ::encode_xml("S3Tags", tag_filter, f); - } -} - -bool rgw_s3_filter::has_content() const { - return key_filter.has_content() || - metadata_filter.has_content() || - tag_filter.has_content(); -} - -bool 
match(const rgw_s3_key_filter& filter, const std::string& key) { - const auto key_size = key.size(); - const auto prefix_size = filter.prefix_rule.size(); - if (prefix_size != 0) { - // prefix rule exists - if (prefix_size > key_size) { - // if prefix is longer than key, we fail - return false; - } - if (!std::equal(filter.prefix_rule.begin(), filter.prefix_rule.end(), key.begin())) { - return false; - } - } - const auto suffix_size = filter.suffix_rule.size(); - if (suffix_size != 0) { - // suffix rule exists - if (suffix_size > key_size) { - // if suffix is longer than key, we fail - return false; - } - if (!std::equal(filter.suffix_rule.begin(), filter.suffix_rule.end(), (key.end() - suffix_size))) { - return false; - } - } - if (!filter.regex_rule.empty()) { - // TODO add regex chaching in the filter - const std::regex base_regex(filter.regex_rule); - if (!std::regex_match(key, base_regex)) { - return false; - } - } - return true; -} - -bool match(const rgw_s3_key_value_filter& filter, const KeyValueMap& kv) { - // all filter pairs must exist with the same value in the object's metadata/tags - // object metadata/tags may include items not in the filter - return std::includes(kv.begin(), kv.end(), filter.kv.begin(), filter.kv.end()); -} - -bool match(const rgw_s3_key_value_filter& filter, const KeyMultiValueMap& kv) { - // all filter pairs must exist with the same value in the object's metadata/tags - // object metadata/tags may include items not in the filter - for (auto& filter : filter.kv) { - auto result = kv.equal_range(filter.first); - if (std::any_of(result.first, result.second, [&filter](const std::pair& p) { return p.second == filter.second;})) - continue; - else - return false; - } - return true; -} - bool match(const rgw::notify::EventTypeList& events, rgw::notify::EventType event) { // if event list exists, and none of the events in the list matches the event type, filter the message if (!events.empty() && std::find(events.begin(), events.end(), 
event) == events.end()) { diff --git a/src/rgw/rgw_pubsub.h b/src/rgw/rgw_pubsub.h index 8a6b290cb8568..176ada952042b 100644 --- a/src/rgw/rgw_pubsub.h +++ b/src/rgw/rgw_pubsub.h @@ -9,94 +9,10 @@ #include "rgw_zone.h" #include "rgw_notify_event_type.h" #include +#include "rgw_s3_filter.h" class XMLObj; -struct rgw_s3_key_filter { - std::string prefix_rule; - std::string suffix_rule; - std::string regex_rule; - - bool has_content() const; - - void dump(Formatter *f) const; - bool decode_xml(XMLObj *obj); - void dump_xml(Formatter *f) const; - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(prefix_rule, bl); - encode(suffix_rule, bl); - encode(regex_rule, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(prefix_rule, bl); - decode(suffix_rule, bl); - decode(regex_rule, bl); - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(rgw_s3_key_filter) - -using KeyValueMap = boost::container::flat_map; -using KeyMultiValueMap = std::multimap; - -struct rgw_s3_key_value_filter { - KeyValueMap kv; - - bool has_content() const; - - void dump(Formatter *f) const; - bool decode_xml(XMLObj *obj); - void dump_xml(Formatter *f) const; - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(kv, bl); - ENCODE_FINISH(bl); - } - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(kv, bl); - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(rgw_s3_key_value_filter) - -struct rgw_s3_filter { - rgw_s3_key_filter key_filter; - rgw_s3_key_value_filter metadata_filter; - rgw_s3_key_value_filter tag_filter; - - bool has_content() const; - - void dump(Formatter *f) const; - bool decode_xml(XMLObj *obj); - void dump_xml(Formatter *f) const; - - void encode(bufferlist& bl) const { - ENCODE_START(2, 1, bl); - encode(key_filter, bl); - encode(metadata_filter, bl); - encode(tag_filter, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - 
DECODE_START(2, bl); - decode(key_filter, bl); - decode(metadata_filter, bl); - if (struct_v >= 2) { - decode(tag_filter, bl); - } - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(rgw_s3_filter) - using OptionalFilter = std::optional; struct rgw_pubsub_topic_filter; diff --git a/src/rgw/rgw_s3_filter.cc b/src/rgw/rgw_s3_filter.cc new file mode 100644 index 0000000000000..05a7c4a72930f --- /dev/null +++ b/src/rgw/rgw_s3_filter.cc @@ -0,0 +1,269 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "rgw_pubsub.h" +#include "rgw_tools.h" +#include "rgw_xml.h" +#include "rgw_s3_filter.h" +#include "common/errno.h" +#include "rgw_sal.h" +#include +#include + +void rgw_s3_key_filter::dump(Formatter *f) const { + if (!has_content()) { + return; + } + f->open_array_section("FilterRules"); + if (!prefix_rule.empty()) { + f->open_object_section(""); + ::encode_json("Name", "prefix", f); + ::encode_json("Value", prefix_rule, f); + f->close_section(); + } + if (!suffix_rule.empty()) { + f->open_object_section(""); + ::encode_json("Name", "suffix", f); + ::encode_json("Value", suffix_rule, f); + f->close_section(); + } + if (!regex_rule.empty()) { + f->open_object_section(""); + ::encode_json("Name", "regex", f); + ::encode_json("Value", regex_rule, f); + f->close_section(); + } + f->close_section(); +} + +bool rgw_s3_key_filter::decode_xml(XMLObj* obj) { + XMLObjIter iter = obj->find("FilterRule"); + XMLObj *o; + + const auto throw_if_missing = true; + auto prefix_not_set = true; + auto suffix_not_set = true; + auto regex_not_set = true; + std::string name; + + while ((o = iter.get_next())) { + RGWXMLDecoder::decode_xml("Name", name, o, throw_if_missing); + if (name == "prefix" && prefix_not_set) { + prefix_not_set = false; + RGWXMLDecoder::decode_xml("Value", prefix_rule, o, throw_if_missing); + } else if (name == "suffix" && suffix_not_set) { + suffix_not_set = false; + RGWXMLDecoder::decode_xml("Value", 
suffix_rule, o, throw_if_missing); + } else if (name == "regex" && regex_not_set) { + regex_not_set = false; + RGWXMLDecoder::decode_xml("Value", regex_rule, o, throw_if_missing); + } else { + throw RGWXMLDecoder::err("invalid/duplicate S3Key filter rule name: '" + name + "'"); + } + } + return true; +} + +void rgw_s3_key_filter::dump_xml(Formatter *f) const { + if (!prefix_rule.empty()) { + f->open_object_section("FilterRule"); + ::encode_xml("Name", "prefix", f); + ::encode_xml("Value", prefix_rule, f); + f->close_section(); + } + if (!suffix_rule.empty()) { + f->open_object_section("FilterRule"); + ::encode_xml("Name", "suffix", f); + ::encode_xml("Value", suffix_rule, f); + f->close_section(); + } + if (!regex_rule.empty()) { + f->open_object_section("FilterRule"); + ::encode_xml("Name", "regex", f); + ::encode_xml("Value", regex_rule, f); + f->close_section(); + } +} + +bool rgw_s3_key_filter::has_content() const { + return !(prefix_rule.empty() && suffix_rule.empty() && regex_rule.empty()); +} + +void rgw_s3_key_value_filter::dump(Formatter *f) const { + if (!has_content()) { + return; + } + f->open_array_section("FilterRules"); + for (const auto& key_value : kv) { + f->open_object_section(""); + ::encode_json("Name", key_value.first, f); + ::encode_json("Value", key_value.second, f); + f->close_section(); + } + f->close_section(); +} + +bool rgw_s3_key_value_filter::decode_xml(XMLObj* obj) { + kv.clear(); + XMLObjIter iter = obj->find("FilterRule"); + XMLObj *o; + + const auto throw_if_missing = true; + + std::string key; + std::string value; + + while ((o = iter.get_next())) { + RGWXMLDecoder::decode_xml("Name", key, o, throw_if_missing); + RGWXMLDecoder::decode_xml("Value", value, o, throw_if_missing); + kv.emplace(key, value); + } + return true; +} + +void rgw_s3_key_value_filter::dump_xml(Formatter *f) const { + for (const auto& key_value : kv) { + f->open_object_section("FilterRule"); + ::encode_xml("Name", key_value.first, f); + ::encode_xml("Value", 
key_value.second, f); + f->close_section(); + } +} + +bool rgw_s3_key_value_filter::has_content() const { + return !kv.empty(); +} + +void rgw_s3_filter::dump(Formatter *f) const { + encode_json("S3Key", key_filter, f); + encode_json("S3Metadata", metadata_filter, f); + encode_json("S3Tags", tag_filter, f); +} + +bool rgw_s3_filter::decode_xml(XMLObj* obj) { + RGWXMLDecoder::decode_xml("S3Key", key_filter, obj); + RGWXMLDecoder::decode_xml("S3Metadata", metadata_filter, obj); + RGWXMLDecoder::decode_xml("S3Tags", tag_filter, obj); + return true; +} + +void rgw_s3_filter::dump_xml(Formatter *f) const { + if (key_filter.has_content()) { + ::encode_xml("S3Key", key_filter, f); + } + if (metadata_filter.has_content()) { + ::encode_xml("S3Metadata", metadata_filter, f); + } + if (tag_filter.has_content()) { + ::encode_xml("S3Tags", tag_filter, f); + } +} + +bool rgw_s3_filter::has_content() const { + return key_filter.has_content() || + metadata_filter.has_content() || + tag_filter.has_content(); +} + +bool match(const rgw_s3_key_filter& filter, const std::string& key) { + const auto key_size = key.size(); + const auto prefix_size = filter.prefix_rule.size(); + if (prefix_size != 0) { + // prefix rule exists + if (prefix_size > key_size) { + // if prefix is longer than key, we fail + return false; + } + if (!std::equal(filter.prefix_rule.begin(), filter.prefix_rule.end(), key.begin())) { + return false; + } + } + const auto suffix_size = filter.suffix_rule.size(); + if (suffix_size != 0) { + // suffix rule exists + if (suffix_size > key_size) { + // if suffix is longer than key, we fail + return false; + } + if (!std::equal(filter.suffix_rule.begin(), filter.suffix_rule.end(), (key.end() - suffix_size))) { + return false; + } + } + if (!filter.regex_rule.empty()) { + // TODO add regex caching in the filter + const std::regex base_regex(filter.regex_rule); + if (!std::regex_match(key, base_regex)) { + return false; + } + } + return true; +} + +bool match(const 
rgw_s3_key_value_filter& filter, const KeyValueMap& kv) { + // all filter pairs must exist with the same value in the object's metadata/tags + // object metadata/tags may include items not in the filter + return std::includes(kv.begin(), kv.end(), filter.kv.begin(), filter.kv.end()); +} + +bool match(const rgw_s3_key_value_filter& filter, const KeyMultiValueMap& kv) { + // all filter pairs must exist with the same value in the object's metadata/tags + // object metadata/tags may include items not in the filter + for (auto& filter : filter.kv) { + auto result = kv.equal_range(filter.first); + if (std::any_of(result.first, result.second, [&filter](const std::pair& p) { return p.second == filter.second;})) + continue; + else + return false; + } + return true; +} + +bool match(const rgw_s3_filter& s3_filter, const rgw::sal::Object* obj) { + if (obj == nullptr) { + return false; + } + + if (match(s3_filter.key_filter, obj->get_name())) { + return true; + } + + const auto &attrs = obj->get_attrs(); + if (!s3_filter.metadata_filter.kv.empty()) { + KeyValueMap attrs_map; + for (auto& attr : attrs) { + if (boost::algorithm::starts_with(attr.first, RGW_ATTR_META_PREFIX)) { + std::string_view key(attr.first); + key.remove_prefix(sizeof(RGW_ATTR_PREFIX)-1); + // we want to pass a null terminated version + // of the bufferlist, hence "to_str().c_str()" + attrs_map.emplace(key, attr.second.to_str().c_str()); + } + } + if (match(s3_filter.metadata_filter, attrs_map)) { + return true; + } + } + + if (!s3_filter.tag_filter.kv.empty()) { + // tag filter exists + // try to fetch tags from the attributes + KeyMultiValueMap tags; + const auto attr_iter = attrs.find(RGW_ATTR_TAGS); + if (attr_iter != attrs.end()) { + auto bliter = attr_iter->second.cbegin(); + RGWObjTags obj_tags; + try { + ::decode(obj_tags, bliter); + } catch (buffer::error &) { + // not able to decode tags + return false; + } + tags = std::move(obj_tags.get_tags()); + } + if (match(s3_filter.tag_filter, tags)) { + 
return true; + } + } + + return false; +} diff --git a/src/rgw/rgw_s3_filter.h b/src/rgw/rgw_s3_filter.h new file mode 100644 index 0000000000000..9bbc4ef0088e3 --- /dev/null +++ b/src/rgw/rgw_s3_filter.h @@ -0,0 +1,102 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include "rgw_tools.h" +#include + +class XMLObj; + +struct rgw_s3_key_filter { + std::string prefix_rule; + std::string suffix_rule; + std::string regex_rule; + + bool has_content() const; + + void dump(Formatter *f) const; + bool decode_xml(XMLObj *obj); + void dump_xml(Formatter *f) const; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(prefix_rule, bl); + encode(suffix_rule, bl); + encode(regex_rule, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(prefix_rule, bl); + decode(suffix_rule, bl); + decode(regex_rule, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(rgw_s3_key_filter) + +using KeyValueMap = boost::container::flat_map; +using KeyMultiValueMap = std::multimap; + +struct rgw_s3_key_value_filter { + KeyValueMap kv; + + bool has_content() const; + + void dump(Formatter *f) const; + bool decode_xml(XMLObj *obj); + void dump_xml(Formatter *f) const; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(kv, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(kv, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(rgw_s3_key_value_filter) + +struct rgw_s3_filter { + rgw_s3_key_filter key_filter; + rgw_s3_key_value_filter metadata_filter; + rgw_s3_key_value_filter tag_filter; + + bool has_content() const; + + void dump(Formatter *f) const; + bool decode_xml(XMLObj *obj); + void dump_xml(Formatter *f) const; + + void encode(bufferlist& bl) const { + ENCODE_START(2, 1, bl); + encode(key_filter, bl); + encode(metadata_filter, bl); + 
encode(tag_filter, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(2, bl); + decode(key_filter, bl); + decode(metadata_filter, bl); + if (struct_v >= 2) { + decode(tag_filter, bl); + } + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(rgw_s3_filter) + +bool match(const rgw_s3_key_filter& filter, const std::string& key); + +bool match(const rgw_s3_key_value_filter& filter, const KeyValueMap& kv); + +bool match(const rgw_s3_key_value_filter& filter, const KeyMultiValueMap& kv); + +bool match(const rgw_s3_filter& filter, const rgw::sal::Object* obj); -- 2.39.5