From: Yehuda Sadeh Date: Sat, 18 Aug 2018 02:04:12 +0000 (-0700) Subject: rgw: zone, sysobj services: add missing system obj calls X-Git-Tag: v14.1.0~965^2~40 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3192f640d5f88cf9cce5aa8764ef29df08cfba32;p=ceph.git rgw: zone, sysobj services: add missing system obj calls Signed-off-by: Yehuda Sadeh --- diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt index 4c32a31c3d1f..2ae150227d23 100644 --- a/src/rgw/CMakeLists.txt +++ b/src/rgw/CMakeLists.txt @@ -41,6 +41,7 @@ endfunction() set(librgw_common_srcs services/svc_quota.cc services/svc_rados.cc + services/svc_sys_obj.cc services/svc_zone.cc services/svc_zone_utils.cc rgw_service.cc diff --git a/src/rgw/rgw_multi.cc b/src/rgw/rgw_multi.cc index a2fc09ff4780..5e0689e62e16 100644 --- a/src/rgw/rgw_multi.cc +++ b/src/rgw/rgw_multi.cc @@ -12,6 +12,8 @@ #include "rgw_multi.h" #include "rgw_op.h" +#include "services/svc_sys_obj.h" + #define dout_subsys ceph_subsys_rgw @@ -82,7 +84,6 @@ int list_multipart_parts(RGWRados *store, RGWBucketInfo& bucket_info, CephContex { map parts_map; map::iterator iter; - bufferlist header; rgw_obj obj; obj.init_ns(bucket_info.bucket, meta_oid, RGW_OBJ_NS_MULTIPART); @@ -97,6 +98,10 @@ int list_multipart_parts(RGWRados *store, RGWBucketInfo& bucket_info, CephContex parts.clear(); + + auto obj_ctx = store->svc.sysobj->init_obj_ctx(); + auto sysobj = obj_ctx.get_obj(raw_obj); + if (sorted_omap) { string p; p = "part."; @@ -105,9 +110,9 @@ int list_multipart_parts(RGWRados *store, RGWBucketInfo& bucket_info, CephContex snprintf(buf, sizeof(buf), "%08d", marker); p.append(buf); - ret = store->omap_get_vals(raw_obj, header, p, num_parts + 1, parts_map); + ret = sysobj.omap().get_vals(p, num_parts + 1, &parts_map, nullptr); } else { - ret = store->omap_get_all(raw_obj, header, parts_map); + ret = sysobj.omap().get_all(&parts_map); } if (ret < 0) return ret; diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 1f7a7652ba46..1d10ea15f07c 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -49,6 +49,7 @@ #include "services/svc_zone.h" #include "services/svc_quota.h" +#include "services/svc_sys_obj.h" #include "cls/lock/cls_lock_client.h" #include "cls/rgw/cls_rgw_client.h" @@ -5608,7 +5609,7 @@ void RGWCompleteMultipart::execute() op_ret = -ERR_INVALID_PART; return; } else { - manifest.append(obj_part.manifest, store); + manifest.append(obj_part.manifest, store->svc.zone.get()); } bool part_compressed = (obj_part.cs_info.compression_type != "none"); diff --git a/src/rgw/rgw_putobj_processor.cc b/src/rgw/rgw_putobj_processor.cc index b68d7b701a24..ba93779769a8 100644 --- a/src/rgw/rgw_putobj_processor.cc +++ b/src/rgw/rgw_putobj_processor.cc @@ -15,6 +15,7 @@ #include "rgw_putobj_aio.h" #include "rgw_putobj_processor.h" #include "rgw_multi.h" +#include "services/svc_sys_obj.h" #define dout_subsys ceph_subsys_rgw @@ -441,8 +442,13 @@ int MultipartObjectProcessor::complete(size_t accounted_size, rgw_raw_obj raw_meta_obj; store->obj_to_raw(bucket_info.placement_rule, meta_obj, &raw_meta_obj); - const bool must_exist = true;// detect races with abort - r = store->omap_set(raw_meta_obj, p, bl, must_exist); + + auto obj_ctx = store->svc.sysobj->init_obj_ctx(); + auto sysobj = obj_ctx.get_obj(raw_meta_obj); + + r = sysobj.omap() + .set_must_exist(true) + .set(p, bl); if (r < 0) { return r; } diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index ced9ebe55e8a..f081cf591380 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -72,6 +72,11 @@ using namespace librados; #include "rgw_realm_watcher.h" #include "rgw_reshard.h" +#include "services/svc_zone.h" +#include "services/svc_zone_utils.h" +#include "services/svc_quota.h" +#include "services/svc_sys_obj.h" + #include "compressor/Compressor.h" #ifdef WITH_LTTNG @@ -92,19 +97,14 @@ static string notify_oid_prefix = "notify"; static string *notify_oids = NULL; static string shadow_ns = "shadow"; static string dir_oid_prefix = ".dir."; -static string default_storage_pool_suffix = "rgw.buckets.data"; static string default_bucket_index_pool_suffix = "rgw.buckets.index"; static string default_storage_extra_pool_suffix = "rgw.buckets.non-ec"; -static string avail_pools = ".pools.avail"; static string log_lock_name = "rgw_log_lock"; static RGWObjCategory main_category = RGW_OBJ_CATEGORY_MAIN; #define RGW_USAGE_OBJ_PREFIX "usage." #define FIRST_EPOCH 1 -static string RGW_DEFAULT_ZONE_ROOT_POOL = "rgw.root"; -static string RGW_DEFAULT_ZONEGROUP_ROOT_POOL = "rgw.root"; -static string RGW_DEFAULT_REALM_ROOT_POOL = "rgw.root"; -static string RGW_DEFAULT_PERIOD_ROOT_POOL = "rgw.root"; + #define dout_subsys ceph_subsys_rgw @@ -1640,10 +1640,10 @@ int RGWRados::register_to_service_map(const string& daemon_type, const map metadata = meta; metadata["num_handles"] = stringify(rados.size()); - metadata["zonegroup_id"] = zonegroup.get_id(); - metadata["zonegroup_name"] = zonegroup.get_name(); - metadata["zone_name"] = zone_name(); - metadata["zone_id"] = zone_id();; + metadata["zonegroup_id"] = svc.zone->get_zonegroup().get_id(); + metadata["zonegroup_name"] = svc.zone->get_zonegroup().get_name(); + metadata["zone_name"] = svc.zone->zone_name(); + metadata["zone_id"] = svc.zone->zone_id(); string name = cct->_conf->name.get_id(); if (name.compare(0, 4, "rgw.") == 0) { name = name.substr(4); @@ -1668,24 +1668,21 @@ int RGWRados::update_service_map(std::map&& status) return 0; } -bool RGWRados::zone_syncs_from(RGWZone& target_zone, RGWZone& source_zone) -{ - return target_zone.syncs_from(source_zone.name) && - sync_modules_manager->supports_data_export(source_zone.tier_type); -} - /** * Initialize the RADOS instance and prepare to do other ops * Returns 0 on success, -ERR# on failure. */ int RGWRados::init_complete() { + int ret; + if (run_sync_thread) { - ret = sync_modules_manager->create_instance(cct, svc.zone->get_zone_public_config()->tier_type, svc.zone->get_zone_params()->tier_config, &sync_module); + auto& zone_public_config = svc.zone->get_zone(); + ret = sync_modules_manager->create_instance(cct, zone_public_config.tier_type, svc.zone->get_zone_params().tier_config, &sync_module); if (ret < 0) { lderr(cct) << "ERROR: failed to init sync module instance, ret=" << ret << dendl; if (ret == -ENOENT) { - lderr(cct) << "ERROR: " << zone_public_config->tier_type + lderr(cct) << "ERROR: " << zone_public_config.tier_type << " sync module does not exist. valid sync modules: " << sync_modules_manager->get_registered_module_names() << dendl; @@ -1699,7 +1696,7 @@ int RGWRados::init_complete() period_puller.reset(new RGWPeriodPuller(this)); period_history.reset(new RGWPeriodHistory(cct, period_puller.get(), - current_period)); + svc.zone->get_current_period())); if (need_watch_notify()) { ret = init_watch(); @@ -1709,44 +1706,6 @@ int RGWRados::init_complete() } } - /* first build all zones index */ - for (auto ziter : get_zonegroup().zones) { - const string& id = ziter.first; - RGWZone& z = ziter.second; - zone_id_by_name[z.name] = id; - zone_by_id[id] = z; - } - - if (zone_by_id.find(zone_id()) == zone_by_id.end()) { - ldout(cct, 0) << "WARNING: could not find zone config in zonegroup for local zone (" << zone_id() << "), will use defaults" << dendl; - } - zone_public_config = zone_by_id[zone_id()]; - for (auto ziter : get_zonegroup().zones) { - const string& id = ziter.first; - RGWZone& z = ziter.second; - if (id == zone_id()) { - continue; - } - if (z.endpoints.empty()) { - ldout(cct, 0) << "WARNING: can't generate connection for zone " << z.id << " id " << z.name << ": no endpoints defined" << dendl; - continue; - } - ldout(cct, 20) << "generating connection object for zone " << z.name << " id " << z.id << dendl; - RGWRESTConn *conn = new RGWRESTConn(cct, this, z.id, z.endpoints); - zone_conn_map[id] = conn; - if (zone_syncs_from(zone_public_config, z) || - zone_syncs_from(z, zone_public_config)) { - if (zone_syncs_from(zone_public_config, z)) { - zone_data_sync_from_map[id] = conn; - } - if (zone_syncs_from(z, zone_public_config)) { - zone_data_notify_to_map[id] = conn; - } - } else { - ldout(cct, 20) << "NOTICE: not syncing to/from zone " << z.name << " id " << z.id << dendl; - } - } - ret = open_root_pool_ctx(); if (ret < 0) return ret; @@ -1779,9 +1738,15 @@ int RGWRados::init_complete() obj_expirer->start_processor(); } + auto& current_period = svc.zone->get_current_period(); + auto& zonegroup = svc.zone->get_zonegroup(); + auto& zone_params = svc.zone->get_zone_params(); + auto& zone = svc.zone->get_zone(); + +#warning sync service needed /* no point of running sync thread if we don't have a master zone configured or there is no rest_master_conn */ - if (get_zonegroup().master_zone.empty() || !rest_master_conn + if (zonegroup.master_zone.empty() || !svc.zone->get_master_conn() || current_period.get_id().empty()) { run_sync_thread = false; } @@ -1816,10 +1781,9 @@ int RGWRados::init_complete() } if (run_sync_thread) { - - for (const auto &pt: get_zonegroup().placement_targets) { - if (get_zone_params().placement_pools.find(pt.second.name) - == get_zone_params().placement_pools.end()){ + for (const auto &pt: zonegroup.placement_targets) { + if (zone_params.placement_pools.find(pt.second.name) + == zone_params.placement_pools.end()){ ldout(cct, 0) << "WARNING: This zone does not contain the placement target " << pt.second.name << " present in zonegroup" << dendl; } @@ -1846,7 +1810,7 @@ int RGWRados::init_complete() data_log->set_observer(&*bucket_trim); Mutex::Locker dl(data_sync_thread_lock); - for (auto iter : zone_data_sync_from_map) { + for (auto iter : svc.zone->get_zone_data_sync_from_map()) { ldout(cct, 5) << "starting data sync thread for zone " << iter.first << dendl; auto *thread = new RGWDataSyncProcessorThread(this, async_rados, iter.first); ret = thread->init(); @@ -1880,7 +1844,7 @@ int RGWRados::init_complete() quota_handler = RGWQuotaHandler::generate_handler(this, quota_threads); bucket_index_max_shards = (cct->_conf->rgw_override_bucket_index_max_shards ? cct->_conf->rgw_override_bucket_index_max_shards : - get_zone().bucket_index_max_shards); + zone.bucket_index_max_shards); if (bucket_index_max_shards > get_max_bucket_shards()) { bucket_index_max_shards = get_max_bucket_shards(); ldout(cct, 1) << __func__ << " bucket index max shards is too large, reset to value: " @@ -1891,7 +1855,7 @@ int RGWRados::init_complete() binfo_cache = new RGWChainedCacheImpl; binfo_cache->init(this); - bool need_tombstone_cache = !zone_data_notify_to_map.empty(); /* have zones syncing from us */ + bool need_tombstone_cache = !svc.zone->get_zone_data_notify_to_map().empty(); /* have zones syncing from us */ if (need_tombstone_cache) { obj_tombstone_cache = new tombstone_cache_t(cct->_conf->rgw_obj_tombstone_cache_size); @@ -1902,7 +1866,7 @@ int RGWRados::init_complete() reshard = new RGWReshard(this); /* only the master zone in the zonegroup reshards buckets */ - run_reshard_thread = run_reshard_thread && (get_zonegroup().master_zone == zone_public_config.id); + run_reshard_thread = run_reshard_thread && (zonegroup.master_zone == zone.id); if (run_reshard_thread) { reshard->start_processor(); } @@ -1925,8 +1889,7 @@ int RGWRados::initialize() cct->_conf.get_val("rgw_inject_notify_timeout_probability"); max_notify_retries = cct->_conf.get_val("rgw_max_notify_retries"); - svc_registry = std::make_unique(); - svc_registry->register_all(); + svc_registry = std::make_unique(cct); JSONFormattable zone_svc_conf; ret = svc_registry->get_instance("zone", zone_svc_conf, &svc.zone); @@ -2005,32 +1968,33 @@ int RGWRados::list_raw_prefixed_objs(const rgw_pool& pool, const string& prefix, */ int RGWRados::open_root_pool_ctx() { - return rgw_init_ioctx(get_rados_handle(), get_zone_params().domain_root, root_pool_ctx, true); + return rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().domain_root, root_pool_ctx, true); } int RGWRados::open_gc_pool_ctx() { - return rgw_init_ioctx(get_rados_handle(), get_zone_params().gc_pool, gc_pool_ctx, true); + return rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().gc_pool, gc_pool_ctx, true); } int RGWRados::open_lc_pool_ctx() { - return rgw_init_ioctx(get_rados_handle(), get_zone_params().lc_pool, lc_pool_ctx, true); + return rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().lc_pool, lc_pool_ctx, true); } int RGWRados::open_objexp_pool_ctx() { - return rgw_init_ioctx(get_rados_handle(), get_zone_params().log_pool, objexp_pool_ctx, true); + return rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, objexp_pool_ctx, true); } int RGWRados::open_reshard_pool_ctx() { - return rgw_init_ioctx(get_rados_handle(), get_zone_params().reshard_pool, reshard_pool_ctx, true); + return rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().reshard_pool, reshard_pool_ctx, true); } int RGWRados::init_watch() { - int r = rgw_init_ioctx(&rados[0], get_zone_params().control_pool, control_pool_ctx, true); +#warning needs to be part of the sysobj/cache service + int r = rgw_init_ioctx(&rados[0], svc.zone->get_zone_params().control_pool, control_pool_ctx, true); if (r < 0) { return r; } @@ -2124,6 +2088,9 @@ int RGWRados::open_bucket_index_ctx(const RGWBucketInfo& bucket_info, librados:: return open_pool_ctx(explicit_pool, index_ctx); } + auto& zonegroup = svc.zone->get_zonegroup(); + auto& zone_params = svc.zone->get_zone_params(); + const string *rule = &bucket_info.placement_rule; if (rule->empty()) { rule = &zonegroup.default_placement; @@ -2192,7 +2159,7 @@ struct log_list_state { int RGWRados::log_list_init(const string& prefix, RGWAccessHandle *handle) { log_list_state *state = new log_list_state; - int r = rgw_init_ioctx(get_rados_handle(), get_zone_params().log_pool, state->io_ctx); + int r = rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, state->io_ctx); if (r < 0) { delete state; return r; @@ -2226,7 +2193,7 @@ int RGWRados::log_list_next(RGWAccessHandle handle, string *name) int RGWRados::log_remove(const string& name) { librados::IoCtx io_ctx; - int r = rgw_init_ioctx(get_rados_handle(), get_zone_params().log_pool, io_ctx); + int r = rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, io_ctx); if (r < 0) return r; return io_ctx.remove(name); @@ -2245,7 +2212,7 @@ struct log_show_state { int RGWRados::log_show_init(const string& name, RGWAccessHandle *handle) { log_show_state *state = new log_show_state; - int r = rgw_init_ioctx(get_rados_handle(), get_zone_params().log_pool, state->io_ctx); + int r = rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, state->io_ctx); if (r < 0) { delete state; return r; @@ -2491,7 +2458,7 @@ void RGWRados::time_log_prepare_entry(cls_log_entry& entry, const real_time& ut, int RGWRados::time_log_add_init(librados::IoCtx& io_ctx) { - return rgw_init_ioctx(get_rados_handle(), get_zone_params().log_pool, io_ctx, true); + return rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, io_ctx, true); } @@ -2540,7 +2507,7 @@ int RGWRados::time_log_list(const string& oid, const real_time& start_time, cons { librados::IoCtx io_ctx; - int r = rgw_init_ioctx(get_rados_handle(), get_zone_params().log_pool, io_ctx); + int r = rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, io_ctx); if (r < 0) return r; librados::ObjectReadOperation op; @@ -2564,7 +2531,7 @@ int RGWRados::time_log_info(const string& oid, cls_log_header *header) { librados::IoCtx io_ctx; - int r = rgw_init_ioctx(get_rados_handle(), get_zone_params().log_pool, io_ctx); + int r = rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, io_ctx); if (r < 0) return r; librados::ObjectReadOperation op; @@ -2582,7 +2549,7 @@ int RGWRados::time_log_info(const string& oid, cls_log_header *header) int RGWRados::time_log_info_async(librados::IoCtx& io_ctx, const string& oid, cls_log_header *header, librados::AioCompletion *completion) { - int r = rgw_init_ioctx(get_rados_handle(), get_zone_params().log_pool, io_ctx); + int r = rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, io_ctx); if (r < 0) return r; @@ -2603,7 +2570,7 @@ int RGWRados::time_log_trim(const string& oid, const real_time& start_time, cons { librados::IoCtx io_ctx; - int r = rgw_init_ioctx(get_rados_handle(), get_zone_params().log_pool, io_ctx); + int r = rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, io_ctx); if (r < 0) return r; @@ -3168,9 +3135,9 @@ void RGWRados::create_bucket_id(string *bucket_id) { uint64_t iid = instance_id(); uint64_t bid = next_bucket_id(); - char buf[get_zone_params().get_id().size() + 48]; + char buf[svc.zone->get_zone_params().get_id().size() + 48]; snprintf(buf, sizeof(buf), "%s.%" PRIu64 ".%" PRIu64, - get_zone_params().get_id().c_str(), iid, bid); + svc.zone->get_zone_params().get_id().c_str(), iid, bid); *bucket_id = buf; } @@ -3194,8 +3161,8 @@ int RGWRados::create_bucket(RGWUserInfo& owner, rgw_bucket& bucket, for (int i = 0; i < MAX_CREATE_RETRIES; i++) { int ret = 0; - ret = select_bucket_placement(owner, zonegroup_id, placement_rule, - &selected_placement_rule_name, &rule_info); + ret = svc.zone->select_bucket_placement(owner, zonegroup_id, placement_rule, + &selected_placement_rule_name, &rule_info); if (ret < 0) return ret; @@ -3288,196 +3255,9 @@ int RGWRados::create_bucket(RGWUserInfo& owner, rgw_bucket& bucket, return -ENOENT; } -int RGWRados::select_new_bucket_location(RGWUserInfo& user_info, const string& zonegroup_id, const string& request_rule, - string *pselected_rule_name, RGWZonePlacementInfo *rule_info) - -{ - /* first check that zonegroup exists within current period. */ - RGWZoneGroup zonegroup; - int ret = get_zonegroup(zonegroup_id, zonegroup); - if (ret < 0) { - ldout(cct, 0) << "could not find zonegroup " << zonegroup_id << " in current period" << dendl; - return ret; - } - - /* find placement rule. Hierarchy: request rule > user default rule > zonegroup default rule */ - std::map::const_iterator titer; - - if (!request_rule.empty()) { - titer = zonegroup.placement_targets.find(request_rule); - if (titer == zonegroup.placement_targets.end()) { - ldout(cct, 0) << "could not find requested placement id " << request_rule - << " within zonegroup " << dendl; - return -ERR_INVALID_LOCATION_CONSTRAINT; - } - } else if (!user_info.default_placement.empty()) { - titer = zonegroup.placement_targets.find(user_info.default_placement); - if (titer == zonegroup.placement_targets.end()) { - ldout(cct, 0) << "could not find user default placement id " << user_info.default_placement - << " within zonegroup " << dendl; - return -ERR_INVALID_LOCATION_CONSTRAINT; - } - } else { - if (zonegroup.default_placement.empty()) { // zonegroup default rule as fallback, it should not be empty. - ldout(cct, 0) << "misconfiguration, zonegroup default placement id should not be empty." << dendl; - return -ERR_ZONEGROUP_DEFAULT_PLACEMENT_MISCONFIGURATION; - } else { - titer = zonegroup.placement_targets.find(zonegroup.default_placement); - if (titer == zonegroup.placement_targets.end()) { - ldout(cct, 0) << "could not find zonegroup default placement id " << zonegroup.default_placement - << " within zonegroup " << dendl; - return -ERR_INVALID_LOCATION_CONSTRAINT; - } - } - } - - /* now check tag for the rule, whether user is permitted to use rule */ - const auto& target_rule = titer->second; - if (!target_rule.user_permitted(user_info.placement_tags)) { - ldout(cct, 0) << "user not permitted to use placement rule " << titer->first << dendl; - return -EPERM; - } - - if (pselected_rule_name) - *pselected_rule_name = titer->first; - - return select_bucket_location_by_rule(titer->first, rule_info); -} - -int RGWRados::select_bucket_location_by_rule(const string& location_rule, RGWZonePlacementInfo *rule_info) -{ - if (location_rule.empty()) { - /* we can only reach here if we're trying to set a bucket location from a bucket - * created on a different zone, using a legacy / default pool configuration - */ - if (rule_info) { - return select_legacy_bucket_placement(rule_info); - } - - return 0; - } - - /* - * make sure that zone has this rule configured. We're - * checking it for the local zone, because that's where this bucket object is going to - * reside. - */ - map::iterator piter = get_zone_params().placement_pools.find(location_rule); - if (piter == get_zone_params().placement_pools.end()) { - /* couldn't find, means we cannot really place data for this bucket in this zone */ - if (get_zonegroup().equals(zonegroup.get_id())) { - /* that's a configuration error, zone should have that rule, as we're within the requested - * zonegroup */ - ldout(cct, 0) << "ERROR: This zone does not contain placement rule" - << location_rule << " present in the zonegroup!" << dendl; - return -EINVAL; - } else { - /* oh, well, data is not going to be placed here, bucket object is just a placeholder */ - return 0; - } - } - - RGWZonePlacementInfo& placement_info = piter->second; - - if (rule_info) { - *rule_info = placement_info; - } - - return 0; -} - -int RGWRados::select_bucket_placement(RGWUserInfo& user_info, const string& zonegroup_id, const string& placement_rule, - string *pselected_rule_name, RGWZonePlacementInfo *rule_info) -{ - if (!get_zone_params().placement_pools.empty()) { - return select_new_bucket_location(user_info, zonegroup_id, placement_rule, - pselected_rule_name, rule_info); - } - - if (pselected_rule_name) { - pselected_rule_name->clear(); - } - - if (rule_info) { - return select_legacy_bucket_placement(rule_info); - } - - return 0; -} - -int RGWRados::select_legacy_bucket_placement(RGWZonePlacementInfo *rule_info) -{ - bufferlist map_bl; - map m; - string pool_name; - bool write_map = false; - - rgw_raw_obj obj(get_zone_params().domain_root, avail_pools); - - RGWObjectCtx obj_ctx(this); - int ret = rgw_get_system_obj(this, obj_ctx, get_zone_params().domain_root, avail_pools, map_bl, NULL, NULL); - if (ret < 0) { - goto read_omap; - } - - try { - auto iter = map_bl.cbegin(); - decode(m, iter); - } catch (buffer::error& err) { - ldout(cct, 0) << "ERROR: couldn't decode avail_pools" << dendl; - } - -read_omap: - if (m.empty()) { - bufferlist header; - ret = omap_get_all(obj, header, m); - - write_map = true; - } - - if (ret < 0 || m.empty()) { - vector pools; - string s = string("default.") + default_storage_pool_suffix; - pools.push_back(rgw_pool(s)); - vector retcodes; - bufferlist bl; - ret = create_pools(pools, retcodes); - if (ret < 0) - return ret; - ret = omap_set(obj, s, bl); - if (ret < 0) - return ret; - m[s] = bl; - } - - if (write_map) { - bufferlist new_bl; - encode(m, new_bl); - ret = put_system_obj_data(NULL, obj, new_bl, -1, false); - if (ret < 0) { - ldout(cct, 0) << "WARNING: could not save avail pools map info ret=" << ret << dendl; - } - } - - auto miter = m.begin(); - if (m.size() > 1) { - // choose a pool at random - auto r = ceph::util::generate_random_number(0, m.size() - 1); - std::advance(miter, r); - } - pool_name = miter->first; - - rule_info->data_pool = pool_name; - rule_info->data_extra_pool = pool_name; - rule_info->index_pool = pool_name; - rule_info->index_type = RGWBIType_Normal; - - return 0; -} - bool RGWRados::get_obj_data_pool(const string& placement_rule, const rgw_obj& obj, rgw_pool *pool) { - return rgw_get_obj_data_pool(zonegroup, zone_params, placement_rule, obj, pool); + return rgw_get_obj_data_pool(svc.zone->get_zonegroup(), svc.zone->get_zone_params(), placement_rule, obj, pool); } bool RGWRados::obj_to_raw(const string& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj) @@ -3487,72 +3267,6 @@ bool RGWRados::obj_to_raw(const string& placement_rule, const rgw_obj& obj, rgw_ return get_obj_data_pool(placement_rule, obj, &raw_obj->pool); } -int RGWRados::update_placement_map() -{ - bufferlist header; - map m; - rgw_raw_obj obj(get_zone_params().domain_root, avail_pools); - int ret = omap_get_all(obj, header, m); - if (ret < 0) - return ret; - - bufferlist new_bl; - encode(m, new_bl); - ret = put_system_obj_data(NULL, obj, new_bl, -1, false); - if (ret < 0) { - ldout(cct, 0) << "WARNING: could not save avail pools map info ret=" << ret << dendl; - } - - return ret; -} - -int RGWRados::add_bucket_placement(const rgw_pool& new_pool) -{ - librados::Rados *rad = get_rados_handle(); - int ret = rad->pool_lookup(new_pool.name.c_str()); - if (ret < 0) // DNE, or something - return ret; - - rgw_raw_obj obj(get_zone_params().domain_root, avail_pools); - bufferlist empty_bl; - ret = omap_set(obj, new_pool.to_str(), empty_bl); - - // don't care about return value - update_placement_map(); - - return ret; -} - -int RGWRados::remove_bucket_placement(const rgw_pool& old_pool) -{ - rgw_raw_obj obj(get_zone_params().domain_root, avail_pools); - int ret = omap_del(obj, old_pool.to_str()); - - // don't care about return value - update_placement_map(); - - return ret; -} - -int RGWRados::list_placement_set(set& names) -{ - bufferlist header; - map m; - - rgw_raw_obj obj(get_zone_params().domain_root, avail_pools); - int ret = omap_get_all(obj, header, m); - if (ret < 0) - return ret; - - names.clear(); - map::iterator miter; - for (miter = m.begin(); miter != m.end(); ++miter) { - names.insert(rgw_pool(miter->first)); - } - - return names.size(); -} - int RGWRados::create_pools(vector& pools, vector& retcodes) { vector completions; @@ -3682,7 +3396,7 @@ int RGWRados::get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref) if (ref->oid.empty()) { ref->oid = obj.pool.to_str(); - ref->pool = get_zone_params().domain_root; + ref->pool = svc.zone->get_zone_params().domain_root; } else { ref->pool = obj.pool; } @@ -4249,7 +3963,7 @@ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_si string req_id; if (!s) { // fake req_id - req_id = store->unique_id(store->get_new_req_id()); + req_id = store->svc.zone_utils->unique_id(store->get_new_req_id()); } else { req_id = s->req_id; } @@ -4347,7 +4061,7 @@ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_si if (attrs.find(RGW_ATTR_SOURCE_ZONE) == attrs.end()) { bufferlist bl; - encode(store->get_zone_short_id(), bl); + encode(store->svc.zone->get_zone_short_id(), bl); op.setxattr(RGW_ATTR_SOURCE_ZONE, bl); } @@ -4973,8 +4687,9 @@ int RGWRados::stat_remote_obj(RGWObjectCtx& obj_ctx, if (source_zone.empty()) { if (src_bucket_info.zonegroup.empty()) { /* source is in the master zonegroup */ - conn = rest_master_conn; + conn = svc.zone->get_master_conn(); } else { + auto& zonegroup_conn_map = svc.zone->get_zonegroup_conn_map(); map::iterator iter = zonegroup_conn_map.find(src_bucket_info.zonegroup); if (iter == zonegroup_conn_map.end()) { ldout(cct, 0) << "could not find zonegroup connection to zonegroup: " << source_zone << dendl; @@ -4983,6 +4698,7 @@ int RGWRados::stat_remote_obj(RGWObjectCtx& obj_ctx, conn = iter->second; } } else { + auto& zone_conn_map = svc.zone->get_zone_conn_map(); map::iterator iter = zone_conn_map.find(source_zone); if (iter == zone_conn_map.end()) { ldout(cct, 0) << "could not find zone connection to zone: " << source_zone << dendl; @@ -5099,10 +4815,12 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& obj_ctx, } RGWRESTConn *conn; + auto& zone_conn_map = svc.zone->get_zone_conn_map(); + auto& zonegroup_conn_map = svc.zone->get_zonegroup_conn_map(); if (source_zone.empty()) { if (dest_bucket_info.zonegroup.empty()) { /* source is in the master zonegroup */ - conn = rest_master_conn; + conn = svc.zone->get_master_conn(); } else { map::iterator iter = zonegroup_conn_map.find(src_bucket_info.zonegroup); if (iter == zonegroup_conn_map.end()) { @@ -5125,7 +4843,7 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& obj_ctx, boost::optional compressor; CompressorRef plugin; - const auto& compression_type = zone_params.get_compression_type( + const auto& compression_type = svc.zone->get_zone_params().get_compression_type( dest_bucket_info.placement_rule); if (compression_type != "none") { plugin = Compressor::create(cct, compression_type); @@ -5233,7 +4951,7 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& obj_ctx, /* non critical error */ } } - set_mtime_weight.init(set_mtime, get_zone_short_id(), pg_ver); + set_mtime_weight.init(set_mtime, svc.zone->get_zone_short_id(), pg_ver); } #define MAX_COMPLETE_RETRY 100 @@ -5301,6 +5019,8 @@ int RGWRados::copy_obj_to_remote_dest(RGWObjState *astate, RGWRESTStreamS3PutObj *out_stream_req; + auto rest_master_conn = svc.zone->get_master_conn(); + int ret = rest_master_conn->put_obj_async(user_id, dest_obj, astate->size, src_attrs, true, &out_stream_req); if (ret < 0) { return ret; @@ -5372,8 +5092,10 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx, append_rand_alpha(cct, dest_obj.get_oid(), shadow_oid, 32); shadow_obj.init_ns(dest_obj.bucket, shadow_oid, shadow_ns); - remote_dest = !get_zonegroup().equals(dest_bucket_info.zonegroup); - remote_src = !get_zonegroup().equals(src_bucket_info.zonegroup); + auto& zonegroup = svc.zone->get_zonegroup(); + + remote_dest = !zonegroup.equals(dest_bucket_info.zonegroup); + remote_src = !zonegroup.equals(src_bucket_info.zonegroup); if (remote_src && remote_dest) { ldout(cct, 0) << "ERROR: can't copy object when both src and dest buckets are remote" << dendl; @@ -5684,37 +5406,6 @@ int RGWRados::copy_obj_data(RGWObjectCtx& obj_ctx, nullptr, nullptr, nullptr, nullptr, nullptr); } -/** - * Check to see if the bucket metadata could be synced - * bucket: the bucket to check - * Returns false is the bucket is not synced - */ -bool RGWRados::is_syncing_bucket_meta(const rgw_bucket& bucket) -{ - - /* no current period */ - if (current_period.get_id().empty()) { - return false; - } - - /* zonegroup is not master zonegroup */ - if (!get_zonegroup().is_master_zonegroup()) { - return false; - } - - /* single zonegroup and a single zone */ - if (current_period.is_single_zonegroup() && get_zonegroup().zones.size() == 1) { - return false; - } - - /* zone is not master */ - if (get_zonegroup().master_zone.compare(zone_public_config.id) != 0) { - return false; - } - - return true; -} - int RGWRados::check_bucket_empty(RGWBucketInfo& bucket_info) { std::vector ent_list; @@ -5774,7 +5465,7 @@ int RGWRados::delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& ob return r; /* if the bucket is not synced we can remove the meta file */ - if (!is_syncing_bucket_meta(bucket)) { + if (!svc.zone->is_syncing_bucket_meta(bucket)) { RGWObjVersionTracker objv_tracker; string entry = bucket.get_key(); r= rgw_bucket_instance_remove_entry(this, entry, &objv_tracker); @@ -8364,7 +8055,7 @@ int RGWRados::bucket_index_link_olh(const RGWBucketInfo& bucket_info, RGWObjStat if (_zones_trace) { zones_trace = *_zones_trace; } - zones_trace.insert(get_zone().id); + zones_trace.insert(svc.zone->get_zone().id); BucketShard bs(this); @@ -8376,7 +8067,7 @@ int RGWRados::bucket_index_link_olh(const RGWBucketInfo& bucket_info, RGWObjStat return cls_rgw_bucket_link_olh(bs->index_ctx, op, bs->bucket_obj, key, olh_state.olh_tag, delete_marker, op_tag, meta, olh_epoch, unmod_since, high_precision_time, - get_zone().log_data, zones_trace); + svc.zone->get_zone().log_data, zones_trace); }); if (r < 0) { ldout(cct, 20) << "cls_rgw_bucket_link_olh() returned r=" << r << dendl; @@ -8409,7 +8100,7 @@ int RGWRados::bucket_index_unlink_instance(const RGWBucketInfo& bucket_info, con if (_zones_trace) { zones_trace = *_zones_trace; } - zones_trace.insert(get_zone().id); + zones_trace.insert(svc.zone->get_zone().id); BucketShard bs(this); @@ -8419,7 +8110,7 @@ int RGWRados::bucket_index_unlink_instance(const RGWBucketInfo& bucket_info, con librados::ObjectWriteOperation op; cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING); return cls_rgw_bucket_unlink_instance(bs->index_ctx, op, bs->bucket_obj, key, op_tag, - olh_tag, olh_epoch, get_zone().log_data, zones_trace); + olh_tag, olh_epoch, svc.zone->get_zone().log_data, zones_trace); }); if (r < 0) { ldout(cct, 20) << "cls_rgw_bucket_link_olh() returned r=" << r << dendl; @@ -8819,19 +8510,6 @@ void RGWRados::gen_rand_obj_instance_name(rgw_obj *target_obj) target_obj->key.set_instance(buf); } -static void filter_attrset(map& unfiltered_attrset, const string& check_prefix, - map *attrset) -{ - attrset->clear(); - map::iterator iter; - for (iter = unfiltered_attrset.lower_bound(check_prefix); - iter != unfiltered_attrset.end(); ++iter) { - if (!boost::algorithm::starts_with(iter->first, check_prefix)) - break; - (*attrset)[iter->first] = iter->second; - } -} - int RGWRados::get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWOLHInfo *olh) { map unfiltered_attrset; @@ -8847,7 +8525,7 @@ int RGWRados::get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWO } map attrset; - filter_attrset(unfiltered_attrset, RGW_ATTR_OLH_PREFIX, &attrset); + rgw_filter_attrset(unfiltered_attrset, RGW_ATTR_OLH_PREFIX, &attrset); map::iterator iter = attrset.find(RGW_ATTR_OLH_INFO); if (iter == attrset.end()) { /* not an olh */ @@ -8929,7 +8607,7 @@ int RGWRados::remove_olh_pending_entries(const RGWBucketInfo& bucket_info, RGWOb int RGWRados::follow_olh(const RGWBucketInfo& bucket_info, RGWObjectCtx& obj_ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target) { map pending_entries; - filter_attrset(state->attrset, RGW_ATTR_OLH_PENDING_PREFIX, &pending_entries); + rgw_filter_attrset(state->attrset, RGW_ATTR_OLH_PENDING_PREFIX, &pending_entries); map rm_pending_entries; check_pending_olh_entries(pending_entries, &rm_pending_entries); @@ -9012,7 +8690,7 @@ int RGWRados::raw_obj_stat(rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, if (pmtime) *pmtime = ceph::real_clock::from_timespec(mtime_ts); if (attrs) { - filter_attrset(unfiltered_attrset, RGW_ATTR_PREFIX, attrs); + rgw_filter_attrset(unfiltered_attrset, RGW_ATTR_PREFIX, attrs); } return 0; @@ -9203,11 +8881,11 @@ void RGWRados::get_bucket_meta_oid(const rgw_bucket& bucket, string& oid) void RGWRados::get_bucket_instance_obj(const rgw_bucket& bucket, rgw_raw_obj& obj) { if (!bucket.oid.empty()) { - obj.init(get_zone_params().domain_root, bucket.oid); + obj.init(svc.zone->get_zone_params().domain_root, bucket.oid); } else { string oid; get_bucket_meta_oid(bucket, oid); - obj.init(get_zone_params().domain_root, oid); + obj.init(svc.zone->get_zone_params().domain_root, oid); } } @@ -9242,11 +8920,13 @@ int RGWRados::get_bucket_instance_from_oid(RGWObjectCtx& obj_ctx, const string& rgw_cache_entry_info *cache_info, boost::optional refresh_version) { - ldout(cct, 20) << "reading from " << get_zone_params().domain_root << ":" << oid << dendl; + auto& domain_root = svc.zone->get_zone_params().domain_root; + + ldout(cct, 20) << "reading from " << domain_root << ":" << oid << dendl; bufferlist epbl; - int ret = rgw_get_system_obj(this, obj_ctx, get_zone_params().domain_root, + int ret = rgw_get_system_obj(this, obj_ctx, domain_root, oid, epbl, &info.objv_tracker, pmtime, pattrs, cache_info, refresh_version); if (ret < 0) { @@ -9278,7 +8958,7 @@ int RGWRados::get_bucket_entrypoint_info(RGWObjectCtx& obj_ctx, string bucket_entry; rgw_make_bucket_entry_name(tenant_name, bucket_name, bucket_entry); - int ret = rgw_get_system_obj(this, obj_ctx, get_zone_params().domain_root, + int ret = rgw_get_system_obj(this, obj_ctx, svc.zone->get_zone_params().domain_root, bucket_entry, bl, objv_tracker, pmtime, pattrs, cache_info, refresh_version); if (ret < 0) { @@ -9523,100 +9203,6 @@ int RGWRados::put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, real_t return 0; } -int RGWRados::omap_get_vals(rgw_raw_obj& obj, bufferlist& header, const string& marker, uint64_t count, std::map& m) -{ - rgw_rados_ref ref; - int r = get_raw_obj_ref(obj, &ref); - if (r < 0) { - return r; - } - - r = ref.ioctx.omap_get_vals(ref.oid, marker, count, &m); - if (r < 0) - return r; - - return 0; - -} - -int RGWRados::omap_get_all(rgw_raw_obj& obj, bufferlist& header, - std::map& m) -{ - rgw_rados_ref ref; - int r = get_raw_obj_ref(obj, &ref); - if (r < 0) { - return r; - } - -#define MAX_OMAP_GET_ENTRIES 1024 - const int count = MAX_OMAP_GET_ENTRIES; - string start_after; - - while (true) { - std::map t; - r = ref.ioctx.omap_get_vals(ref.oid, start_after, count, &t); - if (r < 0) { - return r; - } - if (t.empty()) { - break; - } - start_after = t.rbegin()->first; - m.insert(t.begin(), t.end()); - } - return 0; -} - -int RGWRados::omap_set(rgw_raw_obj& obj, const std::string& key, bufferlist& bl, bool must_exist) -{ - rgw_rados_ref ref; - int r = get_raw_obj_ref(obj, &ref); - if (r < 0) { - return r; - } - ldout(cct, 15) << "omap_set obj=" << obj << " key=" << key << dendl; - - map m; - m[key] = bl; - ObjectWriteOperation op; - if (must_exist) - op.assert_exists(); - op.omap_set(m); - r = ref.ioctx.operate(ref.oid, &op); - return r; -} - -int RGWRados::omap_set(rgw_raw_obj& obj, std::map& m, bool must_exist) -{ - rgw_rados_ref ref; - int r = get_raw_obj_ref(obj, &ref); - if (r < 0) { - return r; - } - - ObjectWriteOperation op; - if (must_exist) - op.assert_exists(); - op.omap_set(m); - r = ref.ioctx.operate(ref.oid, &op); - return r; -} - -int RGWRados::omap_del(rgw_raw_obj& obj, const std::string& key) -{ - rgw_rados_ref ref; - int r = get_raw_obj_ref(obj, &ref); - if (r < 0) { - return r; - } - - set k; - k.insert(key); - - r = ref.ioctx.omap_rm_keys(ref.oid, k); - return r; -} - int RGWRados::update_containers_stats(map& m) { RGWObjectCtx obj_ctx(this); @@ -9861,14 +9447,6 @@ int RGWRados::pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vectorget_zone().id); ObjectWriteOperation o; cls_rgw_obj_key key(obj.key.get_index_key_name(), obj.key.instance); cls_rgw_guard_bucket_resharding(o, -ERR_BUSY_RESHARDING); - cls_rgw_bucket_prepare_op(o, op, tag, key, obj.key.get_loc(), get_zone().log_data, bilog_flags, zones_trace); + cls_rgw_bucket_prepare_op(o, op, tag, key, obj.key.get_loc(), svc.zone->get_zone().log_data, bilog_flags, zones_trace); return bs.index_ctx.operate(bs.bucket_obj, &o); } @@ -10283,7 +9861,7 @@ int RGWRados::cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModify if (_zones_trace) { zones_trace = *_zones_trace; } - zones_trace.insert(get_zone().id); + zones_trace.insert(svc.zone->get_zone().id); rgw_bucket_entry_ver ver; ver.pool = pool; @@ -10291,10 +9869,10 @@ int RGWRados::cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModify cls_rgw_obj_key key(ent.key.name, ent.key.instance); cls_rgw_guard_bucket_resharding(o, -ERR_BUSY_RESHARDING); cls_rgw_bucket_complete_op(o, op, tag, ver, key, dir_meta, remove_objs, - get_zone().log_data, bilog_flags, &zones_trace); + svc.zone->get_zone().log_data, bilog_flags, &zones_trace); complete_op_data *arg; index_completion_manager->create_completion(obj, op, tag, ver, key, dir_meta, remove_objs, - get_zone().log_data, bilog_flags, &zones_trace, &arg); + svc.zone->get_zone().log_data, bilog_flags, &zones_trace, &arg); librados::AioCompletion *completion = arg->rados_completion; int ret = bs.index_ctx.aio_operate(bs.bucket_obj, arg->rados_completion, &o); completion->release(); /* can't reference arg here, as it might have already been released */ @@ -10591,7 +10169,7 @@ check_updates: int RGWRados::cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info) { - rgw_raw_obj obj(get_zone_params().usage_log_pool, oid); + rgw_raw_obj obj(svc.zone->get_zone_params().usage_log_pool, oid); rgw_rados_ref ref; int r = get_raw_obj_ref(obj, &ref); @@ -10609,7 +10187,7 @@ int RGWRados::cls_obj_usage_log_add(const string& oid, int RGWRados::cls_obj_usage_log_read(string& oid, string& user, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries, string& read_iter, map& usage, bool *is_truncated) { - rgw_raw_obj obj(get_zone_params().usage_log_pool, oid); + rgw_raw_obj obj(svc.zone->get_zone_params().usage_log_pool, oid); rgw_rados_ref ref; int r = get_raw_obj_ref(obj, &ref); @@ -10627,7 +10205,7 @@ int RGWRados::cls_obj_usage_log_read(string& oid, string& user, uint64_t start_e int RGWRados::cls_obj_usage_log_trim(string& oid, string& user, uint64_t start_epoch, uint64_t end_epoch) { - rgw_raw_obj obj(get_zone_params().usage_log_pool, oid); + rgw_raw_obj obj(svc.zone->get_zone_params().usage_log_pool, oid); rgw_rados_ref ref; int r = get_raw_obj_ref(obj, &ref); @@ -10641,7 +10219,7 @@ int RGWRados::cls_obj_usage_log_trim(string& oid, string& user, uint64_t start_e int RGWRados::cls_obj_usage_log_clear(string& oid) { - rgw_raw_obj obj(get_zone_params().usage_log_pool, oid); + rgw_raw_obj obj(svc.zone->get_zone_params().usage_log_pool, oid); rgw_rados_ref ref; int r = get_raw_obj_ref(obj, &ref); @@ -10660,7 +10238,7 @@ int RGWRados::remove_objs_from_index(RGWBucketInfo& bucket_info, listget_zone().log_data ? CEPH_RGW_DIR_SUGGEST_LOG_OP : 0); int r = open_bucket_index(bucket_info, index_ctx, dir_oid); if (r < 0) @@ -10691,7 +10269,7 @@ int RGWRados::check_disk_state(librados::IoCtx io_ctx, bufferlist& suggested_updates) { const rgw_bucket& bucket = bucket_info.bucket; - uint8_t suggest_flag = (get_zone().log_data ? CEPH_RGW_DIR_SUGGEST_LOG_OP : 0); + uint8_t suggest_flag = (svc.zone->get_zone().log_data ? CEPH_RGW_DIR_SUGGEST_LOG_OP : 0); std::string loc; @@ -10839,7 +10417,7 @@ int RGWRados::cls_user_get_header(const string& user_id, cls_user_header *header { string buckets_obj_id; rgw_get_buckets_obj(user_id, buckets_obj_id); - rgw_raw_obj obj(get_zone_params().user_uid_pool, buckets_obj_id); + rgw_raw_obj obj(svc.zone->get_zone_params().user_uid_pool, buckets_obj_id); rgw_rados_ref ref; int r = get_raw_obj_ref(obj, &ref); @@ -10864,7 +10442,7 @@ int RGWRados::cls_user_reset_stats(const string& user_id) { string buckets_obj_id; rgw_get_buckets_obj(user_id, buckets_obj_id); - rgw_raw_obj obj(get_zone_params().user_uid_pool, buckets_obj_id); + rgw_raw_obj obj(svc.zone->get_zone_params().user_uid_pool, buckets_obj_id); rgw_rados_ref ref; int r = get_raw_obj_ref(obj, &ref); @@ -10881,7 +10459,7 @@ int RGWRados::cls_user_get_header_async(const string& user_id, RGWGetUserHeader_ { string buckets_obj_id; rgw_get_buckets_obj(user_id, buckets_obj_id); - rgw_raw_obj obj(get_zone_params().user_uid_pool, buckets_obj_id); + rgw_raw_obj obj(svc.zone->get_zone_params().user_uid_pool, buckets_obj_id); rgw_rados_ref ref; int r = get_raw_obj_ref(obj, &ref); @@ -11009,7 +10587,7 @@ int RGWRados::complete_sync_user_stats(const rgw_user& user_id) { string buckets_obj_id; rgw_get_buckets_obj(user_id, buckets_obj_id); - rgw_raw_obj obj(get_zone_params().user_uid_pool, buckets_obj_id); + rgw_raw_obj obj(svc.zone->get_zone_params().user_uid_pool, buckets_obj_id); return cls_user_complete_stats_sync(obj); } @@ -11473,7 +11051,7 @@ string RGWRados::get_mfa_oid(const rgw_user& user) int RGWRados::get_mfa_ref(const rgw_user& user, rgw_rados_ref *ref) { string oid = get_mfa_oid(user); - rgw_raw_obj obj(get_zone_params().otp_pool, oid); + rgw_raw_obj obj(svc.zone->get_zone_params().otp_pool, oid); return get_system_obj_ref(obj, ref); } @@ -11621,7 +11199,7 @@ int RGWRados::set_mfa(const string& oid, const list bool reset_obj, RGWObjVersionTracker *objv_tracker, const real_time& mtime) { - rgw_raw_obj obj(get_zone_params().otp_pool, oid); + rgw_raw_obj obj(svc.zone->get_zone_params().otp_pool, oid); rgw_rados_ref ref; int r = get_system_obj_ref(obj, &ref); if (r < 0) { @@ -11648,7 +11226,7 @@ int RGWRados::set_mfa(const string& oid, const list int RGWRados::list_mfa(const string& oid, list *result, RGWObjVersionTracker *objv_tracker, ceph::real_time *pmtime) { - rgw_raw_obj obj(get_zone_params().otp_pool, oid); + rgw_raw_obj obj(svc.zone->get_zone_params().otp_pool, oid); rgw_rados_ref ref; int r = get_system_obj_ref(obj, &ref); if (r < 0) { diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index c0429ddc3295..761676817cc8 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -309,12 +309,6 @@ public: virtual ~RGWGetDataCB() {} }; -class RGWAccessListFilter { -public: - virtual ~RGWAccessListFilter() {} - virtual bool filter(string& name, string& key) = 0; -}; - struct RGWCloneRangeInfo { rgw_obj src; off_t src_ofs; @@ -2070,10 +2064,6 @@ public: */ int delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, bool check_empty = true); - /** - * Check to see if the bucket metadata is synced - */ - bool is_syncing_bucket_meta(const rgw_bucket& bucket); void wakeup_meta_sync_shards(set& shard_ids); void wakeup_data_sync_shards(const string& source_zone, map >& shard_ids); @@ -2227,11 +2217,6 @@ public: void gen_rand_obj_instance_name(rgw_obj *target); - int omap_get_vals(rgw_raw_obj& obj, bufferlist& header, const std::string& marker, uint64_t count, std::map& m); - int omap_get_all(rgw_raw_obj& obj, bufferlist& header, std::map& m); - int omap_set(rgw_raw_obj& obj, const std::string& key, bufferlist& bl, bool must_exist = false); - int omap_set(rgw_raw_obj& obj, map& m, bool must_exist = false); - int omap_del(rgw_raw_obj& obj, const std::string& key); int update_containers_stats(map& m); int append_async(rgw_raw_obj& obj, size_t size, bufferlist& bl); diff --git a/src/rgw/rgw_rest_realm.cc b/src/rgw/rgw_rest_realm.cc index 193191d51842..39197472d4a3 100644 --- a/src/rgw/rgw_rest_realm.cc +++ b/src/rgw/rgw_rest_realm.cc @@ -65,7 +65,7 @@ void RGWOp_Period_Get::execute() period.set_id(period_id); period.set_epoch(epoch); - http_ret = period.init(store->ctx(), store, realm_id, realm_name); + http_ret = period.init(store->ctx(), store->svc.sysobj.get(), realm_id, realm_name); if (http_ret < 0) ldout(store->ctx(), 5) << "failed to read period" << dendl; } @@ -82,7 +82,7 @@ void RGWOp_Period_Post::execute() auto cct = store->ctx(); // initialize the period without reading from rados - period.init(cct, store, false); + period.init(cct, store->svc.sysobj.get(), false); // decode the period from input const auto max_size = cct->_conf->rgw_max_put_param_size; @@ -105,7 +105,7 @@ void RGWOp_Period_Post::execute() // period that we haven't restarted with yet. we also don't want to modify // the objects in use by RGWRados RGWRealm realm(period.get_realm()); - http_ret = realm.init(cct, store); + http_ret = realm.init(cct, store->svc.sysobj.get()); if (http_ret < 0) { lderr(cct) << "failed to read current realm: " << cpp_strerror(-http_ret) << dendl; @@ -113,7 +113,7 @@ void RGWOp_Period_Post::execute() } RGWPeriod current_period; - http_ret = current_period.init(cct, store, realm.get_id()); + http_ret = current_period.init(cct, store->svc.sysobj.get(), realm.get_id()); if (http_ret < 0) { lderr(cct) << "failed to read current period: " << cpp_strerror(-http_ret) << dendl; @@ -258,7 +258,7 @@ void RGWOp_Realm_Get::execute() // read realm realm.reset(new RGWRealm(id, name)); - http_ret = realm->init(g_ceph_context, store); + http_ret = realm->init(g_ceph_context, store->svc.sysobj.get()); if (http_ret < 0) lderr(store->ctx()) << "failed to read realm id=" << id << " name=" << name << dendl; diff --git a/src/rgw/rgw_service.h b/src/rgw/rgw_service.h index da8bce523cf8..7149b47a34ce 100644 --- a/src/rgw/rgw_service.h +++ b/src/rgw/rgw_service.h @@ -102,12 +102,29 @@ public: } bool find(const string& name, RGWServiceRef *svc); + /* returns existing or creates a new one */ int get_instance(RGWServiceRef& svc, const string& conf, RGWServiceInstanceRef *ref); /* returns existing or creates a new one */ + + /* returns existing or creates a new one */ + template + int get_instance(RGWServiceRef& svc, + const string& conf, + T *ref) { + RGWServiceInstanceRef r; + int ret = get_instance(svc, conf, &r); + if (ret < 0) { + return ret; + } + *ref = std::static_pointer_cast(r); + return 0; + } + + template int get_instance(const string& svc_name, const string& conf, - RGWServiceInstanceRef *ref) { + T *ref) { auto iter = services.find(svc_name); if (iter == services.end()) { return -ENOENT; @@ -117,4 +134,5 @@ public: void remove_instance(RGWServiceInstance *instance); }; + #endif diff --git a/src/rgw/rgw_tools.cc b/src/rgw/rgw_tools.cc index 886bb316d5ca..592de20294c6 100644 --- a/src/rgw/rgw_tools.cc +++ b/src/rgw/rgw_tools.cc @@ -189,6 +189,19 @@ const char *rgw_find_mime_by_ext(string& ext) return iter->second.c_str(); } +void rgw_filter_attrset(map& unfiltered_attrset, const string& check_prefix, + map *attrset) +{ + attrset->clear(); + map::iterator iter; + for (iter = unfiltered_attrset.lower_bound(check_prefix); + iter != unfiltered_attrset.end(); ++iter) { + if (!boost::algorithm::starts_with(iter->first, check_prefix)) + break; + (*attrset)[iter->first] = iter->second; + } +} + int rgw_tools_init(CephContext *cct) { ext_mime_map = new std::map; diff --git a/src/rgw/rgw_tools.h b/src/rgw/rgw_tools.h index 1c12da8fde80..ba39a99c9983 100644 --- a/src/rgw/rgw_tools.h +++ b/src/rgw/rgw_tools.h @@ -25,8 +25,12 @@ int rgw_get_system_obj(RGWRados *rgwstore, RGWObjectCtx& obj_ctx, const rgw_pool int rgw_delete_system_obj(RGWRados *rgwstore, const rgw_pool& pool, const string& oid, RGWObjVersionTracker *objv_tracker); +const char *rgw_find_mime_by_ext(string& ext); + +void rgw_filter_attrset(map& unfiltered_attrset, const string& check_prefix, + map *attrset); + int rgw_tools_init(CephContext *cct); void rgw_tools_cleanup(); -const char *rgw_find_mime_by_ext(string& ext); #endif diff --git a/src/rgw/rgw_torrent.cc b/src/rgw/rgw_torrent.cc index 2754dda02bad..35d8e47b25b4 100644 --- a/src/rgw/rgw_torrent.cc +++ b/src/rgw/rgw_torrent.cc @@ -7,6 +7,8 @@ #include "include/str_list.h" #include "include/rados/librados.hpp" +#include "services/svc_sys_obj.h" + #define dout_subsys ceph_subsys_rgw using ceph::crypto::MD5; @@ -242,7 +244,10 @@ int seed::save_torrent_file() rgw_raw_obj raw_obj; store->obj_to_raw(s->bucket_info.placement_rule, obj, &raw_obj); - op_ret = store->omap_set(raw_obj, key, bl); + auto obj_ctx = store->svc.sysobj->init_obj_ctx(); + auto sysobj = obj_ctx.get_obj(raw_obj); + + op_ret = sysobj.omap().set(key, bl); if (op_ret < 0) { ldout(s->cct, 0) << "ERROR: failed to omap_set() op_ret = " << op_ret << dendl; diff --git a/src/rgw/services/svc_rados.cc b/src/rgw/services/svc_rados.cc index b0b4d02da8c6..9d4eb227105f 100644 --- a/src/rgw/services/svc_rados.cc +++ b/src/rgw/services/svc_rados.cc @@ -99,6 +99,36 @@ int RGWSI_RADOS::open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx) return init_ioctx(cct, get_rados_handle(), pool, io_ctx, create); } +int RGWSI_RADOS::pool_iterate(librados::IoCtx& io_ctx, + librados::NObjectIterator& iter, + uint32_t num, vector& objs, + RGWAccessListFilter *filter, + bool *is_truncated) +{ + if (iter == io_ctx.nobjects_end()) + return -ENOENT; + + uint32_t i; + + for (i = 0; i < num && iter != io_ctx.nobjects_end(); ++i, ++iter) { + rgw_bucket_dir_entry e; + + string oid = iter->get_oid(); + ldout(cct, 20) << "RGWRados::pool_iterate: got " << oid << dendl; + + // fill it in with initial values; we may correct later + if (filter && !filter->filter(oid, oid)) + continue; + + e.key = oid; + objs.push_back(e); + } + + if (is_truncated) + *is_truncated = (iter != io_ctx.nobjects_end()); + + return objs.size(); +} void RGWSI_RADOS::Obj::init(const rgw_raw_obj& obj) { ref.oid = obj.oid; @@ -109,8 +139,9 @@ void RGWSI_RADOS::Obj::init(const rgw_raw_obj& obj) int RGWSI_RADOS::Obj::open() { int r = rados_svc->open_pool_ctx(ref.pool, ref.ioctx); - if (r < 0) + if (r < 0) { return r; + } ref.ioctx.locator_set_key(ref.key); @@ -136,3 +167,142 @@ uint64_t RGWSI_RADOS::Obj::get_last_version() { return ref.ioctx.get_last_version(); } + +int RGWSI_RADOS::Pool::create(const vector& pools, vector *retcodes) +{ + vector completions; + vector rets; + + librados::Rados *rad = rados_svc->get_rados_handle(); + for (auto iter = pools.begin(); iter != pools.end(); ++iter) { + librados::PoolAsyncCompletion *c = librados::Rados::pool_async_create_completion(); + completions.push_back(c); + auto& pool = *iter; + int ret = rad->pool_create_async(pool.name.c_str(), c); + rets.push_back(ret); + } + + vector::iterator riter; + vector::iterator citer; + + bool error = false; + assert(rets.size() == completions.size()); + for (riter = rets.begin(), citer = completions.begin(); riter != rets.end(); ++riter, ++citer) { + int r = *riter; + librados::PoolAsyncCompletion *c = *citer; + if (r == 0) { + c->wait(); + r = c->get_return_value(); + if (r < 0) { + ldout(rados_svc->cct, 0) << "WARNING: async pool_create returned " << r << dendl; + error = true; + } + } + c->release(); + retcodes->push_back(r); + } + if (error) { + return 0; + } + + std::vector io_ctxs; + retcodes->clear(); + for (auto pool : pools) { + io_ctxs.emplace_back(); + int ret = rad->ioctx_create(pool.name.c_str(), io_ctxs.back()); + if (ret < 0) { + ldout(rados_svc->cct, 0) << "WARNING: ioctx_create returned " << ret << dendl; + error = true; + } + retcodes->push_back(ret); + } + if (error) { + return 0; + } + + completions.clear(); + for (auto &io_ctx : io_ctxs) { + librados::PoolAsyncCompletion *c = + librados::Rados::pool_async_create_completion(); + completions.push_back(c); + int ret = io_ctx.application_enable_async(pg_pool_t::APPLICATION_NAME_RGW, + false, c); + assert(ret == 0); + } + + retcodes->clear(); + for (auto c : completions) { + c->wait(); + int ret = c->get_return_value(); + if (ret == -EOPNOTSUPP) { + ret = 0; + } else if (ret < 0) { + ldout(rados_svc->cct, 0) << "WARNING: async application_enable returned " << ret + << dendl; + error = true; + } + c->release(); + retcodes->push_back(ret); + } + return 0; +} + +int RGWSI_RADOS::Pool::lookup(const rgw_pool& pool) +{ + librados::Rados *rad = rados_svc->get_rados_handle(); + int ret = rad->pool_lookup(pool.name.c_str()); + if (ret < 0) { + return ret; + } + + return 0; +} + +int RGWSI_RADOS::Pool::List::init(const string& marker, RGWAccessListFilter *filter) +{ + if (ctx.initialized) { + return -EINVAL; + } + + int r = pool.rados_svc->open_pool_ctx(pool.pool, ctx.ioctx); + if (r < 0) { + return r; + } + + librados::ObjectCursor oc; + if (!oc.from_str(marker)) { + ldout(pool.rados_svc->cct, 10) << "failed to parse cursor: " << marker << dendl; + return -EINVAL; + } + + ctx.iter = ctx.ioctx.nobjects_begin(oc); + ctx.filter = filter; + ctx.initialized = true; + + return 0; +} + +int RGWSI_RADOS::Pool::List::get_next(int max, + std::list *oids, + bool *is_truncated) +{ + if (!ctx.initialized) { + return -EINVAL; + } + vector objs; + int r = pool.rados_svc->pool_iterate(ctx.ioctx, ctx.iter, max, objs, ctx.filter, is_truncated); + if (r < 0) { + if(r != -ENOENT) { + ldout(pool.rados_svc->cct, 10) << "failed to list objects pool_iterate returned r=" << r << dendl; + } + return r; + } + + vector::iterator iter; + for (auto& o : objs) { + oids->push_back(o.key.name); + } + + return oids->size(); +} + diff --git a/src/rgw/services/svc_rados.h b/src/rgw/services/svc_rados.h index a49f78beb660..7cafb7869012 100644 --- a/src/rgw/services/svc_rados.h +++ b/src/rgw/services/svc_rados.h @@ -6,6 +6,21 @@ #include "include/rados/librados.hpp" +class RGWAccessListFilter { +public: + virtual ~RGWAccessListFilter() {} + virtual bool filter(string& name, string& key) = 0; +}; + +struct RGWAccessListFilterPrefix : public RGWAccessListFilter { + string prefix; + + explicit RGWAccessListFilterPrefix(const string& _prefix) : prefix(_prefix) {} + bool filter(string& name, string& key) override { + return (prefix.compare(key.substr(0, prefix.size())) == 0); + } +}; + class RGWS_RADOS : public RGWService { std::vector get_deps(); @@ -33,6 +48,12 @@ class RGWSI_RADOS : public RGWServiceInstance librados::Rados* get_rados_handle(); int open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx); + int pool_iterate(librados::IoCtx& ioctx, + librados::NObjectIterator& iter, + uint32_t num, vector& objs, + RGWAccessListFilter *filter, + bool *is_truncated); + public: RGWSI_RADOS(RGWService *svc, CephContext *cct): RGWServiceInstance(svc, cct), handle_lock("rados_handle_lock") {} @@ -59,6 +80,12 @@ public: Obj(Obj&& o) : rados_svc(o.rados_svc), ref(std::move(o.ref)) {} + Obj& operator=(Obj&& o) { + rados_svc = o.rados_svc; + ref = std::move(o.ref); + return *this; + } + int open(); int operate(librados::ObjectWriteOperation *op); @@ -68,10 +95,64 @@ public: uint64_t get_last_version(); }; + class Pool { + friend class RGWSI_RADOS; + + RGWSI_RADOS *rados_svc{nullptr}; + rgw_pool pool; + + Pool(RGWSI_RADOS *_rados_svc, const rgw_pool& _pool) : rados_svc(_rados_svc), + pool(_pool) {} + + Pool(RGWSI_RADOS *_rados_svc) : rados_svc(_rados_svc) {} + public: + Pool() {} + Pool(const Pool& p) : rados_svc(p.rados_svc), + pool(p.pool) {} + + int create(const std::vector& pools, std::vector *retcodes); + int lookup(const rgw_pool& pool); + + struct List { + Pool& pool; + + struct Ctx { + bool initialized{false}; + librados::IoCtx ioctx; + librados::NObjectIterator iter; + RGWAccessListFilter *filter{nullptr}; + } ctx; + + List(Pool& _pool) : pool(_pool) {} + + int init(const string& marker, RGWAccessListFilter *filter = nullptr); + int get_next(int max, + std::list *oids, + bool *is_truncated); + }; + + List op() { + return List(*this); + } + + friend class List; + }; + Obj obj(const rgw_raw_obj& o) { return Obj(this, o); } + Pool pool() { + return Pool(this); + } + + Pool pool(const rgw_pool& p) { + return Pool(this, p); + } + + friend class Obj; + friend class Pool; + friend class Pool::List; }; #endif diff --git a/src/rgw/services/svc_sys_obj.cc b/src/rgw/services/svc_sys_obj.cc index e0bd58567193..53e19dde39f7 100644 --- a/src/rgw/services/svc_sys_obj.cc +++ b/src/rgw/services/svc_sys_obj.cc @@ -1,5 +1,20 @@ #include "svc_sys_obj.h" #include "svc_rados.h" +#include "svc_zone.h" + +#include "rgw/rgw_tools.h" + +#define dout_subsys ceph_subsys_rgw + +RGWSysObjectCtx&& RGWSI_SysObj::init_obj_ctx() +{ + return std::move(RGWSysObjectCtx(this)); +} + +RGWSI_SysObj::Obj&& RGWSI_SysObj::get_obj(RGWSysObjectCtx& obj_ctx, const rgw_raw_obj& obj) +{ + return std::move(Obj(this, obj_ctx, obj)); +} int RGWS_SysObj::create_instance(const string& conf, RGWServiceInstanceRef *instance) { @@ -14,8 +29,8 @@ std::map RGWSI_SysObj::get_deps() RGWServiceInstance::dependency dep2 = { .name = "zone", .conf = "{}" }; map deps; - deps["rados_dep"] = dep1 - deps["zone_dep"] = dep2 + deps["rados_dep"] = dep1; + deps["zone_dep"] = dep2; return deps; } @@ -32,11 +47,11 @@ int RGWSI_SysObj::load(const string& conf, std::mapcanonicalize_raw_obj(&obj); - *pobj = rados_svc->obj(obj); + *pobj = std::move(rados_svc->obj(obj)); int r = pobj->open(); if (r < 0) { return r; @@ -60,7 +75,7 @@ int RGWSI_SysObj::get_system_obj_state_impl(RGWSysObjectCtx *rctx, rgw_raw_obj& s->obj = obj; - int r = raw_stat(obj, &s->size, &s->mtime, &s->epoch, &s->attrset, (s->prefetch_data ? &s->data : NULL), objv_tracker); + int r = raw_stat(obj, &s->size, &s->mtime, &s->epoch, &s->attrset, (s->prefetch_data ? &s->data : nullptr), objv_tracker); if (r == -ENOENT) { s->exists = false; s->has_attrs = true; @@ -98,8 +113,8 @@ int RGWSI_SysObj::raw_stat(rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, map *attrs, bufferlist *first_chunk, RGWObjVersionTracker *objv_tracker) { - RGWSI_Rados::Obj rados_obj; - int r = get_rados_obj(obj, &rados_obj); + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(zone_svc.get(), obj, &rados_obj); if (r < 0) { return r; } @@ -113,13 +128,13 @@ int RGWSI_SysObj::raw_stat(rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, objv_tracker->prepare_op_for_read(&op); } if (attrs) { - op.getxattrs(&unfiltered_attrset, NULL); + op.getxattrs(&unfiltered_attrset, nullptr); } if (psize || pmtime) { - op.stat2(&size, &mtime_ts, NULL); + op.stat2(&size, &mtime_ts, nullptr); } if (first_chunk) { - op.read(0, cct->_conf->rgw_max_chunk_size, first_chunk, NULL); + op.read(0, cct->_conf->rgw_max_chunk_size, first_chunk, nullptr); } bufferlist outbl; r = rados_obj.operate(&op, &outbl); @@ -136,21 +151,21 @@ int RGWSI_SysObj::raw_stat(rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, if (pmtime) *pmtime = ceph::real_clock::from_timespec(mtime_ts); if (attrs) { - filter_attrset(unfiltered_attrset, RGW_ATTR_PREFIX, attrs); + rgw_filter_attrset(unfiltered_attrset, RGW_ATTR_PREFIX, attrs); } return 0; } int RGWSI_SysObj::stat(RGWSysObjectCtx& obj_ctx, - RGWSI_SysObj::SystemObject::Read::GetObjState& state, + RGWSI_SysObj::Obj::ROp::GetObjState& state, rgw_raw_obj& obj, map *attrs, real_time *lastmod, uint64_t *obj_size, RGWObjVersionTracker *objv_tracker) { - RGWSysObjState *astate = NULL; + RGWSysObjState *astate = nullptr; int r = get_system_obj_state(&obj_ctx, obj, &astate, objv_tracker); if (r < 0) @@ -178,13 +193,13 @@ int RGWSI_SysObj::stat(RGWSysObjectCtx& obj_ctx, return 0; } -int RGWSI_SysObj::read_obj(RGWObjectCtx& obj_ctx, - Obj::Read::GetObjState& read_state, - RGWObjVersionTracker *objv_tracker, - rgw_raw_obj& obj, - bufferlist *bl, off_t ofs, off_t end, - map *attrs, - boost::optional) +int RGWSI_SysObj::read(RGWSysObjectCtx& obj_ctx, + Obj::ROp::GetObjState& read_state, + RGWObjVersionTracker *objv_tracker, + rgw_raw_obj& obj, + bufferlist *bl, off_t ofs, off_t end, + map *attrs, + boost::optional) { uint64_t len; librados::ObjectReadOperation op; @@ -199,19 +214,19 @@ int RGWSI_SysObj::read_obj(RGWObjectCtx& obj_ctx, } ldout(cct, 20) << "rados->read ofs=" << ofs << " len=" << len << dendl; - op.read(ofs, len, bl, NULL); + op.read(ofs, len, bl, nullptr); if (attrs) { - op.getxattrs(attrs, NULL); + op.getxattrs(attrs, nullptr); } - RGWSI_Rados::Obj rados_obj; - int r = get_rados_obj(obj, &rados_obj); + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(zone_svc.get(), obj, &rados_obj); if (r < 0) { ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; return r; } - r = rados_obj.operate(&op, NULL); + r = rados_obj.operate(&op, nullptr); if (r < 0) { ldout(cct, 20) << "rados_obj.operate() r=" << r << " bl.length=" << bl->length() << dendl; return r; @@ -228,7 +243,7 @@ int RGWSI_SysObj::read_obj(RGWObjectCtx& obj_ctx, read_state.last_ver = op_ver; - return bl.length(); + return bl->length(); } /** @@ -238,10 +253,12 @@ int RGWSI_SysObj::read_obj(RGWObjectCtx& obj_ctx, * dest: bufferlist to store the result in * Returns: 0 on success, -ERR# otherwise. */ -int RGWSI_SysObj::get_attr(rgw_raw_obj& obj, std::string_view name, bufferlist *dest) +int RGWSI_SysObj::get_attr(rgw_raw_obj& obj, + const char *name, + bufferlist *dest) { - RGWSI_Rados::Obj rados_obj; - int r = get_rados_obj(obj, &rados_obj); + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(zone_svc.get(), obj, &rados_obj); if (r < 0) { ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; return r; @@ -259,15 +276,239 @@ int RGWSI_SysObj::get_attr(rgw_raw_obj& obj, std::string_view name, bufferlist * return 0; } +int RGWSI_SysObj::omap_get_vals(rgw_raw_obj& obj, + const string& marker, + uint64_t count, + std::map *m, + bool *pmore) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(zone_svc.get(), obj, &rados_obj); + if (r < 0) { + ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + + string start_after = marker; + bool more; + + do { + librados::ObjectReadOperation op; + + std::map t; + int rval; + op.omap_get_vals2(start_after, count, &t, &more, &rval); + + r = rados_obj.operate(&op, nullptr); + if (r < 0) { + return r; + } + if (t.empty()) { + break; + } + count -= t.size(); + start_after = t.rbegin()->first; + m->insert(t.begin(), t.end()); + } while (more && count > 0); + + if (pmore) { + *pmore = more; + } + return 0; +} + +int RGWSI_SysObj::omap_get_all(rgw_raw_obj& obj, std::map *m) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(zone_svc.get(), obj, &rados_obj); + if (r < 0) { + ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + +#define MAX_OMAP_GET_ENTRIES 1024 + const int count = MAX_OMAP_GET_ENTRIES; + string start_after; + bool more; + + do { + librados::ObjectReadOperation op; + + std::map t; + int rval; + op.omap_get_vals2(start_after, count, &t, &more, &rval); + + r = rados_obj.operate(&op, nullptr); + if (r < 0) { + return r; + } + if (t.empty()) { + break; + } + start_after = t.rbegin()->first; + m->insert(t.begin(), t.end()); + } while (more); + return 0; +} + +int RGWSI_SysObj::omap_set(rgw_raw_obj& obj, const std::string& key, bufferlist& bl, bool must_exist) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(zone_svc.get(), obj, &rados_obj); + if (r < 0) { + ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + + ldout(cct, 15) << "omap_set obj=" << obj << " key=" << key << dendl; + + map m; + m[key] = bl; + librados::ObjectWriteOperation op; + if (must_exist) + op.assert_exists(); + op.omap_set(m); + r = rados_obj.operate(&op); + return r; +} + +int RGWSI_SysObj::omap_set(rgw_raw_obj& obj, const std::map& m, bool must_exist) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(zone_svc.get(), obj, &rados_obj); + if (r < 0) { + ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + + librados::ObjectWriteOperation op; + if (must_exist) + op.assert_exists(); + op.omap_set(m); + r = rados_obj.operate(&op); + return r; +} + +int RGWSI_SysObj::omap_del(rgw_raw_obj& obj, const std::string& key) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(zone_svc.get(), obj, &rados_obj); + if (r < 0) { + ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + + set k; + k.insert(key); + + librados::ObjectWriteOperation op; + + op.omap_rm_keys(k); + + r = rados_obj.operate(&op); + return r; +} + +int RGWSI_SysObj::remove(RGWSysObjectCtx& obj_ctx, + RGWObjVersionTracker *objv_tracker, + rgw_raw_obj& obj) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(zone_svc.get(), obj, &rados_obj); + if (r < 0) { + ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + + librados::ObjectWriteOperation op; + + if (objv_tracker) { + objv_tracker->prepare_op_for_write(&op); + } + + op.remove(); + r = rados_obj.operate(&op); + if (r < 0) + return r; + + return 0; +} + +int RGWSI_SysObj::write(rgw_raw_obj& obj, + real_time *pmtime, + map& attrs, + bool exclusive, + const bufferlist& data, + RGWObjVersionTracker *objv_tracker, + real_time set_mtime) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(zone_svc.get(), obj, &rados_obj); + if (r < 0) { + ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + + librados::ObjectWriteOperation op; + + if (exclusive) { + op.create(true); // exclusive create + } else { + op.remove(); + op.set_op_flags2(LIBRADOS_OP_FLAG_FAILOK); + op.create(false); + } + + if (objv_tracker) { + objv_tracker->prepare_op_for_write(&op); + } + + if (real_clock::is_zero(set_mtime)) { + set_mtime = real_clock::now(); + } + + struct timespec mtime_ts = real_clock::to_timespec(set_mtime); + op.mtime2(&mtime_ts); + op.write_full(data); + + bufferlist acl_bl; + + for (map::iterator iter = attrs.begin(); iter != attrs.end(); ++iter) { + const string& name = iter->first; + bufferlist& bl = iter->second; + + if (!bl.length()) + continue; + + op.setxattr(name.c_str(), bl); + } + + r = rados_obj.operate(&op); + if (r < 0) { + return r; + } + + if (objv_tracker) { + objv_tracker->apply_write(); + } + + if (pmtime) { + *pmtime = set_mtime; + } + + return 0; +} + + void RGWSI_SysObj::Obj::invalidate_state() { ctx.invalidate(obj); } -int RGWSI_SysObj::Obj::Read::GetObjState::get_rados_obj(RGWSI_RADOS *rados_svc, - RGWSI_Zone *zone_svc, - rgw_raw_obj& obj, - RGWSI_Rados::Obj **pobj) +int RGWSI_SysObj::Obj::ROp::GetObjState::get_rados_obj(RGWSI_RADOS *rados_svc, + RGWSI_Zone *zone_svc, + rgw_raw_obj& obj, + RGWSI_RADOS::Obj **pobj) { if (!has_rados_obj) { zone_svc->canonicalize_raw_obj(&obj); @@ -283,30 +524,29 @@ int RGWSI_SysObj::Obj::Read::GetObjState::get_rados_obj(RGWSI_RADOS *rados_svc, return 0; } -int RGWSI_SysObj::Obj::Read::stat() +int RGWSI_SysObj::Obj::ROp::stat() { RGWSI_SysObj *svc = source.sysobj_svc; rgw_raw_obj& obj = source.obj; - return sysobj_svc->stat(source.ctx(), state, obj, stat_params.attrs, - stat_params.lastmod, stat_params.obj_size, - stat_params.objv_tracker); + return svc->stat(source.get_ctx(), state, obj, attrs, + lastmod, obj_size, + objv_tracker); } - -int RGWSI_SysObj::Obj::Read::read(int64_t ofs, int64_t end, bufferlist *bl) +int RGWSI_SysObj::Obj::ROp::read(int64_t ofs, int64_t end, bufferlist *bl) { RGWSI_SysObj *svc = source.sysobj_svc; rgw_raw_obj& obj = source.get_obj(); return svc->read(source.get_ctx(), state, - read_params.objv_tracker, + objv_tracker, obj, bl, ofs, end, - read_params.attrs, + attrs, refresh_version); } -int RGWSI_SysObj::Obj::Read::get_attr(std::string_view name, bufferlist *dest) +int RGWSI_SysObj::Obj::ROp::get_attr(const char *name, bufferlist *dest) { RGWSI_SysObj *svc = source.sysobj_svc; rgw_raw_obj& obj = source.get_obj(); @@ -314,3 +554,97 @@ int RGWSI_SysObj::Obj::Read::get_attr(std::string_view name, bufferlist *dest) return svc->get_attr(obj, name, dest); } +int RGWSI_SysObj::Obj::WOp::remove() +{ + RGWSI_SysObj *svc = source.sysobj_svc; + rgw_raw_obj& obj = source.get_obj(); + + return svc->remove(source.get_ctx(), + objv_tracker, + obj); +} + +int RGWSI_SysObj::Obj::WOp::write(bufferlist& bl) +{ + RGWSI_SysObj *svc = source.sysobj_svc; + rgw_raw_obj& obj = source.get_obj(); + + return svc->write(obj, pmtime, attrs, exclusive, + bl, objv_tracker, mtime); +} + +int RGWSI_SysObj::Pool::Op::list_prefixed_objs(const string& prefix, list *result) +{ + bool is_truncated; + + auto rados_svc = source.get_rados_svc(); + auto rados_pool = rados_svc->pool(source.pool); + + auto op = rados_pool.op(); + + RGWAccessListFilterPrefix filter(prefix); + + int r = op.init(string(), &filter); + if (r < 0) { + return r; + } + + do { + list oids; +#define MAX_OBJS_DEFAULT 1000 + int r = op.get_next(MAX_OBJS_DEFAULT, &oids, &is_truncated); + if (r < 0) { + return r; + } + for (auto& val : oids) { + if (val.size() > prefix.size()) { + result->push_back(val.substr(prefix.size())); + } + } + } while (is_truncated); + + return 0; +} + +int RGWSI_SysObj::Obj::OmapOp::get_all(std::map *m) +{ + RGWSI_SysObj *svc = source.sysobj_svc; + rgw_raw_obj& obj = source.obj; + + return svc->omap_get_all(obj, m); +} + +int RGWSI_SysObj::Obj::OmapOp::get_vals(const string& marker, + uint64_t count, + std::map *m, + bool *pmore) +{ + RGWSI_SysObj *svc = source.sysobj_svc; + rgw_raw_obj& obj = source.obj; + + return svc->omap_get_vals(obj, marker, count, m, pmore); +} + +int RGWSI_SysObj::Obj::OmapOp::set(const std::string& key, bufferlist& bl) +{ + RGWSI_SysObj *svc = source.sysobj_svc; + rgw_raw_obj& obj = source.obj; + + return svc->omap_set(obj, key, bl, must_exist); +} + +int RGWSI_SysObj::Obj::OmapOp::set(const map& m) +{ + RGWSI_SysObj *svc = source.sysobj_svc; + rgw_raw_obj& obj = source.obj; + + return svc->omap_set(obj, m, must_exist); +} + +int RGWSI_SysObj::Obj::OmapOp::del(const std::string& key) +{ + RGWSI_SysObj *svc = source.sysobj_svc; + rgw_raw_obj& obj = source.obj; + + return svc->omap_del(obj, key); +} diff --git a/src/rgw/services/svc_sys_obj.h b/src/rgw/services/svc_sys_obj.h index 1a38cc268f49..53d93cc4f90e 100644 --- a/src/rgw/services/svc_sys_obj.h +++ b/src/rgw/services/svc_sys_obj.h @@ -49,67 +49,7 @@ struct RGWSysObjState { }; template -class RGWSysObjectCtxImpl { - RGWSI_SysObj *sysobj_svc; - std::map objs_state; - RWLock lock; - -public: - explicit RGWSysObjectCtxImpl(RGWSI_SysObj *_sysobj_svc) : sysobj_svc(_sysobj_svc), lock("RGWSysObjectCtxImpl") {} - - RGWSysObjectCtxImpl(const RGWSysObjectCtxImpl& rhs) : sysobj_svc(rhs.sysobj_svc), - objs_state(rhs.objs_state), - lock("RGWSysObjectCtxImpl") {} - RGWSysObjectCtxImpl(const RGWSysObjectCtxImpl&& rhs) : sysobj_svc(rhs.sysobj_svc), - objs_state(std::move(rhs.objs_state)), - lock("RGWSysObjectCtxImpl") {} - - S *get_state(const T& obj) { - S *result; - typename std::map::iterator iter; - lock.get_read(); - assert (!obj.empty()); - iter = objs_state.find(obj); - if (iter != objs_state.end()) { - result = &iter->second; - lock.unlock(); - } else { - lock.unlock(); - lock.get_write(); - result = &objs_state[obj]; - lock.unlock(); - } - return result; - } - - void set_atomic(T& obj) { - RWLock::WLocker wl(lock); - assert (!obj.empty()); - objs_state[obj].is_atomic = true; - } - void set_prefetch_data(T& obj) { - RWLock::WLocker wl(lock); - assert (!obj.empty()); - objs_state[obj].prefetch_data = true; - } - void invalidate(T& obj) { - RWLock::WLocker wl(lock); - auto iter = objs_state.find(obj); - if (iter == objs_state.end()) { - return; - } - bool is_atomic = iter->second.is_atomic; - bool prefetch_data = iter->second.prefetch_data; - - objs_state.erase(iter); - - if (is_atomic || prefetch_data) { - auto& s = objs_state[obj]; - s.is_atomic = is_atomic; - s.prefetch_data = prefetch_data; - } - } -}; +class RGWSysObjectCtxImpl; using RGWSysObjectCtx = RGWSysObjectCtxImpl; @@ -125,7 +65,7 @@ class RGWSI_SysObj : public RGWServiceInstance { public: class Obj { - friend class Read; + friend class ROp; RGWSI_SysObj *sysobj_svc; RGWSysObjectCtx& ctx; @@ -150,7 +90,7 @@ public: return obj; } - struct Read { + struct ROp { Obj& source; struct GetObjState { @@ -160,61 +100,152 @@ public: GetObjState() {} - int get_rados_obj(RGWSI_SysObj *sysobj_svc, rgw_raw_obj& obj, RGWSI_RADOS::Obj **pobj); + int get_rados_obj(RGWSI_RADOS *rados_svc, + RGWSI_Zone *zone_svc, + rgw_raw_obj& obj, + RGWSI_RADOS::Obj **pobj); } state; - struct StatParams { - RGWObjVersionTracker *objv_tracker{nullptr}; - ceph::real_time *lastmod{nullptr}; - uint64_t *obj_size{nullptr}; - map *attrs{nullptr}; - - StatParams& set_last_mod(ceph::real_time *_lastmod) { - lastmod = _lastmod; - return *this; - } - StatParams& set_obj_size(uint64_t *_obj_size) { - obj_size = _obj_size; - return *this; - } - StatParams& set_attrs(map *_attrs) { - attrs = _attrs; - return *this; - } - } stat_params; - - struct ReadParams { - RGWObjVersionTracker *objv_tracker{nullptr}; - map *attrs{nullptr}; - boost::optional refresh_version{boost::none}; - - ReadParams& set_attrs(map *_attrs) { - attrs = _attrs; - return *this; - } - ReadParams& set_obj_tracker(RGWObjVersionTracker *_objv_tracker) { - objv_tracker = _objv_tracker; - return *this; - } - ReadParams& set_refresh_version(const obj_version& rf) { - refresh_version = rf; - return *this; - } - } read_params; - - Read(Obj& _source) : source(_source) {} + RGWObjVersionTracker *objv_tracker{nullptr}; + map *attrs{nullptr}; + boost::optional refresh_version{boost::none}; + ceph::real_time *lastmod{nullptr}; + uint64_t *obj_size{nullptr}; + + ROp& set_last_mod(ceph::real_time *_lastmod) { + lastmod = _lastmod; + return *this; + } + + ROp& set_obj_size(uint64_t *_obj_size) { + obj_size = _obj_size; + return *this; + } + + ROp& set_attrs(map *_attrs) { + attrs = _attrs; + return *this; + } + + ROp& set_refresh_version(const obj_version& rf) { + refresh_version = rf; + return *this; + } + + ROp(Obj& _source) : source(_source) {} int stat(); int read(int64_t ofs, int64_t end, bufferlist *pbl); int read(bufferlist *pbl) { return read(0, -1, pbl); } - int get_attr(std::string_view name, bufferlist *dest); + int get_attr(const char *name, bufferlist *dest); }; + + struct WOp { + Obj& source; + + RGWObjVersionTracker *objv_tracker{nullptr}; + map attrs; + ceph::real_time mtime; + ceph::real_time *pmtime; + bool exclusive{false}; + + WOp& set_attrs(map&& _attrs) { + attrs = _attrs; + return *this; + } + + WOp& set_mtime(const ceph::real_time& _mtime) { + mtime = _mtime; + return *this; + } + + WOp& set_pmtime(ceph::real_time *_pmtime) { + pmtime = _pmtime; + return *this; + } + + WOp& set_exclusive(bool _exclusive = true) { + exclusive = _exclusive; + return *this; + } + + WOp(Obj& _source) : source(_source) {} + + int remove(); + int write(bufferlist& bl); + }; + + struct OmapOp { + Obj& source; + + bool must_exist{false}; + + OmapOp& set_must_exist(bool _must_exist = true) { + must_exist = _must_exist; + return *this; + } + + OmapOp(Obj& _source) : source(_source) {} + + int get_all(std::map *m); + int get_vals(const string& marker, + uint64_t count, + std::map *m, + bool *pmore); + int set(const std::string& key, bufferlist& bl); + int set(const map& m); + int del(const std::string& key); + }; + ROp rop() { + return ROp(*this); + } + + WOp wop() { + return WOp(*this); + } + + OmapOp omap() { + return OmapOp(*this); + } + }; + + class Pool { + friend class Op; + + RGWSI_SysObj *sysobj_svc; + rgw_pool pool; + + RGWSI_RADOS *get_rados_svc(); + + public: + Pool(RGWSI_SysObj *_sysobj_svc, + const rgw_pool& _pool) : sysobj_svc(_sysobj_svc), + pool(_pool) {} + + rgw_pool& get_pool() { + return pool; + } + + struct Op { + Pool& source; + + Op(Pool& _source) : source(_source) {} + + int list_prefixed_objs(const std::string& prefix, std::list *result); + }; + + Op op() { + return Op(*this); + } }; friend class Obj; - friend class Obj::Read; + friend class Obj::ROp; + friend class Obj::WOp; + friend class Pool; + friend class Pool::Op; private: std::shared_ptr rados_svc; @@ -233,7 +264,7 @@ private: RGWObjVersionTracker *objv_tracker); int stat(RGWSysObjectCtx& obj_ctx, - RGWSI_SysObj::Obj::Read::GetObjState& state, + RGWSI_SysObj::Obj::ROp::GetObjState& state, rgw_raw_obj& obj, map *attrs, real_time *lastmod, @@ -241,29 +272,106 @@ private: RGWObjVersionTracker *objv_tracker); int read(RGWSysObjectCtx& obj_ctx, - Obj::Read::GetObjState& read_state, + Obj::ROp::GetObjState& read_state, RGWObjVersionTracker *objv_tracker, rgw_raw_obj& obj, bufferlist *bl, off_t ofs, off_t end, map *attrs, boost::optional); - int get_attr(rgw_raw_obj& obj, std::string_view name, bufferlist *dest); - + int get_attr(rgw_raw_obj& obj, const char *name, bufferlist *dest); + + int omap_get_all(rgw_raw_obj& obj, std::map *m); + int omap_get_vals(rgw_raw_obj& obj, + const string& marker, + uint64_t count, + std::map *m, + bool *pmore); + int omap_set(rgw_raw_obj& obj, const std::string& key, bufferlist& bl, bool must_exist = false); + int omap_set(rgw_raw_obj& obj, const map& m, bool must_exist = false); + int omap_del(rgw_raw_obj& obj, const std::string& key); + + int remove(RGWSysObjectCtx& obj_ctx, + RGWObjVersionTracker *objv_tracker, + rgw_raw_obj& obj); + + int write(rgw_raw_obj& obj, + real_time *pmtime, + map& attrs, + bool exclusive, + const bufferlist& data, + RGWObjVersionTracker *objv_tracker, + real_time set_mtime); public: RGWSI_SysObj(RGWService *svc, CephContext *cct): RGWServiceInstance(svc, cct) {} - RGWSysObjectCtx&& init_obj_ctx() { - return std::move(RGWSysObjectCtx(this)); - } + RGWSysObjectCtx&& init_obj_ctx(); + Obj&& get_obj(RGWSysObjectCtx& obj_ctx, const rgw_raw_obj& obj); - Obj&& get_obj(RGWSysObjectCtx& obj_ctx, const rgw_raw_obj& obj) { - return std::move(Obj(this, obj_ctx, obj)); + Pool&& get_pool(const rgw_pool& pool) { + return std::move(Pool(this, pool)); } }; using RGWSysObj = RGWSI_SysObj::Obj; +template +class RGWSysObjectCtxImpl { + RGWSI_SysObj *sysobj_svc; + std::map objs_state; + RWLock lock; + +public: + explicit RGWSysObjectCtxImpl(RGWSI_SysObj *_sysobj_svc) : sysobj_svc(_sysobj_svc), lock("RGWSysObjectCtxImpl") {} + + RGWSysObjectCtxImpl(const RGWSysObjectCtxImpl& rhs) : sysobj_svc(rhs.sysobj_svc), + objs_state(rhs.objs_state), + lock("RGWSysObjectCtxImpl") {} + RGWSysObjectCtxImpl(const RGWSysObjectCtxImpl&& rhs) : sysobj_svc(rhs.sysobj_svc), + objs_state(std::move(rhs.objs_state)), + lock("RGWSysObjectCtxImpl") {} + + S *get_state(const T& obj) { + S *result; + typename std::map::iterator iter; + lock.get_read(); + assert (!obj.empty()); + iter = objs_state.find(obj); + if (iter != objs_state.end()) { + result = &iter->second; + lock.unlock(); + } else { + lock.unlock(); + lock.get_write(); + result = &objs_state[obj]; + lock.unlock(); + } + return result; + } + + void set_atomic(T& obj) { + RWLock::WLocker wl(lock); + assert (!obj.empty()); + } + void set_prefetch_data(T& obj) { + RWLock::WLocker wl(lock); + assert (!obj.empty()); + objs_state[obj].prefetch_data = true; + } + void invalidate(T& obj) { + RWLock::WLocker wl(lock); + auto iter = objs_state.find(obj); + if (iter == objs_state.end()) { + return; + } + objs_state.erase(iter); + } + + RGWSI_SysObj::Obj&& get_obj(const rgw_raw_obj& obj) { + return sysobj_svc->get_obj(*this, obj); + } +}; + #endif diff --git a/src/rgw/services/svc_zone.cc b/src/rgw/services/svc_zone.cc index ab9f63031f3d..69e857c5bbbd 100644 --- a/src/rgw/services/svc_zone.cc +++ b/src/rgw/services/svc_zone.cc @@ -6,6 +6,7 @@ #include "rgw/rgw_rest_conn.h" #include "common/errno.h" +#include "include/random.h" #define dout_subsys ceph_subsys_rgw @@ -21,6 +22,8 @@ static string RGW_DEFAULT_ZONE_ROOT_POOL = "rgw.root"; static string RGW_DEFAULT_ZONEGROUP_ROOT_POOL = "rgw.root"; static string RGW_DEFAULT_REALM_ROOT_POOL = "rgw.root"; static string RGW_DEFAULT_PERIOD_ROOT_POOL = "rgw.root"; +static string avail_pools = ".pools.avail"; +static string default_storage_pool_suffix = "rgw.buckets.data"; int RGWS_Zone::create_instance(const string& conf, RGWServiceInstanceRef *instance) { @@ -30,21 +33,20 @@ int RGWS_Zone::create_instance(const string& conf, RGWServiceInstanceRef *instan std::map RGWSI_Zone::get_deps() { - RGWServiceInstance::dependency dep1 = { .name = "rados", - .conf = "{}" }; - RGWServiceInstance::dependency dep2 = { .name = "sys_obj", + RGWServiceInstance::dependency dep1 = { .name = "sys_obj", .conf = "{}" }; map deps; - deps["rados_dep"] = dep1; - deps["sys_obj_dep"] = dep2; + deps["sys_obj_dep"] = dep1; + + RGWServiceInstance::dependency dep2 = { .name = "rados_obj", + .conf = "{}" }; + map deps2; + deps["rados_dep"] = dep2; return deps; } int RGWSI_Zone::load(const string& conf, std::map& dep_refs) { - rados_svc = static_pointer_cast(dep_refs["rados_dep"]); - assert(rados_svc); - sysobj_svc = static_pointer_cast(dep_refs["sys_obj_dep"]); assert(sysobj_svc); @@ -54,6 +56,9 @@ int RGWSI_Zone::load(const string& conf, std::map(); current_period = make_shared(); + rados_svc = static_pointer_cast(dep_refs["rados_dep"]); + assert(rados_svc); + return 0; } @@ -148,6 +153,44 @@ int RGWSI_Zone::init() writeable_zone = (zone_public_config->tier_type.empty() || zone_public_config->tier_type == "rgw"); + /* first build all zones index */ + for (auto ziter : zonegroup->zones) { + const string& id = ziter.first; + RGWZone& z = ziter.second; + zone_id_by_name[z.name] = id; + zone_by_id[id] = z; + } + + if (zone_by_id.find(zone_id()) == zone_by_id.end()) { + ldout(cct, 0) << "WARNING: could not find zone config in zonegroup for local zone (" << zone_id() << "), will use defaults" << dendl; + } + *zone_public_config = zone_by_id[zone_id()]; + for (auto ziter : get_zonegroup().zones) { + const string& id = ziter.first; + RGWZone& z = ziter.second; + if (id == zone_id()) { + continue; + } + if (z.endpoints.empty()) { + ldout(cct, 0) << "WARNING: can't generate connection for zone " << z.id << " id " << z.name << ": no endpoints defined" << dendl; + continue; + } + ldout(cct, 20) << "generating connection object for zone " << z.name << " id " << z.id << dendl; + RGWRESTConn *conn = new RGWRESTConn(cct, this, z.id, z.endpoints); + zone_conn_map[id] = conn; + if (zone_syncs_from(*zone_public_config, z) || + zone_syncs_from(z, *zone_public_config)) { + if (zone_syncs_from(*zone_public_config, z)) { + zone_data_sync_from_map[id] = conn; + } + if (zone_syncs_from(z, *zone_public_config)) { + zone_data_notify_to_map[id] = conn; + } + } else { + ldout(cct, 20) << "NOTICE: not syncing to/from zone " << z.name << " id " << z.id << dendl; + } + } + return 0; } @@ -170,35 +213,41 @@ void RGWSI_Zone::shutdown() int RGWSI_Zone::list_regions(list& regions) { RGWZoneGroup zonegroup; + RGWSI_SysObj::Pool syspool = sysobj_svc->get_pool(zonegroup.get_pool(cct)); - return list_raw_prefixed_objs(zonegroup.get_pool(cct), region_info_oid_prefix, regions); + return syspool.op().list_prefixed_objs(region_info_oid_prefix, ®ions); } int RGWSI_Zone::list_zonegroups(list& zonegroups) { RGWZoneGroup zonegroup; + RGWSI_SysObj::Pool syspool = sysobj_svc->get_pool(zonegroup.get_pool(cct)); - return list_raw_prefixed_objs(zonegroup.get_pool(cct), zonegroup_names_oid_prefix, zonegroups); + return syspool.op().list_prefixed_objs(zonegroup_names_oid_prefix, &zonegroups); } int RGWSI_Zone::list_zones(list& zones) { RGWZoneParams zoneparams; + RGWSI_SysObj::Pool syspool = sysobj_svc->get_pool(zoneparams.get_pool(cct)); - return list_raw_prefixed_objs(zoneparams.get_pool(cct), zone_names_oid_prefix, zones); + return syspool.op().list_prefixed_objs(zonegroup_names_oid_prefix, &zones); } int RGWSI_Zone::list_realms(list& realms) { RGWRealm realm(cct, sysobj_svc.get()); - return list_raw_prefixed_objs(realm.get_pool(cct), realm_names_oid_prefix, realms); + RGWSI_SysObj::Pool syspool = sysobj_svc->get_pool(realm.get_pool(cct)); + + return syspool.op().list_prefixed_objs(zonegroup_names_oid_prefix, &realms); } int RGWSI_Zone::list_periods(list& periods) { RGWPeriod period; list raw_periods; - int ret = list_raw_prefixed_objs(period.get_pool(cct), period.get_info_oid_prefix(), raw_periods); + RGWSI_SysObj::Pool syspool = sysobj_svc->get_pool(period.get_pool(cct)); + int ret = syspool.op().list_prefixed_objs(period.get_info_oid_prefix(), &raw_periods); if (ret < 0) { return ret; } @@ -254,9 +303,8 @@ int RGWSI_Zone::replace_region_with_zonegroup() RGWSysObjectCtx obj_ctx = sysobj_svc->init_obj_ctx(); RGWSysObj sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid)); - RGWSysObj::Read rop(sysobj); - int ret = rop.read(&bl); + int ret = sysobj.rop().read(&bl); if (ret < 0 && ret != -ENOENT) { ldout(cct, 0) << __func__ << " failed to read converted: ret "<< ret << " " << cpp_strerror(-ret) << dendl; @@ -349,12 +397,12 @@ int RGWSI_Zone::replace_region_with_zonegroup() ldout(cct, 0) << __func__ << " Error setting realm as default: " << cpp_strerror(-ret) << dendl; return ret; } - ret = realm.init(cct, this); + ret = realm->init(cct, sysobj_svc.get()); if (ret < 0) { ldout(cct, 0) << __func__ << " Error initing realm: " << cpp_strerror(-ret) << dendl; return ret; } - ret = current_period->init(cct, this, realm->get_id(), realm->get_name()); + ret = current_period->init(cct, sysobj_svc.get(), realm->get_id(), realm->get_name()); if (ret < 0) { ldout(cct, 0) << __func__ << " Error initing current period: " << cpp_strerror(-ret) << dendl; return ret; @@ -376,7 +424,7 @@ int RGWSI_Zone::replace_region_with_zonegroup() } RGWZoneGroup zonegroup(*iter); zonegroup.set_id(*iter); - int ret = zonegroup.init(cct, this, true, true); + int ret = zonegroup.init(cct, sysobj_svc.get(), true, true); if (ret < 0) { ldout(cct, 0) << __func__ << " failed init zonegroup: ret "<< ret << " " << cpp_strerror(-ret) << dendl; return ret; @@ -463,7 +511,7 @@ int RGWSI_Zone::replace_region_with_zonegroup() for (auto const& iter : regions) { RGWZoneGroup zonegroup(iter); - int ret = zonegroup.init(cct, this, true, true); + int ret = zonegroup.init(cct, sysobj_svc.get(), true, true); if (ret < 0) { ldout(cct, 0) << __func__ << " failed init zonegroup" << iter << ": ret "<< ret << " " << cpp_strerror(-ret) << dendl; return ret; @@ -477,8 +525,9 @@ int RGWSI_Zone::replace_region_with_zonegroup() } /* mark as converted */ - ret = rgw_put_system_obj(this, pool, oid, bl, - true, NULL, real_time(), NULL); + ret = sysobj.wop() + .set_exclusive(true) + .write(bl); if (ret < 0 ) { ldout(cct, 0) << __func__ << " failed to mark cluster as converted: ret "<< ret << " " << cpp_strerror(-ret) << dendl; @@ -668,9 +717,8 @@ int RGWSI_Zone::convert_regionmap() RGWSysObjectCtx obj_ctx = sysobj_svc->init_obj_ctx(); RGWSysObj sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid)); - RGWSysObj::Read rop(sysobj); - int ret = rop.read(&bl); + int ret = sysobj.rop().read(&bl); if (ret < 0 && ret != -ENOENT) { return ret; } else if (ret == -ENOENT) { @@ -708,10 +756,9 @@ int RGWSI_Zone::convert_regionmap() current_period->set_bucket_quota(zonegroupmap.bucket_quota); // remove the region_map so we don't try to convert again - rgw_raw_obj obj(pool, oid); - ret = delete_system_obj(obj); + ret = sysobj.wop().remove(); if (ret < 0) { - ldout(cct, 0) << "Error could not remove " << obj + ldout(cct, 0) << "Error could not remove " << sysobj.get_obj() << " after upgrading to zonegroup map: " << cpp_strerror(ret) << dendl; return ret; } @@ -854,6 +901,37 @@ bool RGWSI_Zone::can_reshard() const (zonegroup->zones.size() == 1 && current_period->is_single_zonegroup()); } +/** + * Check to see if the bucket metadata could be synced + * bucket: the bucket to check + * Returns false is the bucket is not synced + */ +bool RGWSI_Zone::is_syncing_bucket_meta(const rgw_bucket& bucket) +{ + + /* no current period */ + if (current_period->get_id().empty()) { + return false; + } + + /* zonegroup is not master zonegroup */ + if (!zonegroup->is_master_zonegroup()) { + return false; + } + + /* single zonegroup and a single zone */ + if (current_period->is_single_zonegroup() && zonegroup->zones.size() == 1) { + return false; + } + + /* zone is not master */ + if (zonegroup->master_zone.compare(zone_public_config->id) != 0) { + return false; + } + + return true; +} + void RGWSI_Zone::canonicalize_raw_obj(rgw_raw_obj *obj) { @@ -862,3 +940,270 @@ void RGWSI_Zone::canonicalize_raw_obj(rgw_raw_obj *obj) obj->pool = zone_params->domain_root; } } + +int RGWSI_Zone::select_new_bucket_location(RGWUserInfo& user_info, const string& zonegroup_id, const string& request_rule, + string *pselected_rule_name, RGWZonePlacementInfo *rule_info) + +{ + /* first check that zonegroup exists within current period. */ + RGWZoneGroup zonegroup; + int ret = get_zonegroup(zonegroup_id, zonegroup); + if (ret < 0) { + ldout(cct, 0) << "could not find zonegroup " << zonegroup_id << " in current period" << dendl; + return ret; + } + + /* find placement rule. Hierarchy: request rule > user default rule > zonegroup default rule */ + std::map::const_iterator titer; + + if (!request_rule.empty()) { + titer = zonegroup.placement_targets.find(request_rule); + if (titer == zonegroup.placement_targets.end()) { + ldout(cct, 0) << "could not find requested placement id " << request_rule + << " within zonegroup " << dendl; + return -ERR_INVALID_LOCATION_CONSTRAINT; + } + } else if (!user_info.default_placement.empty()) { + titer = zonegroup.placement_targets.find(user_info.default_placement); + if (titer == zonegroup.placement_targets.end()) { + ldout(cct, 0) << "could not find user default placement id " << user_info.default_placement + << " within zonegroup " << dendl; + return -ERR_INVALID_LOCATION_CONSTRAINT; + } + } else { + if (zonegroup.default_placement.empty()) { // zonegroup default rule as fallback, it should not be empty. + ldout(cct, 0) << "misconfiguration, zonegroup default placement id should not be empty." << dendl; + return -ERR_ZONEGROUP_DEFAULT_PLACEMENT_MISCONFIGURATION; + } else { + titer = zonegroup.placement_targets.find(zonegroup.default_placement); + if (titer == zonegroup.placement_targets.end()) { + ldout(cct, 0) << "could not find zonegroup default placement id " << zonegroup.default_placement + << " within zonegroup " << dendl; + return -ERR_INVALID_LOCATION_CONSTRAINT; + } + } + } + + /* now check tag for the rule, whether user is permitted to use rule */ + const auto& target_rule = titer->second; + if (!target_rule.user_permitted(user_info.placement_tags)) { + ldout(cct, 0) << "user not permitted to use placement rule " << titer->first << dendl; + return -EPERM; + } + + if (pselected_rule_name) + *pselected_rule_name = titer->first; + + return select_bucket_location_by_rule(titer->first, rule_info); +} + +int RGWSI_Zone::select_bucket_location_by_rule(const string& location_rule, RGWZonePlacementInfo *rule_info) +{ + if (location_rule.empty()) { + /* we can only reach here if we're trying to set a bucket location from a bucket + * created on a different zone, using a legacy / default pool configuration + */ + if (rule_info) { + return select_legacy_bucket_placement(rule_info); + } + + return 0; + } + + /* + * make sure that zone has this rule configured. We're + * checking it for the local zone, because that's where this bucket object is going to + * reside. + */ + map::iterator piter = get_zone_params().placement_pools.find(location_rule); + if (piter == get_zone_params().placement_pools.end()) { + /* couldn't find, means we cannot really place data for this bucket in this zone */ + if (get_zonegroup().equals(zonegroup->get_id())) { + /* that's a configuration error, zone should have that rule, as we're within the requested + * zonegroup */ + ldout(cct, 0) << "ERROR: This zone does not contain placement rule" + << location_rule << " present in the zonegroup!" << dendl; + return -EINVAL; + } else { + /* oh, well, data is not going to be placed here, bucket object is just a placeholder */ + return 0; + } + } + + RGWZonePlacementInfo& placement_info = piter->second; + + if (rule_info) { + *rule_info = placement_info; + } + + return 0; +} + +int RGWSI_Zone::select_bucket_placement(RGWUserInfo& user_info, const string& zonegroup_id, const string& placement_rule, + string *pselected_rule_name, RGWZonePlacementInfo *rule_info) +{ + if (!get_zone_params().placement_pools.empty()) { + return select_new_bucket_location(user_info, zonegroup_id, placement_rule, + pselected_rule_name, rule_info); + } + + if (pselected_rule_name) { + pselected_rule_name->clear(); + } + + if (rule_info) { + return select_legacy_bucket_placement(rule_info); + } + + return 0; +} + +int RGWSI_Zone::select_legacy_bucket_placement(RGWZonePlacementInfo *rule_info) +{ + bufferlist map_bl; + map m; + string pool_name; + bool write_map = false; + + rgw_raw_obj obj(get_zone_params().domain_root, avail_pools); + + auto obj_ctx = sysobj_svc->init_obj_ctx(); + auto sysobj = obj_ctx.get_obj(obj); + + int ret = sysobj.rop().read(&map_bl); + if (ret < 0) { + goto read_omap; + } + + try { + auto iter = map_bl.cbegin(); + decode(m, iter); + } catch (buffer::error& err) { + ldout(cct, 0) << "ERROR: couldn't decode avail_pools" << dendl; + } + +read_omap: + if (m.empty()) { + ret = sysobj.omap().get_all(&m); + + write_map = true; + } + + if (ret < 0 || m.empty()) { + vector pools; + string s = string("default.") + default_storage_pool_suffix; + pools.push_back(rgw_pool(s)); + vector retcodes; + bufferlist bl; + ret = rados_svc->pool().create(pools, &retcodes); + if (ret < 0) + return ret; + ret = sysobj.omap().set(s, bl); + if (ret < 0) + return ret; + m[s] = bl; + } + + if (write_map) { + bufferlist new_bl; + encode(m, new_bl); + ret = sysobj.wop().write(new_bl); + if (ret < 0) { + ldout(cct, 0) << "WARNING: could not save avail pools map info ret=" << ret << dendl; + } + } + + auto miter = m.begin(); + if (m.size() > 1) { + // choose a pool at random + auto r = ceph::util::generate_random_number(0, m.size() - 1); + std::advance(miter, r); + } + pool_name = miter->first; + + rule_info->data_pool = pool_name; + rule_info->data_extra_pool = pool_name; + rule_info->index_pool = pool_name; + rule_info->index_type = RGWBIType_Normal; + + return 0; +} + +int RGWSI_Zone::update_placement_map() +{ + bufferlist header; + map m; + rgw_raw_obj obj(zone_params->domain_root, avail_pools); + + auto obj_ctx = sysobj_svc->init_obj_ctx(); + auto sysobj = obj_ctx.get_obj(obj); + + int ret = sysobj.omap().get_all(&m); + if (ret < 0) + return ret; + + bufferlist new_bl; + encode(m, new_bl); + ret = sysobj.wop().write(new_bl); + if (ret < 0) { + ldout(cct, 0) << "WARNING: could not save avail pools map info ret=" << ret << dendl; + } + + return ret; +} + +int RGWSI_Zone::add_bucket_placement(const rgw_pool& new_pool) +{ + int ret = rados_svc->pool().lookup(new_pool); + if (ret < 0) { // DNE, or something + return ret; + } + + rgw_raw_obj obj(zone_params->domain_root, avail_pools); + auto obj_ctx = sysobj_svc->init_obj_ctx(); + auto sysobj = obj_ctx.get_obj(obj); + + bufferlist empty_bl; + ret = sysobj.omap().set(new_pool.to_str(), empty_bl); + + // don't care about return value + update_placement_map(); + + return ret; +} + +int RGWSI_Zone::remove_bucket_placement(const rgw_pool& old_pool) +{ + rgw_raw_obj obj(zone_params->domain_root, avail_pools); + auto obj_ctx = sysobj_svc->init_obj_ctx(); + auto sysobj = obj_ctx.get_obj(obj); + + int ret = sysobj.omap().del(old_pool.to_str()); + + // don't care about return value + update_placement_map(); + + return ret; +} + +int RGWSI_Zone::list_placement_set(set& names) +{ + bufferlist header; + map m; + + rgw_raw_obj obj(zone_params->domain_root, avail_pools); + auto obj_ctx = sysobj_svc->init_obj_ctx(); + auto sysobj = obj_ctx.get_obj(obj); + int ret = sysobj.omap().get_all(&m); + if (ret < 0) + return ret; + + names.clear(); + map::iterator miter; + for (miter = m.begin(); miter != m.end(); ++miter) { + names.insert(rgw_pool(miter->first)); + } + + return names.size(); +} + diff --git a/src/rgw/services/svc_zone.h b/src/rgw/services/svc_zone.h index 646d9084b9a3..13da2cc4fd90 100644 --- a/src/rgw/services/svc_zone.h +++ b/src/rgw/services/svc_zone.h @@ -21,8 +21,8 @@ public: class RGWSI_Zone : public RGWServiceInstance { - std::shared_ptr rados_svc; std::shared_ptr sysobj_svc; + std::shared_ptr rados_svc; std::shared_ptr realm; std::shared_ptr zonegroup; @@ -51,6 +51,10 @@ class RGWSI_Zone : public RGWServiceInstance int init_zg_from_local(bool *creating_defaults); int convert_regionmap(); + int update_placement_map(); + int add_bucket_placement(const rgw_pool& new_pool); + int remove_bucket_placement(const rgw_pool& old_pool); + int list_placement_set(set& names); public: RGWSI_Zone(RGWService *svc, CephContext *cct): RGWServiceInstance(svc, cct) {} @@ -76,10 +80,18 @@ public: return rest_master_conn; } + map& get_zonegroup_conn_map() { + return zonegroup_conn_map; + } + map& get_zone_conn_map() { return zone_conn_map; } + map& get_zone_data_sync_from_map() { + return zone_data_sync_from_map; + } + map& get_zone_data_notify_to_map() { return zone_data_notify_to_map; } @@ -102,6 +114,7 @@ public: bool need_to_log_data() const; bool need_to_log_metadata() const; bool can_reshard() const; + bool is_syncing_bucket_meta(const rgw_bucket& bucket); int list_zonegroups(list& zonegroups); int list_regions(list& regions); @@ -109,6 +122,8 @@ public: int list_realms(list& realms); int list_periods(list& periods); int list_periods(const string& current_period, list& periods); + + void canonicalize_raw_obj(rgw_raw_obj *obj); }; #endif