From: Yehuda Sadeh Date: Thu, 30 Apr 2015 00:12:34 +0000 (-0700) Subject: rgw: iterate over linked objects, store them X-Git-Tag: v0.94.4~49^2~14 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=748ea57cfeca7b295afca5b8126d454eb85550c7;p=ceph.git rgw: iterate over linked objects, store them only keep part of the oid name if it is in a namespace. Signed-off-by: Yehuda Sadeh (cherry picked from commit 75902fdb12ce0413d3beb455df47134f3d7386e7) --- diff --git a/src/rgw/rgw_orphan.cc b/src/rgw/rgw_orphan.cc index adef6243ff9..5fa4df84c0b 100644 --- a/src/rgw/rgw_orphan.cc +++ b/src/rgw/rgw_orphan.cc @@ -135,6 +135,9 @@ int RGWOrphanSearch::init(const string& job_name, RGWOrphanSearchInfo *info) { snprintf(buf, sizeof(buf), "%s.buckets.%d", index_objs_prefix.c_str(), i); buckets_instance_index[i] = buf; + + snprintf(buf, sizeof(buf), "%s.linked.%d", index_objs_prefix.c_str(), i); + linked_objs_index[i] = buf; } return 0; } @@ -221,7 +224,22 @@ int RGWOrphanSearch::build_all_oids_index() } string obj_marker = oid.substr(0, pos); - int shard = orphan_shard(oid); + string obj_name; + string obj_instance; + string obj_ns; + + rgw_obj::parse_raw_oid(oid.substr(pos + 1), &obj_name, &obj_instance, &obj_ns); + + string hash_oid; + if (obj_ns.empty()) { + hash_oid = oid; + } else { + hash_oid = oid.substr(0, oid.size() - 10); + } + + ldout(store->ctx(), 20) << "hash_oid=" << hash_oid << dendl; + + int shard = orphan_shard(hash_oid); oids[shard].push_back(oid); #define COUNT_BEFORE_FLUSH 1000 @@ -287,13 +305,77 @@ int RGWOrphanSearch::build_buckets_instance_index() } } while (truncated); + ret = log_oids(buckets_instance_index, instances); + if (ret < 0) { + lderr(store->ctx()) << __func__ << ": ERROR: log_oids() returned ret=" << ret << dendl; + return ret; + } store->meta_mgr->list_keys_complete(handle); return 0; } +int RGWOrphanSearch::handle_stat_result(map >& oids, RGWRados::Object::Stat::Result& result) +{ + set obj_oids; + rgw_bucket& bucket = result.obj.bucket; + if (!result.has_manifest) { + obj_oids.insert(bucket.bucket_id + "_" + result.obj.get_object()); + } else { + RGWObjManifest& manifest = result.manifest; + + RGWObjManifest::obj_iterator miter; + for (miter = manifest.obj_begin(); miter != manifest.obj_end(); ++miter) { + const rgw_obj& loc = miter.get_location(); + + string s = bucket.bucket_id + "_" + loc.get_object(); + + if (loc.ns.empty()) { + obj_oids.insert(s); + } else { + /* + * it's within a namespace, we can store only part of the name, so that any other tail or + * part objects of the same logical object will not be duplicated. When we do the search + * we'll only search for this substring + */ + obj_oids.insert(s.substr(0, s.size() - 10)); + } + } + } + + for (set::iterator iter = obj_oids.begin(); iter != obj_oids.end(); ++iter) { + ldout(store->ctx(), 20) << __func__ << ": oid for obj=" << result.obj << ": " << *iter << dendl; + + int shard = orphan_shard(*iter); + oids[shard].push_back(*iter); + } + + return 0; +} + +int RGWOrphanSearch::pop_and_handle_stat_op(map >& oids, std::deque& ops) +{ + RGWRados::Object::Stat& front_op = ops.front(); + + int ret = front_op.wait(); + if (ret < 0) { + if (ret != -ENOENT) { + lderr(store->ctx()) << "ERROR: stat_async() returned error: " << cpp_strerror(-ret) << dendl; + } + goto done; + } + ret = handle_stat_result(oids, front_op.result); + if (ret < 0) { + lderr(store->ctx()) << "ERROR: handle_stat_response() returned error: " << cpp_strerror(-ret) << dendl; + } +done: + ops.pop_front(); + return ret; +} + int RGWOrphanSearch::build_linked_oids_for_bucket(const string& bucket_instance_id) { + ldout(store->ctx(), 10) << "building linked oids for bucket instance: " << bucket_instance_id << dendl; RGWBucketInfo bucket_info; RGWObjectCtx obj_ctx(store); int ret = store->get_bucket_instance_info(obj_ctx, bucket_instance_id, bucket_info, NULL, NULL); @@ -316,6 +398,9 @@ int RGWOrphanSearch::build_linked_oids_for_bucket(const string& bucket_instance_ bool truncated; deque stat_ops; + map > oids; + + int count = 0; do { vector result; @@ -349,27 +434,38 @@ int RGWOrphanSearch::build_linked_oids_for_bucket(const string& bucket_instance_ return ret; } if (stat_ops.size() >= max_concurrent_ios) { - RGWRados::Object::Stat& front_op = stat_ops.front(); - - ret = front_op.wait(); + ret = pop_and_handle_stat_op(oids, stat_ops); if (ret < 0) { - lderr(store->ctx()) << "ERROR: stat_async() returned error: " << cpp_strerror(-ret) << dendl; + if (ret != -ENOENT) { + lderr(store->ctx()) << "ERROR: stat_async() returned error: " << cpp_strerror(-ret) << dendl; + } } - - stat_ops.pop_front(); + } + if (++count >= COUNT_BEFORE_FLUSH) { + ret = log_oids(linked_objs_index, oids); + if (ret < 0) { + cerr << __func__ << ": ERROR: log_oids() returned ret=" << ret << std::endl; + return ret; + } + count = 0; + oids.clear(); } } } while (truncated); while (!stat_ops.empty()) { - RGWRados::Object::Stat& front_op = stat_ops.front(); - - ret = front_op.wait(); + ret = pop_and_handle_stat_op(oids, stat_ops); if (ret < 0) { - lderr(store->ctx()) << "ERROR: stat_async() returned error: " << cpp_strerror(-ret) << dendl; + if (ret != -ENOENT) { + lderr(store->ctx()) << "ERROR: stat_async() returned error: " << cpp_strerror(-ret) << dendl; + } } + } - stat_ops.pop_front(); + ret = log_oids(linked_objs_index, oids); + if (ret < 0) { + cerr << __func__ << ": ERROR: log_oids() returned ret=" << ret << std::endl; + return ret; } return 0; diff --git a/src/rgw/rgw_orphan.h b/src/rgw/rgw_orphan.h index 25692005bd9..691efa73fd1 100644 --- a/src/rgw/rgw_orphan.h +++ b/src/rgw/rgw_orphan.h @@ -142,6 +142,9 @@ class RGWOrphanSearch { return ceph_str_hash_linux(str.c_str(), str.size()) % RGW_ORPHANSEARCH_HASH_PRIME % search_info.num_shards; } + int handle_stat_result(map >& oids, RGWRados::Object::Stat::Result& result); + int pop_and_handle_stat_op(map >& oids, std::deque& ops); + public: RGWOrphanSearch(RGWRados *_store, int _max_ios) : store(_store), orphan_store(store), max_concurrent_ios(_max_ios) {} diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 163d8679629..9e3298153cd 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -5010,6 +5010,7 @@ int RGWRados::Object::Stat::stat_async() RGWRados *store = source->get_store(); RGWObjState *s = ctx.get_state(obj); /* calling this one directly because otherwise a sync request will be sent */ + result.obj = obj; if (s->has_attrs) { state.ret = 0; result.size = s->size; @@ -5074,6 +5075,7 @@ int RGWRados::Object::Stat::finish() ldout(store->ctx(), 0) << "ERROR: " << __func__ << ": failed to decode manifest" << dendl; return -EIO; } + result.has_manifest = true; } return 0;