From ef81367a1eaeb7778dc60a668b9fe2f6a77e06ff Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Fri, 1 May 2015 15:17:10 -0700 Subject: [PATCH] rgw: compare oids and dump leaked objects Signed-off-by: Yehuda Sadeh (cherry picked from commit 1bc63d98ff8a3aa180065153690b4f8a73658b79) --- src/rgw/rgw_orphan.cc | 183 ++++++++++++++++++++++++++++++++++-------- src/rgw/rgw_orphan.h | 4 + 2 files changed, 155 insertions(+), 32 deletions(-) diff --git a/src/rgw/rgw_orphan.cc b/src/rgw/rgw_orphan.cc index ffc1cae797352..51b41a69e23a2 100644 --- a/src/rgw/rgw_orphan.cc +++ b/src/rgw/rgw_orphan.cc @@ -187,6 +187,28 @@ int RGWOrphanSearch::log_oids(map& log_shards, mapget_rados(); @@ -206,6 +228,7 @@ int RGWOrphanSearch::build_all_oids_index() map > oids; int count = 0; + uint64_t total = 0; cout << "logging all objects in the pool" << std::endl; @@ -218,32 +241,19 @@ int RGWOrphanSearch::build_all_oids_index() if (locator.size()) name += " (@" + locator + ")"; - ssize_t pos = oid.find('_'); - if (pos < 0) { - cerr << "ERROR: object does not have a bucket marker: " << oid << std::endl; - } - string obj_marker = oid.substr(0, pos); - - string obj_name; - string obj_instance; - string obj_ns; + string oid_fp; + get_obj_fingerprint(oid, &oid_fp); - rgw_obj::parse_raw_oid(oid.substr(pos + 1), &obj_name, &obj_instance, &obj_ns); - string hash_oid; - if (obj_ns.empty()) { - hash_oid = oid; - } else { - hash_oid = oid.substr(0, oid.size() - 10); - } + ldout(store->ctx(), 20) << "oid_fp=" << oid_fp << dendl; - ldout(store->ctx(), 20) << "hash_oid=" << hash_oid << dendl; - - int shard = orphan_shard(hash_oid); + int shard = orphan_shard(oid_fp); oids[shard].push_back(oid); #define COUNT_BEFORE_FLUSH 1000 + ++total; if (++count >= COUNT_BEFORE_FLUSH) { + ldout(store->ctx(), 1) << "iterated through " << total << " objects" << dendl; ret = log_oids(all_objs_index, oids); if (ret < 0) { cerr << __func__ << ": ERROR: log_oids() returned ret=" << ret << std::endl; @@ -280,6 +290,7 @@ int 
RGWOrphanSearch::build_buckets_instance_index() RGWObjectCtx obj_ctx(store); int count = 0; + uint64_t total = 0; do { list keys; @@ -290,6 +301,8 @@ int RGWOrphanSearch::build_buckets_instance_index() } for (list::iterator iter = keys.begin(); iter != keys.end(); ++iter) { + ++total; + ldout(store->ctx(), 10) << "bucket_instance=" << *iter << " total=" << total << dendl; int shard = orphan_shard(*iter); instances[shard].push_back(*iter); @@ -303,6 +316,7 @@ int RGWOrphanSearch::build_buckets_instance_index() instances.clear(); } } + } while (truncated); ret = log_oids(buckets_instance_index, instances); @@ -329,17 +343,14 @@ int RGWOrphanSearch::handle_stat_result(map >& oids, RGWRados: const rgw_obj& loc = miter.get_location(); string s = bucket.bucket_id + "_" + loc.get_object(); - - if (loc.ns.empty()) { - obj_oids.insert(s); - } else { - /* - * it's within a namespace, we can store only part of the name, so that any other tail or - * part objects of the same logical object will not be duplicated. When we do the search - * we'll only search for this substring - */ - obj_oids.insert(s.substr(0, s.size() - 10)); - } + /* + * if it's within a namespace, we can store only part of the name, so that any other tail or + * part objects of the same logical object will not be duplicated. When we do the search + * we'll only search for this substring + */ + string fp = (loc.ns.empty() ? 
s : s.substr(0, s.size() - 10)); + + obj_oids.insert(fp); } } @@ -469,6 +480,7 @@ int RGWOrphanSearch::build_linked_oids_index() map > oids; map::iterator iter = buckets_instance_index.find(search_stage.shard); for (; iter != buckets_instance_index.end(); ++iter) { + ldout(store->ctx(), 0) << "building linked oids index: " << iter->first << "/" << buckets_instance_index.size() << dendl; bool truncated; string oid = iter->second; @@ -513,12 +525,113 @@ int RGWOrphanSearch::build_linked_oids_index() return 0; } +class OMAPReader { + librados::IoCtx ioctx; + string oid; + + map entries; + map::iterator iter; + string marker; + bool truncated; + +public: + OMAPReader(librados::IoCtx& _ioctx, const string& _oid) : ioctx(_ioctx), oid(_oid), truncated(true) { + iter = entries.end(); + } + + int get_next(string *key, bufferlist *pbl, bool *done); +}; + +int OMAPReader::get_next(string *key, bufferlist *pbl, bool *done) +{ + if (iter != entries.end()) { + *key = iter->first; + if (pbl) { + *pbl = iter->second; + } + ++iter; + *done = false; + marker = *key; + return 0; + } + + if (!truncated) { + *done = true; + return 0; + } + +#define MAX_OMAP_GET_ENTRIES 100 + int ret = ioctx.omap_get_vals(oid, marker, MAX_OMAP_GET_ENTRIES, &entries); + if (ret < 0) { + if (ret == -ENOENT) { + *done = true; + return 0; + } + return ret; + } + + truncated = (entries.size() == MAX_OMAP_GET_ENTRIES); + iter = entries.begin(); + return get_next(key, pbl, done); +} + +int RGWOrphanSearch::compare_oid_indexes() +{ + assert(linked_objs_index.size() == all_objs_index.size()); + + librados::IoCtx& ioctx = orphan_store.get_ioctx(); + + map::iterator liter = linked_objs_index.begin(); + map::iterator aiter = all_objs_index.begin(); + + for (; liter != linked_objs_index.end(); ++liter, ++aiter) { + OMAPReader linked_entries(ioctx, liter->second); + OMAPReader all_entries(ioctx, aiter->second); + + bool done; + + string cur_linked; + bool linked_done = false; + + + do { + string key; + int r = 
all_entries.get_next(&key, NULL, &done); + if (r < 0) { + return r; + } + if (done) { + break; + } + + string key_fp; + get_obj_fingerprint(key, &key_fp); + + while (cur_linked < key_fp && !linked_done) { + r = linked_entries.get_next(&cur_linked, NULL, &linked_done); + if (r < 0) { + return r; + } + } + + if (cur_linked == key_fp) { + ldout(store->ctx(), 20) << "linked: " << key << dendl; + cout << "good: " << key << std::endl; + continue; + } + + ldout(store->ctx(), 20) << "leaked: " << key << dendl; + cout << "leaked: " << key << std::endl; + } while (!done); + } + + return 0; +} + int RGWOrphanSearch::run() { int r; -cout << "search_stage.marker=" << search_stage.marker << " search_stage.shard=" << search_stage.shard << std::endl; - switch (search_stage.stage) { case ORPHAN_SEARCH_STAGE_INIT: @@ -580,6 +693,12 @@ cout << "search_stage.marker=" << search_stage.marker << " search_stage.shard=" // fall through case ORPHAN_SEARCH_STAGE_COMPARE: + r = compare_oid_indexes(); + if (r < 0) { + lderr(store->ctx()) << __func__ << ": ERROR: compare_oid_indexes() returned ret=" << r << dendl; + return r; + } + break; default: diff --git a/src/rgw/rgw_orphan.h b/src/rgw/rgw_orphan.h index 95efea1171629..ac262fe81c1e6 100644 --- a/src/rgw/rgw_orphan.h +++ b/src/rgw/rgw_orphan.h @@ -127,6 +127,8 @@ public: oid = RGW_ORPHAN_INDEX_OID; } + librados::IoCtx& get_ioctx() { return ioctx; } + int init(); int read_job(const string& job_name, RGWOrphanSearchState& state); @@ -171,6 +173,7 @@ class RGWOrphanSearch { int handle_stat_result(map >& oids, RGWRados::Object::Stat::Result& result); int pop_and_handle_stat_op(map >& oids, std::deque& ops); + void get_obj_fingerprint(const string& oid, string *fp); public: RGWOrphanSearch(RGWRados *_store, int _max_ios) : store(_store), orphan_store(store), max_concurrent_ios(_max_ios) {} @@ -189,6 +192,7 @@ public: int build_buckets_instance_index(); int build_linked_oids_for_bucket(const string& bucket_instance_id, map >& oids); int 
build_linked_oids_index(); + int compare_oid_indexes(); int run(); }; -- 2.39.5