backend.set_keys(to_add, t);
}
-void SnapMapper::get_objects_by_prefixes(
+int SnapMapper::get_next_objects_to_trim(
snapid_t snap,
unsigned max,
vector<hobject_t> *out)
{
- /// maintain the prefix_itr between calls to avoid searching depleted prefixes
- for ( ; prefix_itr != prefixes.end(); prefix_itr++) {
- string prefix(get_prefix(pool, snap) + *prefix_itr);
+ ceph_assert(out);
+ ceph_assert(out->empty());
+
+ // if max would be 0, we return ENOENT and the caller would mistakenly
+ // trim the snaptrim queue
+ ceph_assert(max > 0);
+ int r = 0;
+
+ /// \todo cache the prefixes-set in update_bits()
+ for (set<string>::iterator i = prefixes.begin();
+ i != prefixes.end() && out->size() < max && r == 0;
+ ++i) {
+ string prefix(get_prefix(pool, snap) + *i);
string pos = prefix;
while (out->size() < max) {
pair<string, ceph::buffer::list> next;
- // access RocksDB (an expensive operation!)
- int r = backend.get_next(pos, &next);
+ r = backend.get_next(pos, &next);
dout(20) << __func__ << " get_next(" << pos << ") returns " << r
<< " " << next << dendl;
if (r != 0) {
- return; // Done
+ break; // Done
}
if (next.first.substr(0, prefix.size()) !=
prefix) {
- // TBD: we access the DB twice for the first object of each iterator...
break; // Done with this prefix
}
out->push_back(next_decoded.second);
pos = next.first;
}
-
- if (out->size() >= max) {
- return;
- }
}
-}
-
-int SnapMapper::get_next_objects_to_trim(
- snapid_t snap,
- unsigned max,
- vector<hobject_t> *out)
-{
- ceph_assert(out);
- ceph_assert(out->empty());
-
- // if max would be 0, we return ENOENT and the caller would mistakenly
- // trim the snaptrim queue
- ceph_assert(max > 0);
-
- // The prefix_itr is bound to a prefix_itr_snap so if we trim another snap
- // we must reset the prefix_itr (should not happen normally)
- if (prefix_itr_snap != snap) {
- dout(10) << __func__ << "::Reset prefix_itr(unexpcted) snap was changed from <"
- << prefix_itr_snap << "> to <" << snap << ">" << dendl;
- reset_prefix_itr(snap);
- }
-
- // when reaching the end of the DB reset the prefix_ptr and verify
- // we didn't miss objects which were added after we started trimming
- // This should never happen in reality because the snap was logically deleted
- // before trimming starts (and so no new clone-objects could be added)
- // For more info see PG::filter_snapc()
- //
- // We still like to be extra careful and run one extra loop over all prefeixes
- get_objects_by_prefixes(snap, max, out);
- if (out->size() == 0) {
- dout(10) << __func__ << "::Reset prefix_itr after a complete trim!" << dendl;
- reset_prefix_itr(snap);
- get_objects_by_prefixes(snap, max, out);
-
- bool osd_debug_trim_objects = cct->_conf.get_val<bool>("osd_debug_trim_objects");
- if (osd_debug_trim_objects) {
- ceph_assert(out->size() == 0);
- }
- }
-
if (out->size() == 0) {
return -ENOENT;
} else {
}
}
+
int SnapMapper::remove_oid(
const hobject_t &oid,
MapCacher::Transaction<std::string, ceph::buffer::list> *t)
uint32_t bits)
: driver(driver),
mapper(new SnapMapper(g_ceph_context, driver, mask, bits, 0, shard_id_t(1))),
- mask(mask), bits(bits) {}
-
- hobject_t create_hobject(
- unsigned idx,
- snapid_t snapid,
- int64_t pool,
- const std::string& nspace) {
- const object_t oid("OID" + std::to_string(idx));
- const std::string key("KEY" + std::to_string(idx));
- const uint32_t hash = (idx & ((~0)<<bits)) | (mask & ~((~0)<<bits));
-
- return hobject_t(oid, key, snapid, hash, pool, nspace);
- }
+ mask(mask), bits(bits) {}
hobject_t random_hobject() {
return hobject_t(
}
}
- snapid_t create_snap() {
- snapid_t snapid = next;
- snap_to_hobject[snapid];
+ void create_snap() {
+ snap_to_hobject[next];
++next;
-
- return snapid;
- }
-
- // must be called with lock held to protect access to
- // hobject_to_snap and snap_to_hobject
- void add_object_to_snaps(const hobject_t & obj, const set<snapid_t> &snaps) {
- hobject_to_snap[obj] = snaps;
- for (auto snap : snaps) {
- map<snapid_t, set<hobject_t> >::iterator j = snap_to_hobject.find(snap);
- ceph_assert(j != snap_to_hobject.end());
- j->second.insert(obj);
- }
- {
- PausyAsyncMap::Transaction t;
- mapper->add_oid(obj, snaps, &t);
- driver->submit(&t);
- }
}
void create_object() {
obj = random_hobject();
} while (hobject_to_snap.count(obj));
- set<snapid_t> snaps;
+ set<snapid_t> &snaps = hobject_to_snap[obj];
choose_random_snaps(1 + (rand() % 20), &snaps);
- add_object_to_snaps(obj, snaps);
+ for (set<snapid_t>::iterator i = snaps.begin();
+ i != snaps.end();
+ ++i) {
+ map<snapid_t, set<hobject_t> >::iterator j = snap_to_hobject.find(*i);
+ ceph_assert(j != snap_to_hobject.end());
+ j->second.insert(obj);
+ }
+ {
+ PausyAsyncMap::Transaction t;
+ mapper->add_oid(obj, snaps, &t);
+ driver->submit(&t);
+ }
}
std::pair<std::string, ceph::buffer::list> to_raw(
return mapper->make_purged_snap_key(std::forward<Args>(args)...);
}
- // must be called with lock held to protect access to
- // snap_to_hobject and hobject_to_snap
- int trim_snap(snapid_t snapid, unsigned max_count, vector<hobject_t> & out) {
- set<hobject_t>& hobjects = snap_to_hobject[snapid];
+ void trim_snap() {
+ std::lock_guard l{lock};
+ if (snap_to_hobject.empty())
+ return;
+ map<snapid_t, set<hobject_t> >::iterator snap =
+ rand_choose(snap_to_hobject);
+ set<hobject_t> hobjects = snap->second;
+
vector<hobject_t> hoids;
- int ret = mapper->get_next_objects_to_trim(snapid, max_count, &hoids);
- if (ret == 0) {
- out.insert(out.end(), hoids.begin(), hoids.end());
+ while (mapper->get_next_objects_to_trim(
+ snap->first, rand() % 5 + 1, &hoids) == 0) {
for (auto &&hoid: hoids) {
ceph_assert(!hoid.is_max());
ceph_assert(hobjects.count(hoid));
hobjects.erase(hoid);
- map<hobject_t, set<snapid_t>>::iterator j = hobject_to_snap.find(hoid);
- ceph_assert(j->second.count(snapid));
+ map<hobject_t, set<snapid_t>>::iterator j =
+ hobject_to_snap.find(hoid);
+ ceph_assert(j->second.count(snap->first));
set<snapid_t> old_snaps(j->second);
- j->second.erase(snapid);
+ j->second.erase(snap->first);
{
PausyAsyncMap::Transaction t;
}
hoids.clear();
}
- return ret;
- }
-
- // must be called with lock held to protect access to
- // snap_to_hobject and hobject_to_snap in trim_snap
- // will keep trimming until reaching max_count or failing a call to trim_snap()
- void trim_snap_force(snapid_t snapid,
- unsigned max_count,
- vector<hobject_t>& out) {
-
- int guard = 1000;
- vector<hobject_t> tmp;
- unsigned prev_size = 0;
- while (tmp.size() < max_count) {
- unsigned req_size = max_count - tmp.size();
- // each call adds more objects into the tmp vector
- trim_snap(snapid, req_size, tmp);
- if (prev_size < tmp.size()) {
- prev_size = tmp.size();
- }
- else{
- // the tmp vector size was not increased in the last call
- // which means we were unable to find anything to trim
- break;
- }
- ceph_assert(--guard > 0);
- }
- out.insert(out.end(), tmp.begin(), tmp.end());
- }
-
- void trim_snap() {
- std::lock_guard l{lock};
- if (snap_to_hobject.empty()) {
- return;
- }
- int ret = 0;
- map<snapid_t, set<hobject_t> >::iterator snap = rand_choose(snap_to_hobject);
- do {
- int max_count = rand() % 5 + 1;
- vector<hobject_t> out;
- ret = trim_snap(snap->first, max_count, out);
- } while(ret == 0);
- set<hobject_t> hobjects = snap->second;
ceph_assert(hobjects.empty());
snap_to_hobject.erase(snap);
}
ceph_assert(r == 0);
ASSERT_EQ(snaps, obj->second);
}
-
- void test_prefix_itr() {
- // protects access to snap_to_hobject and hobject_to_snap
- std::lock_guard l{lock};
- snapid_t snapid = create_snap();
- // we initialize 32 PGS
- ceph_assert(bits == 5);
-
- const int64_t pool(0);
- const std::string nspace("GBH");
- set<snapid_t> snaps = { snapid };
- set<hobject_t>& hobjects = snap_to_hobject[snapid];
- vector<hobject_t> trimmed_objs;
- vector<hobject_t> stored_objs;
-
- // add objects 0, 32, 64, 96, 128, 160, 192, 224
- // which should hit all the prefixes
- for (unsigned idx = 0; idx < 8; idx++) {
- hobject_t hobj = create_hobject(idx * 32, snapid, pool, nspace);
- add_object_to_snaps(hobj, snaps);
- stored_objs.push_back(hobj);
- }
- ceph_assert(hobjects.size() == 8);
-
- // trim 0, 32, 64, 96
- trim_snap(snapid, 4, trimmed_objs);
- ceph_assert(hobjects.size() == 4);
-
- // add objects (3, 35, 67) before the prefix_itr position
- // to force an iteartor reset later
- for (unsigned idx = 0; idx < 3; idx++) {
- hobject_t hobj = create_hobject(idx * 32 + 3, snapid, pool, nspace);
- add_object_to_snaps(hobj, snaps);
- stored_objs.push_back(hobj);
- }
- ceph_assert(hobjects.size() == 7);
-
- // will now trim 128, 160, 192, 224
- trim_snap(snapid, 4, trimmed_objs);
- ceph_assert(hobjects.size() == 3);
-
- // finally, trim 3, 35, 67
- // This will force a reset to the prefix_itr (which is what we test here)
- trim_snap(snapid, 3, trimmed_objs);
- ceph_assert(hobjects.size() == 0);
-
- ceph_assert(trimmed_objs.size() == 11);
- // trimmed objs must be in the same order they were inserted
- // this will prove that the second call to add_object_to_snaps inserted
- // them before the current prefix_itr
- ceph_assert(trimmed_objs.size() == stored_objs.size());
- ceph_assert(std::equal(trimmed_objs.begin(), trimmed_objs.end(),
- stored_objs.begin()));
- snap_to_hobject.erase(snapid);
- }
-
- // insert 256 objects which should populate multiple prefixes
- // trim until we change prefix and then insert an old object
- // which we know for certain belongs to a prefix before prefix_itr
- void test_prefix_itr2() {
- // protects access to snap_to_hobject and hobject_to_snap
- std::lock_guard l{lock};
- snapid_t snapid = create_snap();
- // we initialize 32 PGS
- ceph_assert(bits == 5);
-
- const int64_t pool(0);
- const std::string nspace("GBH");
- set<snapid_t> snaps = { snapid };
- vector<hobject_t> trimmed_objs;
- vector<hobject_t> stored_objs;
-
- constexpr unsigned MAX_IDX = 256;
- for (unsigned idx = 0; idx < MAX_IDX; idx++) {
- hobject_t hobj = create_hobject(idx, snapid, pool, nspace);
- add_object_to_snaps(hobj, snaps);
- stored_objs.push_back(hobj);
- }
-
- hobject_t dup_hobj;
- bool found = false;
- trim_snap(snapid, 1, trimmed_objs);
- const std::set<std::string>::iterator itr = mapper->get_prefix_itr();
- for (unsigned idx = 1; idx < MAX_IDX + 1; idx++) {
- trim_snap(snapid, 1, trimmed_objs);
- if (!found && mapper->get_prefix_itr() != itr) {
- // we changed prefix -> insert an OBJ belonging to perv prefix
- dup_hobj = create_hobject(idx - 1, snapid, pool, nspace);
- add_object_to_snaps(dup_hobj, snaps);
- stored_objs.push_back(dup_hobj);
- found = true;
- }
- }
- ceph_assert(found);
-
- sort(trimmed_objs.begin(), trimmed_objs.end());
- sort(stored_objs.begin(), stored_objs.end());
- ceph_assert(trimmed_objs.size() == MAX_IDX+1);
- ceph_assert(trimmed_objs.size() == stored_objs.size());
- ceph_assert(std::equal(trimmed_objs.begin(), trimmed_objs.end(),
- stored_objs.begin()));
- snap_to_hobject.erase(snapid);
- }
-
- void add_rand_hobjects(unsigned count,
- snapid_t snapid,
- int64_t pool,
- const std::string& nspace,
- vector<hobject_t>& stored_objs) {
- constexpr unsigned MAX_VAL = 1000;
- set<snapid_t> snaps = { snapid };
- for (unsigned i = 0; i < count; i++) {
- hobject_t hobj;
- do {
- unsigned val = rand() % MAX_VAL;
- hobj = create_hobject(val, snapid, pool, nspace);
- }while (hobject_to_snap.count(hobj));
- add_object_to_snaps(hobj, snaps);
- stored_objs.push_back(hobj);
- }
- }
-
- // Start with a set of random objects then run a partial trim
- // followed by another random insert
- // This should cause *some* objects to be added before the prefix_itr
- // and will verify that we still remove them
- void test_prefix_itr_rand() {
- // protects access to snap_to_hobject and hobject_to_snap
- std::lock_guard l{lock};
- snapid_t snapid = create_snap();
- // we initialize 32 PGS
- ceph_assert(bits == 5);
-
- const int64_t pool(0);
- const std::string nspace("GBH");
- vector<hobject_t> trimmed_objs;
- vector<hobject_t> stored_objs;
- set<hobject_t>& hobjects = snap_to_hobject[snapid];
- ceph_assert(hobjects.size() == 0);
-
- // add 100 random objects
- add_rand_hobjects(100, snapid, pool, nspace, stored_objs);
- ceph_assert(hobjects.size() == 100);
-
- // trim the first 75 objects leaving 25 objects
- trim_snap(snapid, 75, trimmed_objs);
- ceph_assert(hobjects.size() == 25);
-
- // add another 25 random objects (now we got 50 objects)
- add_rand_hobjects(25, snapid, pool, nspace, stored_objs);
- ceph_assert(hobjects.size() == 50);
-
- // trim 49 objects leaving a single object
- // we must use a wrapper function to keep trimming while until -ENOENT
- trim_snap_force(snapid, 49, trimmed_objs);
- ceph_assert(hobjects.size() == 1);
-
- // add another 9 random objects (now we got 10 objects)
- add_rand_hobjects(9, snapid, pool, nspace, stored_objs);
- ceph_assert(hobjects.size() == 10);
-
- // trim 10 objects leaving no object in store
- trim_snap_force(snapid, 10, trimmed_objs);
- ceph_assert(hobjects.size() == 0);
-
- // add 10 random objects (now we got 10 objects)
- add_rand_hobjects(10, snapid, pool, nspace, stored_objs);
- ceph_assert(hobjects.size() == 10);
-
- // trim 10 objects leaving no object in store
- trim_snap_force(snapid, 10, trimmed_objs);
- ceph_assert(hobjects.size() == 0);
-
- sort(trimmed_objs.begin(), trimmed_objs.end());
- sort(stored_objs.begin(), stored_objs.end());
- ceph_assert(trimmed_objs.size() == 144);
- ceph_assert(trimmed_objs.size() == stored_objs.size());
-
- bool are_equal = std::equal(trimmed_objs.begin(), trimmed_objs.end(),
- stored_objs.begin());
- ceph_assert(are_equal);
- snap_to_hobject.erase(snapid);
- }
};
class SnapMapperTest : public ::testing::Test {
}
};
-// This test creates scenarios which are impossible to get with normal code.
-// The normal code deletes the snap before calling TRIM and so no new clones
-// can be added to that snap.
-// Our test calls get_next_objects_to_trim() *without* deleting the snap first.
-// This allows us to add objects to the (non-deleted) snap after trimming began.
-// We test that SnapTrim will find them even when added into positions before the prefix_itr.
-// Since those tests are doing illegal inserts we must disable osd_debug_trim_objects
-// during those tests as otherwise the code will assert.
-TEST_F(SnapMapperTest, prefix_itr) {
- bool orig_val = g_ceph_context->_conf.get_val<bool>("osd_debug_trim_objects");
- std::cout << "osd_debug_trim_objects = " << orig_val << std::endl;
- g_ceph_context->_conf.set_val("osd_debug_trim_objects", std::to_string(false));
- init(32);
- get_tester().test_prefix_itr();
- get_tester().test_prefix_itr2();
- get_tester().test_prefix_itr_rand();
- g_ceph_context->_conf.set_val("osd_debug_trim_objects", std::to_string(orig_val));
- bool curr_val = g_ceph_context->_conf.get_val<bool>("osd_debug_trim_objects");
- ceph_assert(curr_val == orig_val);
-}
-
TEST_F(SnapMapperTest, Simple) {
init(1);
get_tester().create_snap();