From 927458cfa33a1f775e4afc34cad5574c5e5a6ab1 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Fri, 25 May 2012 21:30:37 -0700 Subject: [PATCH] test/ObjectMap: Copy current DBObjectMap implementation This implementation will be used to test the upgrade process. Signed-off-by: Samuel Just --- src/Makefile.am | 2 +- src/test/ObjectMap/DBObjectMap_v0.cc | 1047 +++++++++++++++++++++++++ src/test/ObjectMap/DBObjectMap_v0.h | 454 +++++++++++ src/test/ObjectMap/test_object_map.cc | 1 + 4 files changed, 1503 insertions(+), 1 deletion(-) create mode 100644 src/test/ObjectMap/DBObjectMap_v0.cc create mode 100644 src/test/ObjectMap/DBObjectMap_v0.h diff --git a/src/Makefile.am b/src/Makefile.am index 784098ebd2658..3a589e70a736b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -801,7 +801,7 @@ test_stress_watch_LDADD = librados.la ${UNITTEST_STATIC_LDADD} test_stress_watch_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} bin_DEBUGPROGRAMS += test_stress_watch -test_object_map_SOURCES = test/ObjectMap/test_object_map.cc test/ObjectMap/KeyValueDBMemory.cc os/DBObjectMap.cc os/LevelDBStore.cc +test_object_map_SOURCES = test/ObjectMap/test_object_map.cc test/ObjectMap/KeyValueDBMemory.cc os/DBObjectMap.cc os/LevelDBStore.cc test/ObjectMap/DBObjectMap_v0.cc test_object_map_LDFLAGS = ${AM_LDFLAGS} test_object_map_LDADD = ${UNITTEST_STATIC_LDADD} $(LIBOS_LDA) $(LIBGLOBAL_LDA) test_object_map_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} \ diff --git a/src/test/ObjectMap/DBObjectMap_v0.cc b/src/test/ObjectMap/DBObjectMap_v0.cc new file mode 100644 index 0000000000000..65c89f6224fca --- /dev/null +++ b/src/test/ObjectMap/DBObjectMap_v0.cc @@ -0,0 +1,1047 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- + +#include +#include +#include "include/buffer.h" +#include +#include +#include +#include + +#include +#include +#include + +#include "os/IndexManager.h" +#include "os/ObjectMap.h" +#include "os/KeyValueDB.h" +#include "DBObjectMap_v0.h" +#include + +#include "common/debug.h" +#include "common/config.h" +#define dout_subsys ceph_subsys_filestore +#undef dout_prefix +#define dout_prefix *_dout << "filestore " + +const string DBObjectMapv0::USER_PREFIX = "_USER_"; +const string DBObjectMapv0::XATTR_PREFIX = "_AXATTR_"; +const string DBObjectMapv0::SYS_PREFIX = "_SYS_"; +const string DBObjectMapv0::COMPLETE_PREFIX = "_COMPLETE_"; +const string DBObjectMapv0::HEADER_KEY = "HEADER"; +const string DBObjectMapv0::USER_HEADER_KEY = "USER_HEADER"; +const string DBObjectMapv0::LEAF_PREFIX = "_LEAF_"; +const string DBObjectMapv0::GLOBAL_STATE_KEY = "HEADER"; +const string DBObjectMapv0::REVERSE_LEAF_PREFIX = "_REVLEAF_"; + +static void append_escaped(const string &in, string *out) +{ + for (string::const_iterator i = in.begin(); i != in.end(); ++i) { + if (*i == '%') { + out->push_back('%'); + out->push_back('p'); + } else if (*i == '.') { + out->push_back('%'); + out->push_back('e'); + } else if (*i == '_') { + out->push_back('%'); + out->push_back('u'); + } else { + out->push_back(*i); + } + } +} + +bool DBObjectMapv0::check(std::ostream &out) +{ + bool retval = true; + map parent_to_num_children; + map parent_to_actual_num_children; + KeyValueDB::Iterator iter = db->get_iterator(LEAF_PREFIX); + for (iter->seek_to_first(); iter->valid(); iter->next()) { + _Header header; + bufferlist bl = iter->value(); + while (true) { + bufferlist::iterator bliter = bl.begin(); + header.decode(bliter); + if (header.seq != 0) + parent_to_actual_num_children[header.seq] = header.num_children; + if (header.parent == 0) + break; + + if (!parent_to_num_children.count(header.parent)) + parent_to_num_children[header.parent] = 0; + parent_to_num_children[header.parent]++; + if (parent_to_actual_num_children.count(header.parent)) + break; + + set to_get; + map got; + to_get.insert(HEADER_KEY); + db->get(sys_parent_prefix(header), to_get, &got); + if (!got.size()) { + out << "Missing: seq " << header.parent << std::endl; + retval = false; + break; + } else { + bl = got.begin()->second; + } + } + } + + for (map::iterator i = parent_to_num_children.begin(); + i != parent_to_num_children.end(); + parent_to_num_children.erase(i++)) { + if (!parent_to_actual_num_children.count(i->first)) + continue; + if (parent_to_actual_num_children[i->first] != i->second) { + out << "Invalid: seq " << i->first << " recorded children: " + << parent_to_actual_num_children[i->first] << " found: " + << i->second << std::endl; + retval = false; + } + parent_to_actual_num_children.erase(i->first); + } + return retval; +} + +string DBObjectMapv0::hobject_key(coll_t c, const hobject_t &hoid) +{ + string out; + append_escaped(c.to_str(), &out); + out.push_back('.'); + append_escaped(hoid.oid.name, &out); + out.push_back('.'); + append_escaped(hoid.get_key(), &out); + out.push_back('.'); + + char snap_with_hash[1000]; + char *t = snap_with_hash; + char *end = t + sizeof(snap_with_hash); + if (hoid.snap == CEPH_NOSNAP) + t += snprintf(t, end - t, ".head"); + else if (hoid.snap == CEPH_SNAPDIR) + t += snprintf(t, end - t, ".snapdir"); + else + t += snprintf(t, end - t, ".%llx", (long long unsigned)hoid.snap); + snprintf(t, end - t, ".%.*X", (int)(sizeof(hoid.hash)*2), hoid.hash); + out += string(snap_with_hash); + return out; +} + +string DBObjectMapv0::map_header_key(coll_t c, const hobject_t &hoid) +{ + return hobject_key(c, hoid); +} + +string DBObjectMapv0::header_key(uint64_t seq) +{ + char buf[100]; + snprintf(buf, sizeof(buf), "%.*" PRId64, (int)(2*sizeof(seq)), seq); + return string(buf); +} + +string DBObjectMapv0::complete_prefix(Header header) +{ + return USER_PREFIX + header_key(header->seq) + COMPLETE_PREFIX; +} + +string DBObjectMapv0::user_prefix(Header header) +{ + return USER_PREFIX + header_key(header->seq) + USER_PREFIX; +} + +string DBObjectMapv0::sys_prefix(Header header) +{ + return USER_PREFIX + header_key(header->seq) + SYS_PREFIX; +} + +string DBObjectMapv0::xattr_prefix(Header header) +{ + return USER_PREFIX + header_key(header->seq) + XATTR_PREFIX; +} + +string DBObjectMapv0::sys_parent_prefix(_Header header) +{ + return USER_PREFIX + header_key(header.parent) + SYS_PREFIX; +} + +int DBObjectMapv0::DBObjectMapv0IteratorImpl::init() +{ + invalid = false; + if (ready) { + return 0; + } + assert(!parent_iter); + if (header->parent) { + Header parent = map->lookup_parent(header); + if (!parent) { + assert(0); + return -EINVAL; + } + parent_iter.reset(new DBObjectMapv0IteratorImpl(map, parent)); + } + key_iter = map->db->get_iterator(map->user_prefix(header)); + assert(key_iter); + complete_iter = map->db->get_iterator(map->complete_prefix(header)); + assert(complete_iter); + cur_iter = key_iter; + assert(cur_iter); + ready = true; + return 0; +} + +ObjectMap::ObjectMapIterator DBObjectMapv0::get_iterator( + const hobject_t &hoid, + Index index) +{ + Header header = lookup_map_header(index->coll(), hoid); + if (!header) + return ObjectMapIterator(new EmptyIteratorImpl()); + return _get_iterator(header); +} + +int DBObjectMapv0::DBObjectMapv0IteratorImpl::seek_to_first() +{ + init(); + r = 0; + if (parent_iter) { + r = parent_iter->seek_to_first(); + if (r < 0) + return r; + } + r = key_iter->seek_to_first(); + if (r < 0) + return r; + return adjust(); +} + +int DBObjectMapv0::DBObjectMapv0IteratorImpl::seek_to_last() +{ + init(); + r = 0; + if (parent_iter) { + r = parent_iter->seek_to_last(); + if (r < 0) + return r; + if (parent_iter->valid()) + r = parent_iter->next(); + if (r < 0) + return r; + } + r = key_iter->seek_to_last(); + if (r < 0) + return r; + if (key_iter->valid()) + r = key_iter->next(); + if (r < 0) + return r; + return adjust(); +} + +int DBObjectMapv0::DBObjectMapv0IteratorImpl::lower_bound(const string &to) +{ + init(); + r = 0; + if (parent_iter) { + r = parent_iter->lower_bound(to); + if (r < 0) + return r; + } + r = key_iter->lower_bound(to); + if (r < 0) + return r; + return adjust(); +} + +int DBObjectMapv0::DBObjectMapv0IteratorImpl::upper_bound(const string &after) +{ + init(); + r = 0; + if (parent_iter) { + r = parent_iter->upper_bound(after); + if (r < 0) + return r; + } + r = key_iter->upper_bound(after); + if (r < 0) + return r; + return adjust(); +} + +bool DBObjectMapv0::DBObjectMapv0IteratorImpl::valid() +{ + bool valid = !invalid && ready; + assert(!valid || cur_iter->valid()); + return valid; +} + +bool DBObjectMapv0::DBObjectMapv0IteratorImpl::valid_parent() +{ + if (parent_iter && parent_iter->valid() && + (!key_iter->valid() || key_iter->key() > parent_iter->key())) + return true; + return false; +} + +int DBObjectMapv0::DBObjectMapv0IteratorImpl::next() +{ + assert(cur_iter->valid()); + assert(valid()); + cur_iter->next(); + return adjust(); +} + +int DBObjectMapv0::DBObjectMapv0IteratorImpl::next_parent() +{ + if (!parent_iter || !parent_iter->valid()) { + invalid = true; + return 0; + } + r = next(); + if (r < 0) + return r; + if (!valid() || on_parent() || !parent_iter->valid()) + return 0; + + return lower_bound(parent_iter->key()); +} + +int DBObjectMapv0::DBObjectMapv0IteratorImpl::in_complete_region(const string &to_test, + string *begin, + string *end) +{ + complete_iter->upper_bound(to_test); + if (complete_iter->valid()) + complete_iter->prev(); + else + complete_iter->seek_to_last(); + + if (!complete_iter->valid()) + return false; + + string _end; + if (begin) + *begin = complete_iter->key(); + _end = string(complete_iter->value().c_str()); + if (end) + *end = _end; + return (to_test >= complete_iter->key()) && (!_end.size() || _end > to_test); +} + +/** + * Moves parent_iter to the next position both out of the complete_region and + * not equal to key_iter. Then, we set cur_iter to parent_iter if valid and + * less than key_iter and key_iter otherwise. + */ +int DBObjectMapv0::DBObjectMapv0IteratorImpl::adjust() +{ + string begin, end; + while (parent_iter && parent_iter->valid()) { + if (in_complete_region(parent_iter->key(), &begin, &end)) { + if (end.size() == 0) { + parent_iter->seek_to_last(); + if (parent_iter->valid()) + parent_iter->next(); + } else + parent_iter->lower_bound(end); + } else if (key_iter->valid() && key_iter->key() == parent_iter->key()) { + parent_iter->next(); + } else { + break; + } + } + if (valid_parent()) { + cur_iter = parent_iter; + } else if (key_iter->valid()) { + cur_iter = key_iter; + } else { + invalid = true; + } + assert(invalid || cur_iter->valid()); + return 0; +} + + +string DBObjectMapv0::DBObjectMapv0IteratorImpl::key() +{ + return cur_iter->key(); +} + +bufferlist DBObjectMapv0::DBObjectMapv0IteratorImpl::value() +{ + return cur_iter->value(); +} + +int DBObjectMapv0::DBObjectMapv0IteratorImpl::status() +{ + return r; +} + +int DBObjectMapv0::set_keys(const hobject_t &hoid, + Index index, + const map &set) +{ + KeyValueDB::Transaction t = db->get_transaction(); + Header header = lookup_create_map_header(index->coll(), hoid, t); + if (!header) + return -EINVAL; + + t->set(user_prefix(header), set); + + return db->submit_transaction(t); +} + +int DBObjectMapv0::set_header(const hobject_t &hoid, + Index index, + const bufferlist &bl) +{ + KeyValueDB::Transaction t = db->get_transaction(); + Header header = lookup_create_map_header(index->coll(), hoid, t); + if (!header) + return -EINVAL; + _set_header(header, bl, t); + return db->submit_transaction(t); +} + +void DBObjectMapv0::_set_header(Header header, const bufferlist &bl, + KeyValueDB::Transaction t) +{ + map to_set; + to_set[USER_HEADER_KEY] = bl; + t->set(sys_prefix(header), to_set); +} + +int DBObjectMapv0::get_header(const hobject_t &hoid, + Index index, + bufferlist *bl) +{ + Header header = lookup_map_header(index->coll(), hoid); + if (!header) { + return 0; + } + return _get_header(header, bl); +} + +int DBObjectMapv0::_get_header(Header header, + bufferlist *bl) +{ + int r = 0; + map out; + while (r == 0) { + out.clear(); + set to_get; + to_get.insert(USER_HEADER_KEY); + int r = db->get(sys_prefix(header), to_get, &out); + if (r == 0 && out.size()) + break; + Header current(header); + if (!current->parent) + break; + header = lookup_parent(current); + } + + if (r < 0) + return r; + if (out.size()) + bl->swap(out.begin()->second); + return 0; +} + +int DBObjectMapv0::clear(const hobject_t &hoid, + Index index) +{ + KeyValueDB::Transaction t = db->get_transaction(); + Header header = lookup_map_header(index->coll(), hoid); + if (!header) + return -ENOENT; + remove_map_header(index->coll(), hoid, header, t); + assert(header->num_children > 0); + header->num_children--; + int r = _clear(header, t); + if (r < 0) + return r; + return db->submit_transaction(t); +} + +int DBObjectMapv0::_clear(Header header, + KeyValueDB::Transaction t) +{ + while (1) { + if (header->num_children) { + set_header(header, t); + break; + } + clear_header(header, t); + if (!header->parent) + break; + Header parent = lookup_parent(header); + if (!parent) { + return -EINVAL; + } + assert(parent->num_children > 0); + parent->num_children--; + header.swap(parent); + } + return 0; +} + +int DBObjectMapv0::merge_new_complete(Header header, + const map &new_complete, + DBObjectMapv0Iterator iter, + KeyValueDB::Transaction t) +{ + KeyValueDB::Iterator complete_iter = db->get_iterator( + complete_prefix(header) + ); + map::const_iterator i = new_complete.begin(); + set to_remove; + map to_add; + + string begin, end; + int r = 0; + while (i != new_complete.end()) { + string new_begin = i->first; + string new_end = i->second; + r = iter->in_complete_region(new_begin, &begin, &end); + if (r < 0) + return r; + if (r) { + to_remove.insert(begin); + new_begin = begin; + } + ++i; + while (i != new_complete.end()) { + if (!new_end.size() || i->first <= new_end) { + if (!new_end.size() && i->second > new_end) { + new_end = i->second; + } + ++i; + continue; + } + + r = iter->in_complete_region(new_end, &begin, &end); + if (r < 0) + return r; + if (r) { + to_remove.insert(begin); + new_end = end; + continue; + } + break; + } + bufferlist bl; + bl.append(bufferptr(new_end.c_str(), new_end.size() + 1)); + to_add.insert(make_pair(new_begin, bl)); + } + t->rmkeys(complete_prefix(header), to_remove); + t->set(complete_prefix(header), to_add); + return 0; +} + +int DBObjectMapv0::copy_up_header(Header header, + KeyValueDB::Transaction t) +{ + bufferlist bl; + int r = _get_header(header, &bl); + if (r < 0) + return r; + + _set_header(header, bl, t); + return 0; +} + +int DBObjectMapv0::need_parent(DBObjectMapv0Iterator iter) +{ + int r = iter->seek_to_first(); + if (r < 0) + return r; + + if (!iter->valid()) + return 0; + + string begin, end; + if (iter->in_complete_region(iter->key(), &begin, &end) && end == "") { + return 0; + } + return 1; +} + +int DBObjectMapv0::rm_keys(const hobject_t &hoid, + Index index, + const set &to_clear) +{ + Header header = lookup_map_header(index->coll(), hoid); + if (!header) + return -ENOENT; + KeyValueDB::Transaction t = db->get_transaction(); + t->rmkeys(user_prefix(header), to_clear); + if (!header->parent) { + return db->submit_transaction(t); + } + + // Copy up keys from parent around to_clear + int keep_parent; + { + DBObjectMapv0Iterator iter = _get_iterator(header); + iter->seek_to_first(); + map new_complete; + map to_write; + unsigned copied = 0; + for(set::const_iterator i = to_clear.begin(); + i != to_clear.end(); + ) { + copied = 0; + iter->lower_bound(*i); + ++i; + if (!iter->valid()) + break; + string begin = iter->key(); + if (!iter->on_parent()) + iter->next_parent(); + if (new_complete.size() && new_complete.rbegin()->second == begin) { + begin = new_complete.rbegin()->first; + } + while (iter->valid() && copied < 20) { + if (!to_clear.count(iter->key())) + to_write[iter->key()].append(iter->value()); + if (i != to_clear.end() && *i <= iter->key()) { + ++i; + copied = 0; + } + + iter->next_parent(); + copied++; + } + if (iter->valid()) { + new_complete[begin] = iter->key(); + } else { + new_complete[begin] = ""; + break; + } + } + t->set(user_prefix(header), to_write); + merge_new_complete(header, new_complete, iter, t); + keep_parent = need_parent(iter); + if (keep_parent < 0) + return keep_parent; + } + if (!keep_parent) { + copy_up_header(header, t); + Header parent = lookup_parent(header); + if (!parent) + return -EINVAL; + parent->num_children--; + _clear(parent, t); + header->parent = 0; + set_header(header, t); + t->rmkeys_by_prefix(complete_prefix(header)); + } + return db->submit_transaction(t); +} + +int DBObjectMapv0::get(const hobject_t &hoid, + Index index, + bufferlist *_header, + map *out) +{ + Header header = lookup_map_header(index->coll(), hoid); + if (!header) + return -ENOENT; + _get_header(header, _header); + ObjectMapIterator iter = _get_iterator(header); + for (iter->seek_to_first(); iter->valid(); iter->next()) { + if (iter->status()) + return iter->status(); + out->insert(make_pair(iter->key(), iter->value())); + } + return 0; +} + +int DBObjectMapv0::get_keys(const hobject_t &hoid, + Index index, + set *keys) +{ + Header header = lookup_map_header(index->coll(), hoid); + if (!header) + return -ENOENT; + ObjectMapIterator iter = get_iterator(hoid, index); + for (; iter->valid(); iter->next()) { + if (iter->status()) + return iter->status(); + keys->insert(iter->key()); + } + return 0; +} + +int DBObjectMapv0::scan(Header header, + const set &in_keys, + set *out_keys, + map *out_values) +{ + ObjectMapIterator db_iter = _get_iterator(header); + for (set::const_iterator key_iter = in_keys.begin(); + key_iter != in_keys.end(); + ++key_iter) { + db_iter->lower_bound(*key_iter); + if (db_iter->status()) + return db_iter->status(); + if (db_iter->valid() && db_iter->key() == *key_iter) { + if (out_keys) + out_keys->insert(*key_iter); + if (out_values) + out_values->insert(make_pair(db_iter->key(), db_iter->value())); + } + } + return 0; +} + +int DBObjectMapv0::get_values(const hobject_t &hoid, + Index index, + const set &keys, + map *out) +{ + Header header = lookup_map_header(index->coll(), hoid); + if (!header) + return -ENOENT; + return scan(header, keys, 0, out);; +} + +int DBObjectMapv0::check_keys(const hobject_t &hoid, + Index index, + const set &keys, + set *out) +{ + Header header = lookup_map_header(index->coll(), hoid); + if (!header) + return -ENOENT; + return scan(header, keys, out, 0); +} + +int DBObjectMapv0::get_xattrs(const hobject_t &hoid, + Index index, + const set &to_get, + map *out) +{ + Header header = lookup_map_header(index->coll(), hoid); + if (!header) + return -ENOENT; + return db->get(xattr_prefix(header), to_get, out); +} + +int DBObjectMapv0::get_all_xattrs(const hobject_t &hoid, + Index index, + set *out) +{ + Header header = lookup_map_header(index->coll(), hoid); + if (!header) + return -ENOENT; + KeyValueDB::Iterator iter = db->get_iterator(xattr_prefix(header)); + if (!iter) + return -EINVAL; + for (iter->seek_to_first(); !iter->status() && iter->valid(); iter->next()) + out->insert(iter->key()); + return iter->status(); +} + +int DBObjectMapv0::set_xattrs(const hobject_t &hoid, + Index index, + const map &to_set) +{ + KeyValueDB::Transaction t = db->get_transaction(); + Header header = lookup_create_map_header(index->coll(), hoid, t); + if (!header) + return -EINVAL; + t->set(xattr_prefix(header), to_set); + return db->submit_transaction(t); +} + +int DBObjectMapv0::remove_xattrs(const hobject_t &hoid, + Index index, + const set &to_remove) +{ + KeyValueDB::Transaction t = db->get_transaction(); + Header header = lookup_map_header(index->coll(), hoid); + if (!header) + return -ENOENT; + t->rmkeys(xattr_prefix(header), to_remove); + return db->submit_transaction(t); +} + +int DBObjectMapv0::clone(const hobject_t &hoid, + Index index, + const hobject_t &target, + Index target_index) +{ + assert(index->coll() != target_index->coll() || + hoid != target); + KeyValueDB::Transaction t = db->get_transaction(); + { + Header destination = lookup_map_header(target_index->coll(), target); + if (destination) { + remove_map_header(target_index->coll(), target, destination, t); + destination->num_children--; + _clear(destination, t); + } + } + + Header parent = lookup_map_header(index->coll(), hoid); + if (!parent) + return db->submit_transaction(t); + + Header source = generate_new_header(index->coll(), hoid, parent); + Header destination = generate_new_header(target_index->coll(), target, parent); + _Header lsource, ldestination; + source->num_children = parent->num_children; + lsource.parent = source->seq; + ldestination.parent = destination->seq; + parent->num_children = 2; + set_header(parent, t); + set_header(source, t); + set_header(destination, t); + set_map_header(target_index->coll(), target, ldestination, t); + + map to_set; + KeyValueDB::Iterator xattr_iter = db->get_iterator(xattr_prefix(parent)); + for (xattr_iter->seek_to_first(); + xattr_iter->valid(); + xattr_iter->next()) + to_set.insert(make_pair(xattr_iter->key(), xattr_iter->value())); + t->set(xattr_prefix(source), to_set); + t->set(xattr_prefix(destination), to_set); + t->rmkeys_by_prefix(xattr_prefix(parent)); + + string hkey(header_key(parent->seq)); + KeyValueDB::Iterator iter = db->get_iterator(REVERSE_LEAF_PREFIX); + iter->lower_bound(hkey); + assert(iter->valid()); + assert(string(iter->key(), 0, hkey.size()) == hkey); + for (; iter->valid() && string(iter->key(), 0, hkey.size()) == hkey; + iter->next()) { + pair p; + bufferlist bl = iter->value(); + bufferlist::iterator bliter = bl.begin(); + ::decode(p, bliter); + remove_map_header(p.first, p.second, parent, t); + set_map_header(p.first, p.second, lsource , t); + } + return db->submit_transaction(t); +} + +int DBObjectMapv0::link(const hobject_t &hoid, + Index index, + const hobject_t &target, + Index target_index) +{ + assert(index->coll() != target_index->coll() || + hoid != target); + KeyValueDB::Transaction t = db->get_transaction(); + { + Header destination = lookup_map_header(target_index->coll(), target); + if (destination) { + remove_map_header(target_index->coll(), target, destination, t); + destination->num_children--; + _clear(destination, t); + } + } + Header header = lookup_create_map_header(index->coll(), hoid, t); + + assert(header->num_children > 0); + header->num_children++; + set_header(header, t); + _Header ldestination; + ldestination.parent = header->seq; + set_map_header(target_index->coll(), target, ldestination, t); + return db->submit_transaction(t); +} + +int DBObjectMapv0::init() { + State state; + map result; + set to_get; + to_get.insert(GLOBAL_STATE_KEY); + int r = db->get(SYS_PREFIX, to_get, &result); + if (r < 0) + return r; + if (result.size()) { + bufferlist::iterator bliter = result.begin()->second.begin(); + state.decode(bliter); + next_seq = state.seq; + } + dout(20) << "(init)dbobjectmap: seq is " << next_seq << dendl; + return 0; +} + +int DBObjectMapv0::sync() { + return write_state(true); +} + +int DBObjectMapv0::write_state(bool sync) { + dout(20) << "dbobjectmap: seq is " << next_seq << dendl; + KeyValueDB::Transaction t = db->get_transaction(); + State state; + state.seq = next_seq; + bufferlist bl; + state.encode(bl); + map to_write; + to_write[GLOBAL_STATE_KEY] = bl; + t->set(SYS_PREFIX, to_write); + return sync ? db->submit_transaction_sync(t) : db->submit_transaction(t); +} + + +DBObjectMapv0::Header DBObjectMapv0::lookup_map_header(coll_t c, const hobject_t &hoid) +{ + Mutex::Locker l(header_lock); + _Header lheader; + while (true) { + map out; + set keys; + keys.insert(map_header_key(c, hoid)); + int r = db->get(LEAF_PREFIX, keys, &out); + if (r < 0) + return Header(); + if (out.size() < 1) + return Header(); + bufferlist::iterator iter = out.begin()->second.begin(); + lheader.decode(iter); + + if (in_use.count(lheader.parent)) { + header_cond.Wait(header_lock); + continue; + } + in_use.insert(lheader.parent); + break; + } + + dout(20) << "lookup_map_header: parent seq is " << lheader.parent + << " for hoid " << hoid << dendl; + map out; + set keys; + keys.insert(HEADER_KEY); + int r = db->get(sys_parent_prefix(lheader), keys, &out); + if (r < 0) + return Header(); + assert(out.size()); + + Header header = Header(new _Header(), RemoveOnDelete(this)); + header->seq = lheader.parent; + + bufferlist::iterator iter = out.begin()->second.begin(); + header->decode(iter); + return header; +} + +DBObjectMapv0::Header DBObjectMapv0::generate_new_header(coll_t c, const hobject_t &hoid, + Header parent) +{ + Mutex::Locker l(header_lock); + Header header = Header(new _Header(), RemoveOnDelete(this)); + header->seq = next_seq++; + if (parent) + header->parent = parent->seq; + header->num_children = 1; + header->c = c; + header->hoid = hoid; + assert(!in_use.count(header->seq)); + in_use.insert(header->seq); + + write_state(); + return header; +} + +DBObjectMapv0::Header DBObjectMapv0::lookup_parent(Header input) +{ + Mutex::Locker l(header_lock); + while (in_use.count(input->parent)) + header_cond.Wait(header_lock); + map out; + set keys; + keys.insert(HEADER_KEY); + + dout(20) << "lookup_parent: parent " << input->parent + << " for seq " << input->seq << dendl; + int r = db->get(sys_parent_prefix(input), keys, &out); + if (r < 0) { + assert(0); + return Header(); + } + if (out.size() < 1) { + assert(0); + return Header(); + } + + Header header = Header(new _Header(), RemoveOnDelete(this)); + header->seq = input->parent; + bufferlist::iterator iter = out.begin()->second.begin(); + header->decode(iter); + dout(20) << "lookup_parent: parent seq is " << header->seq << " with parent " + << header->parent << dendl; + in_use.insert(header->seq); + return header; +} + +DBObjectMapv0::Header DBObjectMapv0::lookup_create_map_header(coll_t c, const hobject_t &hoid, + KeyValueDB::Transaction t) +{ + Header header = lookup_map_header(c, hoid); + if (!header) { + header = generate_new_header(c, hoid, Header()); + set_header(header, t); + _Header lheader; + lheader.parent = header->seq; + set_map_header(c, hoid, lheader, t); + } + return header; +} + +void DBObjectMapv0::clear_header(Header header, KeyValueDB::Transaction t) +{ + dout(20) << "clear_header: clearing seq " << header->seq << dendl; + t->rmkeys_by_prefix(user_prefix(header)); + t->rmkeys_by_prefix(sys_prefix(header)); + t->rmkeys_by_prefix(complete_prefix(header)); + t->rmkeys_by_prefix(xattr_prefix(header)); + set keys; + keys.insert(header_key(header->seq)); + t->rmkeys(USER_PREFIX, keys); +} + +void DBObjectMapv0::set_header(Header header, KeyValueDB::Transaction t) +{ + dout(20) << "set_header: setting seq " << header->seq << dendl; + map to_write; + header->encode(to_write[HEADER_KEY]); + t->set(sys_prefix(header), to_write); +} + +void DBObjectMapv0::remove_map_header(coll_t c, const hobject_t &hoid, + Header header, + KeyValueDB::Transaction t) +{ + dout(20) << "remove_map_header: removing " << header->seq + << " hoid " << hoid << dendl; + set to_remove; + to_remove.insert(map_header_key(c, hoid)); + t->rmkeys(LEAF_PREFIX, to_remove); + to_remove.clear(); + to_remove.insert(header_key(header->seq) + + map_header_key(c, hoid)); + t->rmkeys(REVERSE_LEAF_PREFIX, to_remove); +} + +void DBObjectMapv0::set_map_header(coll_t c, const hobject_t &hoid, _Header header, + KeyValueDB::Transaction t) +{ + dout(20) << "set_map_header: setting " << header.seq + << " hoid " << hoid << " parent seq " + << header.parent << dendl; + map to_set; + header.encode(to_set[map_header_key(c, hoid)]); + t->set(LEAF_PREFIX, to_set); + to_set.clear(); + ::encode(make_pair(c, hoid), to_set[ + header_key(header.parent) + + map_header_key(c, hoid)]); + t->set(REVERSE_LEAF_PREFIX, to_set); +} diff --git a/src/test/ObjectMap/DBObjectMap_v0.h b/src/test/ObjectMap/DBObjectMap_v0.h new file mode 100644 index 0000000000000..e54eac0f6edf7 --- /dev/null +++ b/src/test/ObjectMap/DBObjectMap_v0.h @@ -0,0 +1,454 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +#ifndef DBOBJECTMAPV0_DB_H +#define DBOBJECTMAPV0_DB_H + +#include "include/buffer.h" +#include +#include +#include + +#include +#include +#include +#include + +#include "os/IndexManager.h" +#include "os/ObjectMap.h" +#include "os/KeyValueDB.h" +#include "osd/osd_types.h" +#include "common/Mutex.h" +#include "common/Cond.h" + +/** + * DBObjectMapv0: Implements ObjectMap in terms of KeyValueDB + * + * Prefix space structure: + * + * @see complete_prefix + * @see user_prefix + * @see sys_prefix + * + * - LEAF_PREFIX: Contains mapping from (coll_t,hobject_t)->seq + * - REVERSE_LEAF_PREFIX: Contains mapping from seq->[(coll_t, hobject_t)] + * @see set_map_header + * @see remove_map_header + * - SYS_PREFIX: GLOBAL_STATE_KEY - contains next seq number + * @see State + * @see write_state + * @see init + * @see generate_new_header + * - USER_PREFIX + header_key(header->seq) + USER_PREFIX + * : key->value for header->seq + * - USER_PREFIX + header_key(header->seq) + COMPLETE_PREFIX: see below + * - USER_PREFIX + header_key(header->seq) + XATTR_PREFIX: xattrs + * - USER_PREFIX + header_key(header->seq) + SYS_PREFIX + * : USER_HEADER_KEY - omap header for header->seq + * : HEADER_KEY - encoding of header for header->seq + * + * For each node (represented by a header, not counting LEAF_PREFIX space), we + * store three mappings: the key mapping, the complete mapping, and the parent. + * The complete mapping (COMPLETE_PREFIX space) is key->key. Each x->y entry in + * this mapping indicates that the key mapping contains all entries on [x,y). + * Note, max string is represented by "", so ""->"" indicates that the parent + * is unnecessary (@see rm_keys). When looking up a key not contained in the + * the complete set, we have to check the parent if we don't find it in the + * key set. During rm_keys, we copy keys from the parent and update the + * complete set to reflect the change @see rm_keys. + */ +class DBObjectMapv0 : public ObjectMap { +public: + boost::scoped_ptr db; + + /** + * Each header has a unique id. This value is updated and written *before* + * the transaction writing th new header + */ + uint64_t next_seq; + + /** + * Serializes access to next_seq as well as the in_use set + */ + Mutex header_lock; + Cond header_cond; + + /** + * Set of headers currently in use + */ + set in_use; + + DBObjectMapv0(KeyValueDB *db) : db(db), next_seq(1), + header_lock("DBOBjectMap") + {} + + int set_keys( + const hobject_t &hoid, + Index index, + const map &set + ); + + int set_header( + const hobject_t &hoid, + Index index, + const bufferlist &bl + ); + + int get_header( + const hobject_t &hoid, + Index index, + bufferlist *bl + ); + + int clear( + const hobject_t &hoid, + Index index + ); + + int rm_keys( + const hobject_t &hoid, + Index index, + const set &to_clear + ); + + int get( + const hobject_t &hoid, + Index index, + bufferlist *header, + map *out + ); + + int get_keys( + const hobject_t &hoid, + Index index, + set *keys + ); + + int get_values( + const hobject_t &hoid, + Index index, + const set &keys, + map *out + ); + + int check_keys( + const hobject_t &hoid, + Index index, + const set &keys, + set *out + ); + + int get_xattrs( + const hobject_t &hoid, + Index index, + const set &to_get, + map *out + ); + + int get_all_xattrs( + const hobject_t &hoid, + Index index, + set *out + ); + + int set_xattrs( + const hobject_t &hoid, + Index index, + const map &to_set + ); + + int remove_xattrs( + const hobject_t &hoid, + Index index, + const set &to_remove + ); + + int clone( + const hobject_t &hoid, + Index index, + const hobject_t &target, + Index target_index + ); + + int link( + const hobject_t &hoid, + Index index, + const hobject_t &target, + Index target_index + ); + + /// Read initial state from backing store + int init(); + + /// Consistency check, debug, there must be no parallel writes + bool check(std::ostream &out); + + /// Ensure that all previous operations are durable + int sync(); + + ObjectMapIterator get_iterator(const hobject_t &hoid, + Index index); + + static const string USER_PREFIX; + static const string XATTR_PREFIX; + static const string SYS_PREFIX; + static const string COMPLETE_PREFIX; + static const string HEADER_KEY; + static const string USER_HEADER_KEY; + static const string LEAF_PREFIX; + static const string GLOBAL_STATE_KEY; + static const string REVERSE_LEAF_PREFIX; + + /// persistent state for store @see generate_header + struct State { + uint64_t seq; + State() : seq(0) {} + State(uint64_t seq) : seq(seq) {} + + void encode(bufferlist &bl) const { + ENCODE_START(1, 1, bl); + ::encode(seq, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::iterator &bl) { + DECODE_START(1, bl); + ::decode(seq, bl); + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const { + f->dump_unsigned("seq", seq); + } + + static void generate_test_instances(list &o) { + o.push_back(new State(0)); + o.push_back(new State(20)); + } + }; + + struct _Header { + uint64_t seq; + uint64_t parent; + uint64_t num_children; + + coll_t c; + hobject_t hoid; + + void encode(bufferlist &bl) const { + ENCODE_START(1, 1, bl); + ::encode(seq, bl); + ::encode(parent, bl); + ::encode(num_children, bl); + ::encode(c, bl); + ::encode(hoid, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::iterator &bl) { + DECODE_START(1, bl); + ::decode(seq, bl); + ::decode(parent, bl); + ::decode(num_children, bl); + ::decode(c, bl); + ::decode(hoid, bl); + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const { + f->dump_unsigned("seq", seq); + f->dump_unsigned("parent", parent); + f->dump_unsigned("num_children", num_children); + f->dump_stream("coll") << c; + f->dump_stream("oid") << hoid; + } + + static void generate_test_instances(list<_Header*> &o) { + o.push_back(new _Header); + o.push_back(new _Header); + o.back()->parent = 20; + o.back()->seq = 30; + } + + _Header() : seq(0), parent(0), num_children(1) {} + }; + +private: + /// Implicit lock on Header->seq + typedef std::tr1::shared_ptr<_Header> Header; + + /// String munging + string hobject_key(coll_t c, const hobject_t &hoid); + string map_header_key(coll_t c, const hobject_t &hoid); + string header_key(uint64_t seq); + string complete_prefix(Header header); + string user_prefix(Header header); + string sys_prefix(Header header); + string xattr_prefix(Header header); + string sys_parent_prefix(_Header header); + string sys_parent_prefix(Header header) { + return sys_parent_prefix(*header); + } + + class EmptyIteratorImpl : public ObjectMapIteratorImpl { + public: + int seek_to_first() { return 0; } + int seek_to_last() { return 0; } + int upper_bound(const string &after) { return 0; } + int lower_bound(const string &to) { return 0; } + bool valid() { return false; } + int next() { assert(0); return 0; } + string key() { assert(0); return ""; } + bufferlist value() { assert(0); return bufferlist(); } + int status() { return 0; } + }; + + + /// Iterator + class DBObjectMapv0IteratorImpl : public ObjectMapIteratorImpl { + public: + DBObjectMapv0 *map; + + /// NOTE: implicit lock on header->seq AND for all ancestors + Header header; + + /// parent_iter == NULL iff no parent + std::tr1::shared_ptr parent_iter; + KeyValueDB::Iterator key_iter; + KeyValueDB::Iterator complete_iter; + + /// cur_iter points to currently valid iterator + std::tr1::shared_ptr cur_iter; + int r; + + /// init() called, key_iter, complete_iter, parent_iter filled in + bool ready; + /// past end + bool invalid; + + DBObjectMapv0IteratorImpl(DBObjectMapv0 *map, Header header) : + map(map), header(header), r(0), ready(false), invalid(true) {} + int seek_to_first(); + int seek_to_last(); + int upper_bound(const string &after); + int lower_bound(const string &to); + bool valid(); + int next(); + string key(); + bufferlist value(); + int status(); + + bool on_parent() { + return cur_iter == parent_iter; + } + + /// skips to next valid parent entry + int next_parent(); + + /// Tests whether to_test is in complete region + int in_complete_region(const string &to_test, ///< [in] key to test + string *begin, ///< [out] beginning of region + string *end ///< [out] end of region + ); ///< @returns true if to_test is in the complete region, else false + + private: + int init(); + bool valid_parent(); + int adjust(); + }; + + typedef std::tr1::shared_ptr DBObjectMapv0Iterator; + DBObjectMapv0Iterator _get_iterator(Header header) { + return DBObjectMapv0Iterator(new DBObjectMapv0IteratorImpl(this, header)); + } + + /// sys + + /// Removes node corresponding to header + void clear_header(Header header, KeyValueDB::Transaction t); + + /// Set node containing input to new contents + void set_header(Header input, KeyValueDB::Transaction t); + + /// Remove leaf node corresponding to hoid in c + void remove_map_header(coll_t c, const hobject_t &hoid, + Header header, + KeyValueDB::Transaction t); + + /// Set leaf node for c and hoid to the value of header + void set_map_header(coll_t c, const hobject_t &hoid, _Header header, + KeyValueDB::Transaction t); + + /// Lookup or create header for c hoid + Header lookup_create_map_header(coll_t c, const hobject_t &hoid, + KeyValueDB::Transaction t); + + /** + * Generate new header for c hoid with new seq number + * + * Has the side effect of syncronously saving the new DBObjectMapv0 state + */ + Header generate_new_header(coll_t c, const hobject_t &hoid, Header parent); + + /// Lookup leaf header for c hoid + Header lookup_map_header(coll_t c, const hobject_t &hoid); + + /// Lookup header node for input + Header lookup_parent(Header input); + + + /// Helpers + int _get_header(Header header, bufferlist *bl); + + /// Scan keys in header into out_keys and out_values (if nonnull) + int scan(Header header, + const set &in_keys, + set *out_keys, + map *out_values); + + /// Remove header and all related prefixes + int _clear(Header header, + KeyValueDB::Transaction t); + /// Adds to t operations necessary to add new_complete to the complete set + int merge_new_complete(Header header, + const map &new_complete, + DBObjectMapv0Iterator iter, + KeyValueDB::Transaction t); + + /// Writes out State (mainly next_seq) + int write_state(bool sync = false); + + /// 0 if the complete set now contains all of key space, < 0 on error, 1 else + int need_parent(DBObjectMapv0Iterator iter); + + /// Copies header entry from parent @see rm_keys + int copy_up_header(Header header, + KeyValueDB::Transaction t); + + /// Sets header @see set_header + void _set_header(Header header, const bufferlist &bl, + KeyValueDB::Transaction t); + + + /** + * Removes header seq lock once Header is out of scope + * @see lookup_parent + * @see generate_new_header + */ + class RemoveOnDelete { + public: + DBObjectMapv0 *db; + uint64_t seq; + RemoveOnDelete(DBObjectMapv0 *db) : + db(db), seq(seq) {} + void operator() (_Header *header) { + Mutex::Locker l(db->header_lock); + db->in_use.erase(header->seq); + db->header_cond.Signal(); + delete header; + } + }; + friend class RemoveOnDelete; +}; +WRITE_CLASS_ENCODER(DBObjectMapv0::_Header) +WRITE_CLASS_ENCODER(DBObjectMapv0::State) + +#endif diff --git a/src/test/ObjectMap/test_object_map.cc b/src/test/ObjectMap/test_object_map.cc index ac551f5dd1bb3..a00a742e93694 100644 --- a/src/test/ObjectMap/test_object_map.cc +++ b/src/test/ObjectMap/test_object_map.cc @@ -9,6 +9,7 @@ #include "test/ObjectMap/KeyValueDBMemory.h" #include "os/KeyValueDB.h" #include "os/DBObjectMap.h" +#include "DBObjectMap_v0.h" #include "os/HashIndex.h" #include "os/LevelDBStore.h" #include -- 2.39.5