--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "SnapMapper.h"
+
+using std::string;
+
+// Key prefix for snap -> object entries; prepended by get_prefix() and
+// tested by is_mapping().
+const string SnapMapper::MAPPING_PREFIX = "MAP_";
+// Key prefix for object -> snaps entries; prepended by to_object_key().
+const string SnapMapper::OBJECT_PREFIX = "OBJ_";
+
+/**
+ * Fetch the requested omap keys of this driver's (cid, hoid) object.
+ *
+ * @param keys keys to look up
+ * @param out  receives the key/value pairs reported by the object store
+ * @return whatever os->omap_get_values() returns
+ */
+int OSDriver::get_keys(
+  const std::set<std::string> &keys,
+  std::map<std::string, bufferlist> *out)
+{
+  const int r = os->omap_get_values(cid, hoid, keys, out);
+  return r;
+}
+
+/**
+ * Report the omap entry an iterator lands on after upper_bound(key).
+ *
+ * @param key  key to seek past
+ * @param next if non-null, receives the (key, value) the iterator points at
+ * @return 0 when the iterator is valid after the seek, -ENOENT when it is
+ *         not, -EINVAL if no iterator could be obtained (after asserting)
+ */
+int OSDriver::get_next(
+  const std::string &key,
+  pair<std::string, bufferlist> *next)
+{
+  ObjectMap::ObjectMapIterator it = os->get_omap_iterator(cid, hoid);
+  if (!it) {
+    assert(0);
+    return -EINVAL;
+  }
+
+  it->upper_bound(key);
+  if (!it->valid())
+    return -ENOENT;
+
+  // Caller may pass a null 'next' to merely probe for existence.
+  if (next)
+    *next = make_pair(it->key(), it->value());
+  return 0;
+}
+
+/**
+ * Value stored under a snap -> object key: the (snap, object) pair itself.
+ *
+ * to_raw() encodes one of these and from_raw() decodes it back.
+ */
+struct Mapping {
+  snapid_t snap;
+  hobject_t hoid;
+  /// Build from a (snap, object) pair; explicit so a pair never converts
+  /// to a Mapping silently.
+  explicit Mapping(const pair<snapid_t, hobject_t> &in)
+    : snap(in.first), hoid(in.second) {}
+  /// Default state used as a decode target: snap 0, default hoid.
+  Mapping() : snap(0) {}
+  /// Append snap then hoid to bl inside the ENCODE_START/FINISH envelope.
+  void encode(bufferlist &bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(snap, bl);
+    ::encode(hoid, bl);
+    ENCODE_FINISH(bl);
+  }
+  /// Read snap then hoid back, in the same order encode() wrote them.
+  void decode(bufferlist::iterator &bl) {
+    DECODE_START(1, bl);
+    ::decode(snap, bl);
+    ::decode(hoid, bl);
+    DECODE_FINISH(bl);
+  }
+};
+WRITE_CLASS_ENCODER(Mapping);
+
+/**
+ * Build the key prefix shared by every snap -> object entry of 'snap'.
+ *
+ * The result is MAPPING_PREFIX followed by the snapid in upper-case hex,
+ * zero-padded to sizeof(snap)*2 digits, followed by '_'.  The fixed width
+ * makes all keys of one snap sort together.
+ *
+ * The value is formatted as unsigned long long rather than being narrowed
+ * to unsigned, so snapids that differ only above bit 31 still get distinct
+ * prefixes.  (Assumes snapid_t is no wider than unsigned long long.)
+ *
+ * @param snap snapid to format
+ * @return the prefix string
+ */
+string SnapMapper::get_prefix(snapid_t snap)
+{
+  char buf[100];
+  int len = snprintf(
+    buf, sizeof(buf),
+    "%.*llX_", static_cast<int>(sizeof(snap)*2),
+    static_cast<unsigned long long>(snap));
+  return MAPPING_PREFIX + string(buf, len);
+}
+
+/**
+ * Compose the key of a snap -> object entry.
+ *
+ * @param in (snap, object) pair
+ * @return get_prefix(snap) followed by the object's to_str() form
+ */
+string SnapMapper::to_raw_key(
+  const pair<snapid_t, hobject_t> &in)
+{
+  string key(get_prefix(in.first));
+  key += in.second.to_str();
+  return key;
+}
+
+/**
+ * Produce the complete key/value pair for a snap -> object entry.
+ *
+ * @param in (snap, object) pair
+ * @return (to_raw_key(in), encoded Mapping built from in)
+ */
+pair<string, bufferlist> SnapMapper::to_raw(
+  const pair<snapid_t, hobject_t> &in)
+{
+  bufferlist encoded;
+  ::encode(Mapping(in), encoded);
+  return pair<string, bufferlist>(to_raw_key(in), encoded);
+}
+
+/**
+ * Recover the (snap, object) pair from a stored snap -> object entry.
+ *
+ * Only the value half of 'image' is consulted; the key is not parsed.
+ *
+ * @param image (key, value) as stored
+ * @return the decoded (snap, object) pair
+ */
+pair<snapid_t, hobject_t> SnapMapper::from_raw(
+  const pair<std::string, bufferlist> &image)
+{
+  // Decode from a local copy; 'image' is const.
+  bufferlist payload(image.second);
+  bufferlist::iterator cursor = payload.begin();
+  Mapping decoded;
+  ::decode(decoded, cursor);
+  return make_pair(decoded.snap, decoded.hoid);
+}
+
+/**
+ * Tell whether a raw key belongs to the snap -> object key space.
+ *
+ * @param to_test raw key
+ * @return true iff to_test begins with MAPPING_PREFIX
+ */
+bool SnapMapper::is_mapping(const string &to_test)
+{
+  // compare() clamps the length, so a key shorter than the prefix simply
+  // compares unequal.
+  return to_test.compare(0, MAPPING_PREFIX.size(), MAPPING_PREFIX) == 0;
+}
+
+/**
+ * Compose the key of an object -> snaps entry.
+ *
+ * @param hoid object
+ * @return OBJECT_PREFIX followed by the object's to_str() form
+ */
+string SnapMapper::to_object_key(const hobject_t &hoid)
+{
+  string key(OBJECT_PREFIX);
+  key.append(hoid.to_str());
+  return key;
+}
+
+// Serialize this record into bl: oid first, then snaps, wrapped in the
+// ENCODE_START/ENCODE_FINISH envelope.  decode() reads the fields back in
+// the same order, so the two must be kept in step.
+// NOTE(review): the (1, 1) arguments are presumably struct version and
+// compat version -- confirm against include/encoding.h.
+void SnapMapper::object_snaps::encode(bufferlist &bl) const
+{
+ ENCODE_START(1, 1, bl);
+ ::encode(oid, bl);
+ ::encode(snaps, bl);
+ ENCODE_FINISH(bl);
+}
+
+// Populate this record from bl: oid first, then snaps, inside the
+// DECODE_START/DECODE_FINISH envelope.  Field order mirrors encode().
+void SnapMapper::object_snaps::decode(bufferlist::iterator &bl)
+{
+ DECODE_START(1, bl);
+ ::decode(oid, bl);
+ ::decode(snaps, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Load the object -> snaps record for oid.
+ *
+ * @param oid object to look up; must satisfy check()
+ * @param out if non-null, receives the decoded record (asserted to carry
+ *            at least one snap)
+ * @return 0 on success, -ENOENT if the backend returned no entry, or the
+ *         negative error reported by the backend
+ */
+int SnapMapper::get_snaps(
+  const hobject_t &oid,
+  object_snaps *out)
+{
+  assert(check(oid));
+
+  set<string> wanted;
+  wanted.insert(to_object_key(oid));
+
+  map<string, bufferlist> found;
+  const int r = backend.get_keys(wanted, &found);
+  if (r < 0)
+    return r;
+  if (found.empty())
+    return -ENOENT;
+  if (!out)
+    return 0;
+
+  // Exactly one key was requested, so the first entry is the record.
+  bufferlist::iterator bp = found.begin()->second.begin();
+  ::decode(*out, bp);
+  assert(!out->snaps.empty());
+  return 0;
+}
+
+/**
+ * Queue removal of the object -> snaps record for oid.
+ *
+ * The snap -> object keys are not touched here.
+ *
+ * @param oid object whose record is dropped; must satisfy check()
+ * @param t   transaction the removal is added to
+ */
+void SnapMapper::clear_snaps(
+  const hobject_t &oid,
+  MapCacher::Transaction<std::string, bufferlist> *t)
+{
+  assert(check(oid));
+
+  set<string> doomed;
+  doomed.insert(to_object_key(oid));
+  backend.remove_keys(doomed, t);
+}
+
+/**
+ * Queue a write of the object -> snaps record for oid.
+ *
+ * The snap -> object keys are not touched here.
+ *
+ * @param oid object the record is stored under; must satisfy check()
+ * @param in  record to encode and store
+ * @param t   transaction the write is added to
+ */
+void SnapMapper::set_snaps(
+  const hobject_t &oid,
+  const object_snaps &in,
+  MapCacher::Transaction<std::string, bufferlist> *t)
+{
+  assert(check(oid));
+
+  bufferlist encoded;
+  ::encode(in, encoded);
+
+  map<string, bufferlist> updates;
+  updates.insert(make_pair(to_object_key(oid), encoded));
+  backend.set_keys(updates, t);
+}
+
+/**
+ * Replace the snap set recorded for oid with new_snaps.
+ *
+ * An empty new_snaps is treated as remove_oid().  Otherwise the
+ * object -> snaps record is rewritten and the snap -> object keys are
+ * brought in line with it: keys for snaps no longer present are removed,
+ * and keys for snaps that were not previously recorded are added, so both
+ * mappings always describe the same set.
+ *
+ * @param oid             object to update; must satisfy check()
+ * @param new_snaps       snap set to record
+ * @param old_snaps_check if non-null, asserted equal to the stored set
+ * @param t               transaction the changes are added to
+ * @return 0 on success, or the negative error from get_snaps()/remove_oid()
+ *         (-ENOENT when oid has no record)
+ */
+int SnapMapper::update_snaps(
+  const hobject_t &oid,
+  const set<snapid_t> &new_snaps,
+  const set<snapid_t> *old_snaps_check,
+  MapCacher::Transaction<std::string, bufferlist> *t)
+{
+  assert(check(oid));
+  if (new_snaps.empty())
+    return remove_oid(oid, t);
+
+  object_snaps out;
+  int r = get_snaps(oid, &out);
+  if (r < 0)
+    return r;
+  if (old_snaps_check)
+    assert(out.snaps == *old_snaps_check);
+
+  object_snaps in(oid, new_snaps);
+  set_snaps(oid, in, t);
+
+  // Drop snap -> object keys for snaps the object no longer belongs to.
+  set<string> to_remove;
+  for (set<snapid_t>::const_iterator i = out.snaps.begin();
+       i != out.snaps.end();
+       ++i) {
+    if (!new_snaps.count(*i)) {
+      to_remove.insert(to_raw_key(make_pair(*i, oid)));
+    }
+  }
+  backend.remove_keys(to_remove, t);
+
+  // Add snap -> object keys for snaps that were not recorded before.
+  map<string, bufferlist> to_add;
+  for (set<snapid_t>::const_iterator i = new_snaps.begin();
+       i != new_snaps.end();
+       ++i) {
+    if (!out.snaps.count(*i)) {
+      to_add.insert(to_raw(make_pair(*i, oid)));
+    }
+  }
+  // Skip the write entirely when only shrinking, so that case queues
+  // nothing beyond the record update and the removals.
+  if (!to_add.empty())
+    backend.set_keys(to_add, t);
+  return 0;
+}
+
+/**
+ * Record a new object together with the snaps it belongs to.
+ *
+ * Writes the object -> snaps record and one snap -> object key per snap.
+ * The object must not already have a record; this is asserted.
+ *
+ * Note that get_snaps() asserts a decoded record has at least one snap,
+ * so a record written from an empty 'snaps' would trip that assertion on
+ * its next read.
+ *
+ * @param oid   object to add; must satisfy check()
+ * @param snaps snaps the object belongs to
+ * @param t     transaction the writes are added to
+ */
+void SnapMapper::add_oid(
+  const hobject_t &oid,
+  set<snapid_t> snaps,
+  MapCacher::Transaction<std::string, bufferlist> *t)
+{
+  assert(check(oid));
+  {
+    object_snaps existing;
+    int r = get_snaps(oid, &existing);
+    assert(r == -ENOENT);
+  }
+
+  set_snaps(oid, object_snaps(oid, snaps), t);
+
+  map<string, bufferlist> snap_entries;
+  for (set<snapid_t>::const_iterator s = snaps.begin();
+       s != snaps.end();
+       ++s) {
+    snap_entries.insert(to_raw(make_pair(*s, oid)));
+  }
+  backend.set_keys(snap_entries, t);
+}
+
+/**
+ * Find some object that still has 'snap' recorded.
+ *
+ * For each entry of 'prefixes' (in sorted order) the backend is asked for
+ * the first key after get_prefix(snap) + prefix; the first hit that still
+ * starts with that string is decoded and returned.
+ *
+ * @param snap snap to look for
+ * @param hoid if non-null, receives the object that was found
+ * @return 0 if an object was found, -ENOENT if none is left, or any other
+ *         negative error reported by the backend (passed through unchanged
+ *         so that a lookup failure is not mistaken for "nothing to trim")
+ */
+int SnapMapper::get_next_object_to_trim(
+  snapid_t snap,
+  hobject_t *hoid)
+{
+  for (set<string>::const_iterator i = prefixes.begin();
+       i != prefixes.end();
+       ++i) {
+    const string list_after(get_prefix(snap) + *i);
+
+    pair<string, bufferlist> next;
+    int r = backend.get_next(list_after, &next);
+    if (r == -ENOENT) {
+      // Nothing sorts after this prefix; the remaining prefixes sort even
+      // later, so there is nothing to find for them either.
+      break;
+    }
+    if (r < 0) {
+      return r; // real backend failure, not "no more objects"
+    }
+
+    if (next.first.compare(0, list_after.size(), list_after) != 0) {
+      continue; // Done with this prefix
+    }
+
+    assert(is_mapping(next.first));
+
+    pair<snapid_t, hobject_t> next_decoded(from_raw(next));
+    assert(next_decoded.first == snap);
+    assert(check(next_decoded.second));
+
+    if (hoid)
+      *hoid = next_decoded.second;
+    return 0;
+  }
+  return -ENOENT;
+}
+
+
+/**
+ * Remove every mapping recorded for oid.
+ *
+ * Asserts that oid satisfies check() and then defers to _remove_oid().
+ *
+ * @param oid object to remove
+ * @param t   transaction the removals are added to
+ * @return the result of _remove_oid()
+ */
+int SnapMapper::remove_oid(
+  const hobject_t &oid,
+  MapCacher::Transaction<std::string, bufferlist> *t)
+{
+  assert(check(oid));
+  const int r = _remove_oid(oid, t);
+  return r;
+}
+
+/**
+ * Worker for remove_oid(): drop the object -> snaps record and every
+ * snap -> object key derived from it.
+ *
+ * @param oid object to remove
+ * @param t   transaction the removals are added to
+ * @return 0 on success, or the negative error from get_snaps()
+ *         (-ENOENT when oid has no record)
+ */
+int SnapMapper::_remove_oid(
+  const hobject_t &oid,
+  MapCacher::Transaction<std::string, bufferlist> *t)
+{
+  // The stored record tells us which snap -> object keys exist.
+  object_snaps recorded;
+  const int r = get_snaps(oid, &recorded);
+  if (r < 0)
+    return r;
+
+  clear_snaps(oid, t);
+
+  set<string> stale_keys;
+  for (set<snapid_t>::const_iterator s = recorded.snaps.begin();
+       s != recorded.snaps.end();
+       ++s) {
+    stale_keys.insert(to_raw_key(make_pair(*s, oid)));
+  }
+  backend.remove_keys(stale_keys, t);
+  return 0;
+}
+
+/**
+ * Fetch just the snap set recorded for oid.
+ *
+ * @param oid   object to look up; must satisfy check()
+ * @param snaps if non-null, receives the recorded snaps (previous contents
+ *              are swapped out and discarded)
+ * @return 0 on success, or the negative error from the record lookup
+ *         (-ENOENT when oid has no record)
+ */
+int SnapMapper::get_snaps(
+  const hobject_t &oid,
+  std::set<snapid_t> *snaps)
+{
+  assert(check(oid));
+
+  object_snaps record;
+  const int r = get_snaps(oid, &record);
+  if (r < 0)
+    return r;
+
+  if (snaps)
+    snaps->swap(record.snaps);
+  return 0;
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Inktank Storage, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef SNAPMAPPER_H
+#define SNAPMAPPER_H
+
+#include <string>
+#include <set>
+#include <utility>
+#include <string.h>
+
+#include "common/map_cacher.hpp"
+#include "os/hobject.h"
+#include "include/buffer.h"
+#include "include/encoding.h"
+#include "include/object.h"
+#include "os/ObjectStore.h"
+
+/**
+ * OSDriver
+ *
+ * MapCacher store driver backed by the omap of a single object
+ * (cid, hoid) in an ObjectStore.  Reads go straight to the store; writes
+ * are queued on an ObjectStore::Transaction through OSTransaction.
+ */
+class OSDriver : public MapCacher::StoreDriver<std::string, bufferlist> {
+  ObjectStore *os;
+  coll_t cid;
+  hobject_t hoid;
+
+public:
+  /// Adapter that forwards MapCacher writes to an ObjectStore::Transaction
+  class OSTransaction : public MapCacher::Transaction<std::string, bufferlist> {
+    friend class OSDriver;
+    coll_t cid;
+    hobject_t hoid;
+    ObjectStore::Transaction *t;
+    /// Private: instances are handed out by OSDriver::get_transaction()
+    OSTransaction(
+      coll_t cid_,
+      const hobject_t &hoid_,
+      ObjectStore::Transaction *t_)
+      : cid(cid_), hoid(hoid_), t(t_) {}
+  public:
+    /// Queue an omap_setkeys on (cid, hoid)
+    void set_keys(
+      const std::map<std::string, bufferlist> &to_set) {
+      t->omap_setkeys(cid, hoid, to_set);
+    }
+    /// Queue an omap_rmkeys on (cid, hoid)
+    void remove_keys(
+      const std::set<std::string> &to_remove) {
+      t->omap_rmkeys(cid, hoid, to_remove);
+    }
+    /// Register c via the transaction's register_on_applied()
+    void add_callback(
+      Context *c) {
+      t->register_on_applied(c);
+    }
+  };
+
+  /// Wrap t so that MapCacher writes land on this driver's (cid, hoid)
+  OSTransaction get_transaction(
+    ObjectStore::Transaction *t) {
+    return OSTransaction(cid, hoid, t);
+  }
+
+  OSDriver(ObjectStore *os_, coll_t cid_, const hobject_t &hoid_) :
+    os(os_), cid(cid_), hoid(hoid_) {}
+
+  /// Read the given omap keys; returns the object store's result
+  int get_keys(
+    const std::set<std::string> &keys,
+    std::map<std::string, bufferlist> *out);
+
+  /// Read the entry found by upper_bound(key); -ENOENT if there is none
+  int get_next(
+    const std::string &key,
+    std::pair<std::string, bufferlist> *next);
+};
+
+/**
+ * SnapMapper
+ *
+ * Manages two mappings:
+ * 1) hobject_t -> {snapid}
+ * 2) snapid -> {hobject_t}
+ *
+ * We accomplish this using two sets of keys:
+ * 1) OBJECT_PREFIX + obj.to_str() -> encoding of object_snaps
+ * 2) MAPPING_PREFIX + hex(snapid_t) + "_" + obj.to_str()
+ *      -> encoding of the (snapid_t, obj) pair
+ *
+ * The on disk strings and encodings are implemented in to_raw, to_raw_key,
+ * from_raw, to_object_key.
+ *
+ * The object -> {snapid} mapping is primarily included so that the
+ * SnapMapper state can be verified against the external PG state during
+ * scrub etc.
+ *
+ * The 2) mapping is arranged such that all objects in a particular
+ * snap will sort together, and so that all objects in a pg for a
+ * particular snap will group under up to 8 prefixes.
+ */
+class SnapMapper {
+public:
+  /// Value stored under an object key: the object and the snaps it is in
+  struct object_snaps {
+    hobject_t oid;
+    std::set<snapid_t> snaps;
+    object_snaps(hobject_t oid, const std::set<snapid_t> &snaps)
+      : oid(oid), snaps(snaps) {}
+    object_snaps() {}
+    void encode(bufferlist &bl) const;
+    void decode(bufferlist::iterator &bp);
+  };
+
+private:
+  MapCacher::MapCacher<std::string, bufferlist> backend;
+
+  static const std::string MAPPING_PREFIX;
+  static const std::string OBJECT_PREFIX;
+
+  /// Key prefix shared by all snap -> object entries of snap
+  static std::string get_prefix(snapid_t snap);
+
+  /// Key of the snap -> object entry for to_map
+  static std::string to_raw_key(
+    const std::pair<snapid_t, hobject_t> &to_map);
+
+  /// Key and encoded value of the snap -> object entry for to_map
+  static std::pair<std::string, bufferlist> to_raw(
+    const std::pair<snapid_t, hobject_t> &to_map);
+
+  /// True if to_test starts with MAPPING_PREFIX
+  static bool is_mapping(const std::string &to_test);
+
+  /// Decode the value half of a stored snap -> object entry
+  std::pair<snapid_t, hobject_t> from_raw(
+    const std::pair<std::string, bufferlist> &image);
+
+  /// Key of the object -> snaps entry for hoid
+  std::string to_object_key(const hobject_t &hoid);
+
+  /// Read the object -> snaps record; -ENOENT if there is none
+  int get_snaps(const hobject_t &oid, object_snaps *out);
+
+  /// Queue a write of the object -> snaps record
+  void set_snaps(
+    const hobject_t &oid,
+    const object_snaps &out,
+    MapCacher::Transaction<std::string, bufferlist> *t);
+
+  /// Queue removal of the object -> snaps record
+  void clear_snaps(
+    const hobject_t &oid,
+    MapCacher::Transaction<std::string, bufferlist> *t);
+
+  // True if hoid belongs in this mapping based on mask_bits and match
+  bool check(const hobject_t &hoid) const {
+    return hoid.match(mask_bits, match);
+  }
+
+  int _remove_oid(
+    const hobject_t &oid,    ///< [in] oid to remove
+    MapCacher::Transaction<std::string, bufferlist> *t ///< [out] transaction
+    );
+
+public:
+  uint32_t mask_bits;
+  const uint32_t match;
+  /// Not referenced by any code in this class
+  std::string last_key_checked;
+  const int64_t pool;
+  SnapMapper(
+    MapCacher::StoreDriver<std::string, bufferlist> *driver,
+    uint32_t match,  ///< [in] pgid
+    uint32_t bits,   ///< [in] current split bits
+    int64_t pool     ///< [in] pool
+    )
+    : backend(driver), mask_bits(bits), match(match), pool(pool) {
+    // mask_bits is already set; this call fills in 'prefixes'.
+    update_bits(mask_bits);
+  }
+
+  /// Object key prefixes to scan, derived from mask_bits, match and pool
+  std::set<std::string> prefixes;
+  /// Update bits in case of pg split
+  void update_bits(
+    uint32_t new_bits  ///< [in] new split bits
+    ) {
+    assert(new_bits >= mask_bits);
+    mask_bits = new_bits;
+    prefixes = hobject_t::get_prefixes(
+      mask_bits,
+      match,
+      pool);
+  }
+
+  /// Update snaps for oid, empty new_snaps removes the mapping
+  int update_snaps(
+    const hobject_t &oid,       ///< [in] oid to update
+    const std::set<snapid_t> &new_snaps, ///< [in] new snap set
+    const std::set<snapid_t> *old_snaps, ///< [in] old snaps (for debugging)
+    MapCacher::Transaction<std::string, bufferlist> *t ///< [out] transaction
+    ); ///< @return error, 0 on success
+
+  /// Add mapping for oid, must not already be mapped
+  void add_oid(
+    const hobject_t &oid,          ///< [in] oid to add
+    std::set<snapid_t> new_snaps,  ///< [in] snaps
+    MapCacher::Transaction<std::string, bufferlist> *t ///< [out] transaction
+    );
+
+  /// Returns first object with snap as a snap
+  int get_next_object_to_trim(
+    snapid_t snap,              ///< [in] snap to check
+    hobject_t *hoid             ///< [out] next hoid to trim
+    );  ///< @return error, -ENOENT if no more objects
+
+  /// Remove mapping for oid
+  int remove_oid(
+    const hobject_t &oid,    ///< [in] oid to remove
+    MapCacher::Transaction<std::string, bufferlist> *t ///< [out] transaction
+    ); ///< @return error, -ENOENT if the object is not mapped
+
+  /// Get snaps for oid
+  int get_snaps(
+    const hobject_t &oid,     ///< [in] oid to get snaps for
+    std::set<snapid_t> *snaps ///< [out] snaps
+    ); ///< @return error, -ENOENT if oid is not recorded
+};
+WRITE_CLASS_ENCODER(SnapMapper::object_snaps)
+
+#endif