--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Inktank Storage, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <boost/variant.hpp>
+#include <boost/optional.hpp>
+#include <iostream>
+#include <vector>
+#include <sstream>
+
+#include "ECBackend.h"
+#include "ECUtil.h"
+#include "os/ObjectStore.h"
+
+/**
+ * Visitor that gathers, into a caller-supplied set, the objects named by
+ * the append, clone and rename operations of an ECTransaction.
+ *
+ * Every other operation kind is accepted but adds nothing to the set.
+ */
+struct AppendObjectsGenerator: public boost::static_visitor<void> {
+  typedef void result_type;
+
+  /// destination set supplied at construction; never freed by the visitor
+  set<hobject_t> *out;
+
+  AppendObjectsGenerator(set<hobject_t> *out) : out(out) {}
+
+  // --- operations that contribute objects ---
+  void operator()(const ECTransaction::AppendOp &append_op) {
+    out->insert(append_op.oid);
+  }
+  void operator()(const ECTransaction::CloneOp &clone_op) {
+    // both ends of the clone are recorded
+    out->insert(clone_op.source);
+    out->insert(clone_op.target);
+  }
+  void operator()(const ECTransaction::RenameOp &rename_op) {
+    // both ends of the rename are recorded
+    out->insert(rename_op.source);
+    out->insert(rename_op.destination);
+  }
+
+  // --- operations that contribute nothing ---
+  void operator()(const ECTransaction::TouchOp &) {}
+  void operator()(const ECTransaction::StashOp &) {}
+  void operator()(const ECTransaction::RemoveOp &) {}
+  void operator()(const ECTransaction::SetAttrsOp &) {}
+  void operator()(const ECTransaction::RmAttrOp &) {}
+  void operator()(const ECTransaction::NoOp &) {}
+};
+// Insert into *out every object that AppendObjectsGenerator reports for the
+// queued ops.  The ops are walked from last to first (reverse_visit); since
+// the result is a set, the walk direction does not affect its contents.
+void ECTransaction::get_append_objects(
+  set<hobject_t> *out) const
+{
+  AppendObjectsGenerator collector(out);
+  reverse_visit(collector);
+}
+
+/**
+ * Visitor that turns each logical ECTransaction op into the matching
+ * ObjectStore::Transaction calls, repeated for every shard present in
+ * *trans.
+ *
+ * While ops are applied, the hash_infos entries of the affected objects are
+ * updated (append / copy / clear), and temp_added / temp_removed are kept in
+ * step with temp objects that are written to or removed.
+ */
+struct TransGenerator : public boost::static_visitor<void> {
+  typedef void result_type;
+  typedef map<shard_id_t, ObjectStore::Transaction>::iterator trans_iter;
+  typedef map<hobject_t, ECUtil::HashInfoRef>::iterator hinfo_iter;
+
+  /// per-object hash info, owned by the caller and updated in place
+  map<hobject_t, ECUtil::HashInfoRef> &hash_infos;
+
+  ErasureCodeInterfaceRef &ecimpl;
+  const pg_t pgid;
+  const ECUtil::stripe_info_t sinfo;
+  /// one transaction per shard; every op is emitted into each of them
+  map<shard_id_t, ObjectStore::Transaction> *trans;
+  /// chunk ids requested from the encoder: 0 .. get_chunk_count()-1
+  set<int> want;
+  set<hobject_t> *temp_added;
+  set<hobject_t> *temp_removed;
+  /// stored from the constructor; not used by any operator in this struct
+  stringstream *out;
+
+  TransGenerator(
+    map<hobject_t, ECUtil::HashInfoRef> &hash_infos,
+    ErasureCodeInterfaceRef &ecimpl,
+    pg_t pgid,
+    const ECUtil::stripe_info_t &sinfo,
+    map<shard_id_t, ObjectStore::Transaction> *trans,
+    set<hobject_t> *temp_added,
+    set<hobject_t> *temp_removed,
+    stringstream *out)
+    : hash_infos(hash_infos),
+      ecimpl(ecimpl), pgid(pgid),
+      sinfo(sinfo),
+      trans(trans),
+      temp_added(temp_added), temp_removed(temp_removed),
+      out(out) {
+    // ask the encoder for every chunk
+    for (unsigned i = 0; i < ecimpl->get_chunk_count(); ++i) {
+      want.insert(i);
+    }
+  }
+
+  /// Collection for an object being created or modified.  A temp object is
+  /// moved from temp_removed to temp_added as a side effect.
+  coll_t get_coll_ct(shard_id_t shard, const hobject_t &hoid) {
+    if (hoid.is_temp()) {
+      temp_removed->erase(hoid);
+      temp_added->insert(hoid);
+    }
+    return get_coll(shard, hoid);
+  }
+  /// Collection for an object being removed or renamed away.  A temp object
+  /// is moved from temp_added to temp_removed as a side effect.
+  coll_t get_coll_rm(shard_id_t shard, const hobject_t &hoid) {
+    if (hoid.is_temp()) {
+      temp_added->erase(hoid);
+      temp_removed->insert(hoid);
+    }
+    return get_coll(shard, hoid);
+  }
+  /// Collection holding hoid on the given shard: the temp collection for
+  /// temp objects, the regular pg collection otherwise.  No side effects.
+  coll_t get_coll(shard_id_t shard, const hobject_t &hoid) {
+    if (hoid.is_temp())
+      return coll_t::make_temp_coll(spg_t(pgid, shard));
+    else
+      return coll_t(spg_t(pgid, shard));
+  }
+
+  /// Touch the object on every shard.
+  void operator()(const ECTransaction::TouchOp &op) {
+    for (trans_iter i = trans->begin(); i != trans->end(); ++i) {
+      i->second.touch(
+        get_coll_ct(i->first, op.oid),
+        ghobject_t(op.oid, ghobject_t::NO_GEN, i->first));
+    }
+  }
+
+  /// Encode the appended data, fold the encoded chunks into the object's
+  /// hash info, then write each shard's chunk together with the updated
+  /// hash info attribute.  op.off must be stripe aligned and op.bl non-empty.
+  void operator()(const ECTransaction::AppendOp &op) {
+    uint64_t offset = op.off;
+    bufferlist bl(op.bl);
+    assert(bl.length());
+    assert(offset % sinfo.get_stripe_width() == 0);
+    map<int, bufferlist> buffers;
+
+    // single lookup instead of count() followed by operator[]
+    hinfo_iter hi = hash_infos.find(op.oid);
+    assert(hi != hash_infos.end());
+    ECUtil::HashInfoRef hinfo = hi->second;
+
+    // align: zero-pad the tail up to the next stripe boundary
+    if (bl.length() % sinfo.get_stripe_width())
+      bl.append_zero(
+        sinfo.get_stripe_width() -
+        ((offset + bl.length()) % sinfo.get_stripe_width()));
+    assert(bl.length() - op.bl.length() < sinfo.get_stripe_width());
+
+    int r = ECUtil::encode(
+      sinfo, ecimpl, bl, want, &buffers);
+    // Check the encode result before its output is folded into the
+    // cumulative hash info, so a failed encode cannot modify hinfo.
+    assert(r == 0);
+
+    hinfo->append(
+      sinfo.aligned_logical_offset_to_chunk_offset(op.off),
+      buffers);
+    bufferlist hbuf;
+    ::encode(
+      *hinfo,
+      hbuf);
+
+    for (trans_iter i = trans->begin(); i != trans->end(); ++i) {
+      assert(buffers.count(i->first));
+      bufferlist &enc_bl = buffers[i->first];
+      // the write and the setattr target the same collection and object
+      coll_t cid(get_coll_ct(i->first, op.oid));
+      ghobject_t goid(op.oid, ghobject_t::NO_GEN, i->first);
+      i->second.write(
+        cid,
+        goid,
+        sinfo.logical_to_prev_chunk_offset(
+          offset),
+        enc_bl.length(),
+        enc_bl);
+      i->second.setattr(
+        cid,
+        goid,
+        ECUtil::get_hinfo_key(),
+        hbuf);
+    }
+  }
+
+  /// Copy the source's hash info onto the target and clone the object on
+  /// every shard.  Both objects must already have a hash_infos entry.
+  void operator()(const ECTransaction::CloneOp &op) {
+    hinfo_iter src = hash_infos.find(op.source);
+    hinfo_iter dst = hash_infos.find(op.target);
+    assert(src != hash_infos.end());
+    assert(dst != hash_infos.end());
+    *(dst->second) = *(src->second);
+    for (trans_iter i = trans->begin(); i != trans->end(); ++i) {
+      i->second.clone(
+        get_coll_ct(i->first, op.source),
+        ghobject_t(op.source, ghobject_t::NO_GEN, i->first),
+        ghobject_t(op.target, ghobject_t::NO_GEN, i->first));
+    }
+  }
+
+  /// Move the source's hash info to the destination (clearing the source's)
+  /// and rename the object on every shard.  Both objects must already have
+  /// a hash_infos entry.
+  void operator()(const ECTransaction::RenameOp &op) {
+    hinfo_iter src = hash_infos.find(op.source);
+    hinfo_iter dst = hash_infos.find(op.destination);
+    assert(src != hash_infos.end());
+    assert(dst != hash_infos.end());
+    *(dst->second) = *(src->second);
+    src->second->clear();
+    for (trans_iter i = trans->begin(); i != trans->end(); ++i) {
+      i->second.collection_move_rename(
+        get_coll_rm(i->first, op.source),
+        ghobject_t(op.source, ghobject_t::NO_GEN, i->first),
+        get_coll_ct(i->first, op.destination),
+        ghobject_t(op.destination, ghobject_t::NO_GEN, i->first));
+    }
+  }
+
+  /// Clear the object's hash info if one is tracked, then rename the
+  /// NO_GEN object to generation op.version within the same collection on
+  /// every shard.
+  void operator()(const ECTransaction::StashOp &op) {
+    hinfo_iter hi = hash_infos.find(op.oid);
+    if (hi != hash_infos.end())
+      hi->second->clear();
+    for (trans_iter i = trans->begin(); i != trans->end(); ++i) {
+      coll_t cid(get_coll_rm(i->first, op.oid));
+      i->second.collection_move_rename(
+        cid,
+        ghobject_t(op.oid, ghobject_t::NO_GEN, i->first),
+        cid,
+        ghobject_t(op.oid, op.version, i->first));
+    }
+  }
+
+  /// Clear the object's hash info if one is tracked, then remove the object
+  /// on every shard.
+  void operator()(const ECTransaction::RemoveOp &op) {
+    hinfo_iter hi = hash_infos.find(op.oid);
+    if (hi != hash_infos.end())
+      hi->second->clear();
+    for (trans_iter i = trans->begin(); i != trans->end(); ++i) {
+      i->second.remove(
+        get_coll_rm(i->first, op.oid),
+        ghobject_t(op.oid, ghobject_t::NO_GEN, i->first));
+    }
+  }
+
+  /// Set the op's attributes on every shard.
+  void operator()(const ECTransaction::SetAttrsOp &op) {
+    // op is const; setattrs is handed a mutable local copy of the map
+    map<string, bufferlist> attrs(op.attrs);
+    for (trans_iter i = trans->begin(); i != trans->end(); ++i) {
+      i->second.setattrs(
+        get_coll_ct(i->first, op.oid),
+        ghobject_t(op.oid, ghobject_t::NO_GEN, i->first),
+        attrs);
+    }
+  }
+
+  /// Remove attribute op.key on every shard.
+  void operator()(const ECTransaction::RmAttrOp &op) {
+    for (trans_iter i = trans->begin(); i != trans->end(); ++i) {
+      i->second.rmattr(
+        get_coll_ct(i->first, op.oid),
+        ghobject_t(op.oid, ghobject_t::NO_GEN, i->first),
+        op.key);
+    }
+  }
+
+  /// Nothing to emit.
+  void operator()(const ECTransaction::NoOp &op) {}
+};
+
+
+// Construct a TransGenerator over the supplied state and apply it to this
+// transaction's ops in queue order (visit).  The generator writes into
+// *transactions and updates hash_infos, *temp_added and *temp_removed;
+// this function adds no logic of its own.
+void ECTransaction::generate_transactions(
+  map<hobject_t, ECUtil::HashInfoRef> &hash_infos,
+  ErasureCodeInterfaceRef &ecimpl,
+  pg_t pgid,
+  const ECUtil::stripe_info_t &sinfo,
+  map<shard_id_t, ObjectStore::Transaction> *transactions,
+  set<hobject_t> *temp_added,
+  set<hobject_t> *temp_removed,
+  stringstream *out) const
+{
+  TransGenerator shard_trans_builder(
+    hash_infos, ecimpl, pgid, sinfo,
+    transactions, temp_added, temp_removed,
+    out);
+  visit(shard_trans_builder);
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Inktank Storage, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef ECTRANSACTION_H
+#define ECTRANSACTION_H
+
+#include "OSD.h"
+#include "PGBackend.h"
+#include "osd_types.h"
+#include "ECUtil.h"
+#include <boost/optional.hpp>
+#include "erasure-code/ErasureCodeInterface.h"
+
+/**
+ * PGBackend::PGTransaction implementation that records a transaction as an
+ * ordered list of logical operations.
+ *
+ * The mutators below only queue an Op; nothing is applied here.  Queued ops
+ * are consumed through visit() / reverse_visit(), which the out-of-line
+ * get_append_objects() and generate_transactions() use.
+ */
+class ECTransaction : public PGBackend::PGTransaction {
+public:
+  /// Append bl to oid at logical offset off.
+  struct AppendOp {
+    hobject_t oid;
+    uint64_t off;
+    bufferlist bl;
+    AppendOp(const hobject_t &oid, uint64_t off, bufferlist &bl)
+      : oid(oid), off(off), bl(bl) {}
+  };
+  /// Clone source to target.
+  struct CloneOp {
+    hobject_t source;
+    hobject_t target;
+    CloneOp(const hobject_t &source, const hobject_t &target)
+      : source(source), target(target) {}
+  };
+  /// Rename source to destination.
+  struct RenameOp {
+    hobject_t source;
+    hobject_t destination;
+    RenameOp(const hobject_t &source, const hobject_t &destination)
+      : source(source), destination(destination) {}
+  };
+  /// Stash oid under the given version.
+  struct StashOp {
+    hobject_t oid;
+    version_t version;
+    StashOp(const hobject_t &oid, version_t version)
+      : oid(oid), version(version) {}
+  };
+  /// Touch oid.
+  struct TouchOp {
+    hobject_t oid;
+    TouchOp(const hobject_t &oid) : oid(oid) {}
+  };
+  /// Remove oid.
+  struct RemoveOp {
+    hobject_t oid;
+    RemoveOp(const hobject_t &oid) : oid(oid) {}
+  };
+  /// Set one or more attributes on oid.
+  struct SetAttrsOp {
+    hobject_t oid;
+    map<string, bufferlist> attrs;
+    /// Takes the contents of _attrs by swap; the caller's map is left empty.
+    SetAttrsOp(const hobject_t &oid, map<string, bufferlist> &_attrs)
+      : oid(oid) {
+      attrs.swap(_attrs);
+    }
+    /// Single key/value form.
+    SetAttrsOp(const hobject_t &oid, const string &key, bufferlist &val)
+      : oid(oid) {
+      attrs.insert(make_pair(key, val));
+    }
+  };
+  /// Remove attribute key from oid.
+  struct RmAttrOp {
+    hobject_t oid;
+    string key;
+    RmAttrOp(const hobject_t &oid, const string &key) : oid(oid), key(key) {}
+  };
+  /// Placeholder that carries no data.
+  struct NoOp {};
+  typedef boost::variant<
+    AppendOp,
+    CloneOp,
+    RenameOp,
+    StashOp,
+    TouchOp,
+    RemoveOp,
+    SetAttrsOp,
+    RmAttrOp,
+    NoOp> Op;
+
+  /// queued operations, in the order they were added
+  list<Op> ops;
+  /// running total of the len values passed to non-empty appends
+  uint64_t written;
+
+  ECTransaction() : written(0) {}
+
+  /// Write
+  void touch(
+    const hobject_t &hoid) {
+    ops.push_back(TouchOp(hoid));
+  }
+  /// Queue an append of bl (whose length must equal len) at off.  A
+  /// zero-length append is queued as a touch and does not count as written.
+  void append(
+    const hobject_t &hoid,
+    uint64_t off,
+    uint64_t len,
+    bufferlist &bl) {
+    if (len == 0) {
+      touch(hoid);
+      return;
+    }
+    written += len;
+    assert(len == bl.length());
+    ops.push_back(AppendOp(hoid, off, bl));
+  }
+  void stash(
+    const hobject_t &hoid,
+    version_t former_version) {
+    ops.push_back(StashOp(hoid, former_version));
+  }
+  void remove(
+    const hobject_t &hoid) {
+    ops.push_back(RemoveOp(hoid));
+  }
+  /// Queue a SetAttrsOp; attrs is emptied (its contents are swapped in).
+  void setattrs(
+    const hobject_t &hoid,
+    map<string, bufferlist> &attrs) {
+    ops.push_back(SetAttrsOp(hoid, attrs));
+  }
+  void setattr(
+    const hobject_t &hoid,
+    const string &attrname,
+    bufferlist &bl) {
+    ops.push_back(SetAttrsOp(hoid, attrname, bl));
+  }
+  void rmattr(
+    const hobject_t &hoid,
+    const string &attrname) {
+    ops.push_back(RmAttrOp(hoid, attrname));
+  }
+  void clone(
+    const hobject_t &from,
+    const hobject_t &to) {
+    ops.push_back(CloneOp(from, to));
+  }
+  void rename(
+    const hobject_t &from,
+    const hobject_t &to) {
+    ops.push_back(RenameOp(from, to));
+  }
+
+  /// Move every op of *_to_append onto the end of this transaction and
+  /// take over its written count, leaving it empty with written == 0.
+  /// The cast is unchecked: _to_append must really be an ECTransaction.
+  void append(PGTransaction *_to_append) {
+    ECTransaction *to_append = static_cast<ECTransaction*>(_to_append);
+    written += to_append->written;
+    to_append->written = 0;
+    ops.splice(ops.end(), to_append->ops,
+               to_append->ops.begin(), to_append->ops.end());
+  }
+  void nop() {
+    ops.push_back(NoOp());
+  }
+  bool empty() const {
+    return ops.empty();
+  }
+  uint64_t get_bytes_written() const {
+    return written;
+  }
+
+  /// Apply vis to every queued op, first to last.
+  template <typename T>
+  void visit(T &vis) const {
+    for (list<Op>::const_iterator i = ops.begin(); i != ops.end(); ++i) {
+      boost::apply_visitor(vis, *i);
+    }
+  }
+  /// Apply vis to every queued op, last to first.
+  template <typename T>
+  void reverse_visit(T &vis) const {
+    for (list<Op>::const_reverse_iterator i = ops.rbegin();
+         i != ops.rend();
+         ++i) {
+      boost::apply_visitor(vis, *i);
+    }
+  }
+
+  /// Collect into *out the objects named by queued append, clone and
+  /// rename ops.
+  void get_append_objects(
+    set<hobject_t> *out) const;
+  /// Emit the queued ops into the per-shard transactions; defined out of
+  /// line.  out is optional and defaults to null.
+  void generate_transactions(
+    map<hobject_t, ECUtil::HashInfoRef> &hash_infos,
+    ErasureCodeInterfaceRef &ecimpl,
+    pg_t pgid,
+    const ECUtil::stripe_info_t &sinfo,
+    map<shard_id_t, ObjectStore::Transaction> *transactions,
+    set<hobject_t> *temp_added,
+    set<hobject_t> *temp_removed,
+    stringstream *out = 0) const;
+};
+
+#endif