--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_IO_BLOCK_GUARD_H
+#define CEPH_LIBRBD_IO_BLOCK_GUARD_H
+
+#include "include/int_types.h"
+#include "common/dout.h"
+#include "common/Mutex.h"
+#include <boost/intrusive/list.hpp>
+#include <boost/intrusive/set.hpp>
+#include <deque>
+#include <list>
+#include "include/assert.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::BlockGuard: " << this << " " \
+ << __func__ << ": "
+
+namespace librbd {
+
+struct BlockExtent {
+ uint64_t block_start = 0;
+ uint64_t block_end = 0;
+
+ BlockExtent() {
+ }
+ BlockExtent(uint64_t block_start, uint64_t block_end)
+ : block_start(block_start), block_end(block_end) {
+ }
+};
+
+struct BlockGuardCell {
+};
+
+/**
+ * Helper class to restrict and order concurrent IO to the same block. The
+ * definition of a block is dependent upon the user of this class. It might
+ * represent a backing object, 512 byte sectors, etc.
+ */
+template <typename BlockOperation>
+class BlockGuard {
+private:
+ struct DetainedBlockExtent;
+
+public:
+ typedef std::list<BlockOperation> BlockOperations;
+
+ BlockGuard(CephContext *cct)
+ : m_cct(cct), m_lock("librbd::BlockGuard::m_lock") {
+ }
+
+ BlockGuard(const BlockGuard&) = delete;
+ BlockGuard &operator=(const BlockGuard&) = delete;
+
+ /**
+ * Detain future IO for a range of blocks. the guard will assume
+ * ownership of the provided operation if the operation is blocked.
+ * @return 0 upon success and IO can be issued
+ * >0 if the IO is blocked,
+ * <0 upon error
+ */
+ int detain(const BlockExtent &block_extent, BlockOperation *block_operation,
+ BlockGuardCell **cell) {
+ Mutex::Locker locker(m_lock);
+ ldout(m_cct, 20) << "block_start=" << block_extent.block_start << ", "
+ << "block_end=" << block_extent.block_end << ", "
+ << "free_slots=" << m_free_detained_block_extents.size()
+ << dendl;
+
+ DetainedBlockExtent *detained_block_extent;
+ auto it = m_detained_block_extents.find(block_extent);
+ if (it != m_detained_block_extents.end()) {
+ // request against an already detained block
+ detained_block_extent = &(*it);
+ if (block_operation != nullptr) {
+ detained_block_extent->block_operations.emplace_back(
+ std::move(*block_operation));
+ }
+
+ // alert the caller that the IO was detained
+ *cell = nullptr;
+ return detained_block_extent->block_operations.size();
+ } else {
+ if (!m_free_detained_block_extents.empty()) {
+ detained_block_extent = &m_free_detained_block_extents.front();
+ detained_block_extent->block_operations.clear();
+ m_free_detained_block_extents.pop_front();
+ } else {
+ ldout(m_cct, 20) << "no free detained block cells" << dendl;
+ m_detained_block_extent_pool.emplace_back();
+ detained_block_extent = &m_detained_block_extent_pool.back();
+ }
+
+ detained_block_extent->block_extent = block_extent;
+ m_detained_block_extents.insert(*detained_block_extent);
+ *cell = reinterpret_cast<BlockGuardCell*>(detained_block_extent);
+ return 0;
+ }
+ }
+
+ /**
+ * Release any detained IO operations from the provided cell.
+ */
+ void release(BlockGuardCell *cell, BlockOperations *block_operations) {
+ Mutex::Locker locker(m_lock);
+
+ assert(cell != nullptr);
+ auto &detained_block_extent = reinterpret_cast<DetainedBlockExtent &>(
+ *cell);
+ ldout(m_cct, 20) << "block_start="
+ << detained_block_extent.block_extent.block_start << ", "
+ << "block_end="
+ << detained_block_extent.block_extent.block_end << ", "
+ << "pending_ops="
+ << (detained_block_extent.block_operations.empty() ?
+ 0 : detained_block_extent.block_operations.size() - 1)
+ << dendl;
+
+ *block_operations = std::move(detained_block_extent.block_operations);
+ m_detained_block_extents.erase(detained_block_extent.block_extent);
+ m_free_detained_block_extents.push_back(detained_block_extent);
+ }
+
+private:
+ struct DetainedBlockExtent : public boost::intrusive::list_base_hook<>,
+ public boost::intrusive::set_base_hook<> {
+ BlockExtent block_extent;
+ BlockOperations block_operations;
+ };
+
+ struct DetainedBlockExtentKey {
+ typedef BlockExtent type;
+ const BlockExtent &operator()(const DetainedBlockExtent &value) {
+ return value.block_extent;
+ }
+ };
+
+ struct DetainedBlockExtentCompare {
+ bool operator()(const BlockExtent &lhs,
+ const BlockExtent &rhs) const {
+ // check for range overlap (lhs < rhs)
+ if (lhs.block_end <= rhs.block_start) {
+ return true;
+ }
+ return false;
+ }
+ };
+
+ typedef std::deque<DetainedBlockExtent> DetainedBlockExtentsPool;
+ typedef boost::intrusive::list<DetainedBlockExtent> DetainedBlockExtents;
+ typedef boost::intrusive::set<
+ DetainedBlockExtent,
+ boost::intrusive::compare<DetainedBlockExtentCompare>,
+ boost::intrusive::key_of_value<DetainedBlockExtentKey> >
+ BlockExtentToDetainedBlockExtents;
+
+ CephContext *m_cct;
+
+ Mutex m_lock;
+ DetainedBlockExtentsPool m_detained_block_extent_pool;
+ DetainedBlockExtents m_free_detained_block_extents;
+ BlockExtentToDetainedBlockExtents m_detained_block_extents;
+
+};
+
+} // namespace librbd
+
+#undef dout_subsys
+#undef dout_prefix
+#define dout_prefix *_dout
+
+#endif // CEPH_LIBRBD_IO_BLOCK_GUARD_H
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/librbd/test_fixture.h"
+#include "test/librbd/test_support.h"
+#include "librbd/BlockGuard.h"
+
+namespace librbd {
+
+class TestIOBlockGuard : public TestFixture {
+public:
+ static uint32_t s_index;
+
+ struct Operation {
+ uint32_t index;
+ Operation() : index(++s_index) {
+ }
+ Operation(Operation &&rhs) : index(rhs.index) {
+ }
+ Operation(const Operation &) = delete;
+
+ Operation& operator=(Operation &&rhs) {
+ index = rhs.index;
+ return *this;
+ }
+
+ bool operator==(const Operation &rhs) const {
+ return index == rhs.index;
+ }
+ };
+
+ typedef std::list<Operation> Operations;
+
+ typedef BlockGuard<Operation> OpBlockGuard;
+
+ virtual void SetUp() override {
+ TestFixture::SetUp();
+ m_cct = reinterpret_cast<CephContext*>(m_ioctx.cct());
+ }
+
+ CephContext *m_cct;
+};
+
+TEST_F(TestIOBlockGuard, NonDetainedOps) {
+ OpBlockGuard op_block_guard(m_cct);
+
+ Operation op1;
+ BlockGuardCell *cell1;
+ ASSERT_EQ(0, op_block_guard.detain({1, 3}, &op1, &cell1));
+
+ Operation op2;
+ BlockGuardCell *cell2;
+ ASSERT_EQ(0, op_block_guard.detain({0, 1}, &op2, &cell2));
+
+ Operation op3;
+ BlockGuardCell *cell3;
+ ASSERT_EQ(0, op_block_guard.detain({3, 6}, &op3, &cell3));
+
+ Operations released_ops;
+ op_block_guard.release(cell1, &released_ops);
+ ASSERT_TRUE(released_ops.empty());
+
+ op_block_guard.release(cell2, &released_ops);
+ ASSERT_TRUE(released_ops.empty());
+
+ op_block_guard.release(cell3, &released_ops);
+ ASSERT_TRUE(released_ops.empty());
+}
+
+TEST_F(TestIOBlockGuard, DetainedOps) {
+ OpBlockGuard op_block_guard(m_cct);
+
+ Operation op1;
+ BlockGuardCell *cell1;
+ ASSERT_EQ(0, op_block_guard.detain({1, 3}, &op1, &cell1));
+
+ Operation op2;
+ BlockGuardCell *cell2;
+ ASSERT_EQ(1, op_block_guard.detain({2, 6}, &op2, &cell2));
+ ASSERT_EQ(nullptr, cell2);
+
+ Operation op3;
+ BlockGuardCell *cell3;
+ ASSERT_EQ(2, op_block_guard.detain({0, 2}, &op3, &cell3));
+ ASSERT_EQ(nullptr, cell3);
+
+ Operations expected_ops;
+ expected_ops.push_back(std::move(op2));
+ expected_ops.push_back(std::move(op3));
+ Operations released_ops;
+ op_block_guard.release(cell1, &released_ops);
+ ASSERT_EQ(expected_ops, released_ops);
+}
+
+uint32_t TestIOBlockGuard::s_index = 0;
+
+} // namespace librbd
+