From: Samuel Just
Date: Wed, 10 Aug 2016 22:43:10 +0000 (-0700)
Subject: common/: add interval_map
X-Git-Tag: v11.1.0~245^2~31
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=6ae520da55bd2d60513557f0f64e22177827c1f1;p=ceph-ci.git

common/: add interval_map

Signed-off-by: Samuel Just
---

diff --git a/src/common/interval_map.h b/src/common/interval_map.h
new file mode 100644
index 00000000000..408a60eb267
--- /dev/null
+++ b/src/common/interval_map.h
@@ -0,0 +1,329 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef INTERVAL_MAP_H
+#define INTERVAL_MAP_H
+
+#include <cassert>
+#include <iostream>
+#include <map>
+#include <utility>
+#include <vector>
+#include "include/interval_set.h"
+
+template <typename K, typename V, typename S>
+/**
+ * interval_map
+ *
+ * Maps intervals [off, off + len) to values.  Erasing or inserting
+ * over an existing range uses S::split() to cut any overlapping
+ * existing values down to the part that survives.
+ *
+ * The helper type S must provide:
+ *   split(K offset, K len, V &v)      -> V, the len units of v starting
+ *                                        at offset (relative to v's start)
+ *   can_merge(const V &l, const V &r) -> bool
+ *   merge(V &&l, V &&r)               -> V covering both adjacent extents
+ *   length(const V &v)                -> length of v, checked on insert
+ * split() and length() are called from const members and so must be
+ * const themselves.
+ *
+ * Surprisingly, boost/icl/interval_map doesn't seem to be appropriate
+ * for this use case.  The aggregation concept seems to assume
+ * commutativity, which doesn't work if we want more recent insertions
+ * to overwrite previous ones.
+ */
+class interval_map {
+  S s;
+  using map = std::map<K, std::pair<K, V> >;
+  using mapiter = typename std::map<K, std::pair<K, V> >::iterator;
+  using cmapiter = typename std::map<K, std::pair<K, V> >::const_iterator;
+  map m; // extent start -> (extent length, value)
+
+  /// @return iterator pair [fst, lst) delimiting the existing extents
+  ///         that intersect [off, off + len)
+  std::pair<mapiter, mapiter> get_range(K off, K len) {
+    // fst is first iterator with end after off (may be end)
+    auto fst = m.upper_bound(off);
+    if (fst != m.begin())
+      --fst;
+    if (fst != m.end() && off >= (fst->first + fst->second.first))
+      ++fst;
+
+    // lst is first iterator with start at or after off + len (may be end)
+    auto lst = m.lower_bound(off + len);
+    return std::make_pair(fst, lst);
+  }
+  /// const flavour of get_range(), same computation on const iterators
+  std::pair<cmapiter, cmapiter> get_range(K off, K len) const {
+    // fst is first iterator with end after off (may be end)
+    auto fst = m.upper_bound(off);
+    if (fst != m.begin())
+      --fst;
+    if (fst != m.end() && off >= (fst->first + fst->second.first))
+      ++fst;
+
+    // lst is first iterator with start at or after off + len (may be end)
+    auto lst = m.lower_bound(off + len);
+    return std::make_pair(fst, lst);
+  }
+  /// Merge the extent at niter with its immediate neighbours wherever
+  /// they are exactly adjacent and S::can_merge() allows it.
+  void try_merge(mapiter niter) {
+    if (niter != m.begin()) {
+      auto prev = niter;
+      prev--;
+      if (prev->first + prev->second.first == niter->first &&
+          s.can_merge(prev->second.second, niter->second.second)) {
+        V n = s.merge(
+          std::move(prev->second.second),
+          std::move(niter->second.second));
+        K off = prev->first;
+        K len = niter->first + niter->second.first - off;
+        niter++;
+        m.erase(prev, niter);
+        auto p = m.insert(
+          std::make_pair(
+            off,
+            std::make_pair(len, std::move(n))));
+        assert(p.second);
+        niter = p.first; // continue from the merged extent
+      }
+    }
+    auto next = niter;
+    next++;
+    if (next != m.end() &&
+        niter->first + niter->second.first == next->first &&
+        s.can_merge(niter->second.second, next->second.second)) {
+      V n = s.merge(
+        std::move(niter->second.second),
+        std::move(next->second.second));
+      K off = niter->first;
+      K len = next->first + next->second.first - off;
+      next++;
+      m.erase(niter, next);
+      auto p = m.insert(
+        std::make_pair(
+          off,
+          std::make_pair(len, std::move(n))));
+      assert(p.second);
+    }
+  }
+public:
+  /// @return a new map holding only the parts of this map that lie
+  ///         inside [off, off + len); empty when len is 0
+  interval_map intersect(K off, K len) const {
+    interval_map ret;
+    // A zero-length window selects nothing.  Bail out like erase() does
+    // instead of trimming an extent down to length 0 and tripping the
+    // len > 0 assertion in insert().
+    if (len == 0)
+      return ret;
+    auto limits = get_range(off, len);
+    for (auto i = limits.first; i != limits.second; ++i) {
+      K o = i->first;
+      K l = i->second.first;
+      V v = i->second.second;
+      if (o < off) {
+        // extent starts before the window: drop its head
+        V p = v;
+        l -= (off - o);
+        v = s.split(off - o, l, p);
+        o = off;
+      }
+      if ((o + l) > (off + len)) {
+        // extent ends after the window: drop its tail
+        V p = v;
+        l -= (o + l) - (off + len);
+        v = s.split(0, l, p);
+      }
+      ret.insert(o, l, std::move(v));
+    }
+    return ret;
+  }
+  void clear() {
+    m.clear();
+  }
+  /// Remove [off, off + len), keeping the head and/or tail of any extent
+  /// that sticks out of the erased range.  No-op when len is 0.
+  void erase(K off, K len) {
+    if (len == 0)
+      return;
+    auto range = get_range(off, len);
+    std::vector<
+      std::pair<
+        K,
+        std::pair<K, V>
+        >> to_insert;
+    for (auto i = range.first; i != range.second; ++i) {
+      if (i->first < off) {
+        to_insert.emplace_back(
+          std::make_pair(
+            i->first,
+            std::make_pair(
+              off - i->first,
+              s.split(0, off - i->first, i->second.second))));
+      }
+      if ((off + len) < (i->first + i->second.first)) {
+        K nlen = (i->first + i->second.first) - (off + len);
+        to_insert.emplace_back(
+          std::make_pair(
+            off + len,
+            std::make_pair(
+              nlen,
+              s.split(i->second.first - nlen, nlen, i->second.second))));
+      }
+    }
+    m.erase(range.first, range.second);
+    m.insert(to_insert.begin(), to_insert.end());
+  }
+  /// Insert v at [off, off + len), replacing whatever was there.
+  /// len must be non-zero and equal to S::length(v).
+  void insert(K off, K len, V &&v) {
+    assert(len > 0);
+    assert(len == s.length(v));
+    erase(off, len);
+    // v is a plain rvalue reference (V is not deduced here), so this is
+    // a move rather than a forward.
+    auto p = m.insert(
+      std::make_pair(off, std::make_pair(len, std::move(v))));
+    assert(p.second);
+    try_merge(p.first);
+  }
+  /// Move every extent of other into this map, emptying other as it goes.
+  void insert(interval_map &&other) {
+    for (auto i = other.m.begin();
+         i != other.m.end();
+         other.m.erase(i++)) {
+      insert(i->first, i->second.first, std::move(i->second.second));
+    }
+  }
+  /// Copying flavour of insert(K, K, V &&); same preconditions.
+  void insert(K off, K len, const V &v) {
+    assert(len > 0);
+    assert(len == s.length(v));
+    erase(off, len);
+    auto p = m.insert(std::make_pair(off, std::make_pair(len, v)));
+    assert(p.second);
+    try_merge(p.first);
+  }
+  /// Copy every extent of other into this map.
+  void insert(const interval_map &other) {
+    for (auto &&i: other) {
+      insert(i.get_off(), i.get_len(), i.get_val());
+    }
+  }
+  bool empty() const {
+    return m.empty();
+  }
+  /// @return the offsets/lengths of all extents as an interval_set
+  interval_set<K> get_interval_set() const {
+    interval_set<K> ret;
+    for (auto &&i: *this) {
+      ret.insert(i.get_off(), i.get_len());
+    }
+    return ret;
+  }
+  /**
+   * Read-only cursor over the extents in offset order.  Dereferencing
+   * yields the iterator itself, so inside a range-based for loop the
+   * element is queried with get_off()/get_len()/get_val().
+   */
+  class const_iterator {
+    cmapiter it;
+    const_iterator(cmapiter &&it) : it(std::move(it)) {}
+    const_iterator(const cmapiter &it) : it(it) {}
+
+    friend class interval_map;
+  public:
+    const_iterator(const const_iterator &) = default;
+    const_iterator &operator=(const const_iterator &) = default;
+
+    const_iterator &operator++() {
+      ++it;
+      return *this;
+    }
+    const_iterator operator++(int) {
+      return const_iterator(it++);
+    }
+    const_iterator &operator--() {
+      --it;
+      return *this;
+    }
+    const_iterator operator--(int) {
+      return const_iterator(it--);
+    }
+    bool operator==(const const_iterator &rhs) const {
+      return it == rhs.it;
+    }
+    bool operator!=(const const_iterator &rhs) const {
+      return it != rhs.it;
+    }
+    K get_off() const {
+      return it->first;
+    }
+    K get_len() const {
+      return it->second.first;
+    }
+    const V &get_val() const {
+      return it->second.second;
+    }
+    const_iterator &operator*() {
+      return *this;
+    }
+  };
+  const_iterator begin() const {
+    return const_iterator(m.begin());
+  }
+  const_iterator end() const {
+    return const_iterator(m.end());
+  }
+  /// @return iterator pair delimiting the extents that intersect
+  ///         [off, off + len), as computed by get_range()
+  std::pair<const_iterator, const_iterator> get_containing_range(
+    K off,
+    K len) const {
+    auto rng = get_range(off, len);
+    return std::make_pair(const_iterator(rng.first), const_iterator(rng.second));
+  }
+  unsigned ext_count() const {
+    return m.size();
+  }
+  bool operator==(const interval_map &rhs) const {
+    return m == rhs.m;
+  }
+
+  /// Print as {off~len(S::length of the value),...}.
+  std::ostream &print(std::ostream &out) const {
+    bool first = true;
+    out << "{";
+    for (auto &&i: *this) {
+      if (first) {
+        first = false;
+      } else {
+        out << ",";
+      }
+      out << i.get_off() << "~" << i.get_len() << "("
+          << s.length(i.get_val()) << ")";
+    }
+    return out << "}";
+  }
+};
+
+template <typename K, typename V, typename S>
+std::ostream &operator<<(std::ostream &out, const interval_map<K, V, S> &m) {
+  return m.print(out);
+}
+
+#endif
diff --git 
a/src/test/common/CMakeLists.txt b/src/test/common/CMakeLists.txt
index 3f71c209be1..4ee6e62508c 100644
--- a/src/test/common/CMakeLists.txt
+++ b/src/test/common/CMakeLists.txt
@@ -164,6 +164,13 @@ target_link_libraries(unittest_async_compressor global ${UNITTEST_LIBS})
 add_dependencies(unittest_async_compressor ceph_snappy)
 set_target_properties(unittest_async_compressor PROPERTIES COMPILE_FLAGS ${UNITTEST_CXX_FLAGS})
 
+# unittest_interval_map
+add_executable(unittest_interval_map
+  test_interval_map.cc
+)
+add_ceph_unittest(unittest_interval_map ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/unittest_interval_map)
+target_link_libraries(unittest_interval_map global)
+
 # unittest_interval_set
 add_executable(unittest_interval_set
   test_interval_set.cc
diff --git a/src/test/common/test_interval_map.cc b/src/test/common/test_interval_map.cc
new file mode 100644
index 00000000000..4a1c258bb7d
--- /dev/null
+++ b/src/test/common/test_interval_map.cc
@@ -0,0 +1,351 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include <gtest/gtest.h>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_int_distribution.hpp>
+#include <boost/mpl/apply.hpp>
+#include "include/buffer.h"
+#include "common/interval_map.h"
+
+using namespace std;
+
+template <typename T>
+class IntervalMapTest : public ::testing::Test {
+public:
+  using TestType = T;
+};
+
+/// Test parameters for an interval_map keyed by _key holding bufferlists.
+template <typename _key>
+struct bufferlist_test_type {
+  using key = _key;
+  using value = bufferlist;
+
+  /// boost::mpl metafunction class: apply<merge_t> is the splitter, with
+  /// merge_t::value deciding whether adjacent extents may be merged.
+  struct make_splitter {
+    template <typename merge_t>
+    struct apply {
+      bufferlist split(
+        key offset,
+        key len,
+        bufferlist &bu) const {
+        bufferlist bl;
+        bl.substr_of(bu, offset, len);
+        return bl;
+      }
+      bool can_merge(const bufferlist &left, const bufferlist &right) const {
+        return merge_t::value;
+      }
+      bufferlist merge(bufferlist &&left, bufferlist &&right) const {
+        left.claim_append(right);
+        return left;
+      }
+      uint64_t length(const bufferlist &r) const {
+        return r.length();
+      }
+    };
+  };
+
+  struct generate_random {
+    // Owned by the generator rather than created per call: a fresh
+    // default-seeded engine would make every gen(n) return the same
+    // bytes, so the tests could not tell one value from another.
+    boost::random::mt19937 rng;
+    bufferlist operator()(key len) {
+      bufferlist bl;
+      boost::random::uniform_int_distribution<> chr(0,255);
+      for (key i = 0; i < len; ++i) {
+        bl.append((char)chr(rng));
+      }
+      return bl;
+    }
+  };
+};
+
+using IntervalMapTypes = ::testing::Types< bufferlist_test_type<uint64_t> >;
+
+TYPED_TEST_CASE(IntervalMapTest, IntervalMapTypes);
+
+#define USING(_can_merge)                                        \
+  using TT = typename TestFixture::TestType;                     \
+  using key = typename TT::key; (void)key(0);                    \
+  using val = typename TT::value; (void)val(0);                  \
+  using splitter = typename boost::mpl::apply<                   \
+    typename TT::make_splitter,                                  \
+    _can_merge>;                                                 \
+  using imap = interval_map<key, val, splitter>; (void)imap();   \
+  typename TT::generate_random gen;                              \
+  val v(gen(5));                                                 \
+  splitter split; (void)split.split(0, 0, v);
+
+#define USING_NO_MERGE USING(std::false_type)
+#define USING_WITH_MERGE USING(std::true_type)
+
+TYPED_TEST(IntervalMapTest, empty) {
+  USING_NO_MERGE;
+  imap m;
+  ASSERT_TRUE(m.empty());
+}
+
+TYPED_TEST(IntervalMapTest, insert) {
+  USING_NO_MERGE;
+  imap m;
+  vector<val> vals{gen(5), gen(5), gen(5)};
+  m.insert(0, 5, vals[0]);
+  m.insert(10, 5, vals[2]);
+  m.insert(5, 5, vals[1]);
+  ASSERT_EQ(m.ext_count(), 3u);
+
+  unsigned i = 0;
+  for (auto &&ext: m) {
+    ASSERT_EQ(ext.get_len(), 5u);
+    ASSERT_EQ(ext.get_off(), 5u * i);
+    ASSERT_EQ(ext.get_val(), vals[i]);
+    ++i;
+  }
+  ASSERT_EQ(i, m.ext_count());
+}
+
+TYPED_TEST(IntervalMapTest, insert_begin_overlap) {
+  USING_NO_MERGE;
+  imap m;
+  vector<val> vals{gen(5), gen(5), gen(5)};
+  m.insert(5, 5, vals[1]);
+  m.insert(10, 5, vals[2]);
+  m.insert(1, 5, vals[0]);
+
+  auto iter = m.begin();
+  ASSERT_EQ(iter.get_off(), 1u);
+  ASSERT_EQ(iter.get_len(), 5u);
+  ASSERT_EQ(iter.get_val(), vals[0]);
+  ++iter;
+
+  ASSERT_EQ(iter.get_off(), 6u);
+  ASSERT_EQ(iter.get_len(), 4u);
+  ASSERT_EQ(iter.get_val(), split.split(1, 4, vals[1]));
+  ++iter;
+
+  ASSERT_EQ(iter.get_off(), 10u);
+  ASSERT_EQ(iter.get_len(), 5u);
+  ASSERT_EQ(iter.get_val(), vals[2]);
+  ++iter;
+
+  ASSERT_EQ(iter, m.end());
+}
+
+TYPED_TEST(IntervalMapTest, insert_end_overlap) {
+  USING_NO_MERGE;
+  imap m;
+  vector<val> vals{gen(5), gen(5), gen(5)};
+  m.insert(0, 5, vals[0]);
+  m.insert(5, 5, vals[1]);
+  m.insert(8, 5, vals[2]);
+
+  auto iter = m.begin();
+  ASSERT_EQ(iter.get_off(), 0u);
+  ASSERT_EQ(iter.get_len(), 5u);
+  ASSERT_EQ(iter.get_val(), vals[0]);
+  ++iter;
+
+  ASSERT_EQ(iter.get_off(), 5u);
+  ASSERT_EQ(iter.get_len(), 3u);
+  ASSERT_EQ(iter.get_val(), split.split(0, 3, vals[1]));
+  ++iter;
+
+  ASSERT_EQ(iter.get_off(), 8u);
+  ASSERT_EQ(iter.get_len(), 5u);
+  ASSERT_EQ(iter.get_val(), vals[2]);
+  ++iter;
+
+  ASSERT_EQ(iter, m.end());
+}
+
+TYPED_TEST(IntervalMapTest, insert_middle_overlap) {
+  USING_NO_MERGE;
+  imap m;
+  vector<val> vals{gen(5), gen(7), gen(5)};
+  m.insert(0, 5, vals[0]);
+  m.insert(10, 5, vals[2]);
+  m.insert(4, 7, vals[1]);
+
+  auto iter = m.begin();
+  ASSERT_EQ(iter.get_off(), 0u);
+  ASSERT_EQ(iter.get_len(), 4u);
+  ASSERT_EQ(iter.get_val(), split.split(0, 4, vals[0]));
+  ++iter;
+
+  ASSERT_EQ(iter.get_off(), 4u);
+  ASSERT_EQ(iter.get_len(), 7u);
+  ASSERT_EQ(iter.get_val(), vals[1]);
+  ++iter;
+
+  ASSERT_EQ(iter.get_off(), 11u);
+  ASSERT_EQ(iter.get_len(), 4u);
+  ASSERT_EQ(iter.get_val(), split.split(1, 4, vals[2]));
+  ++iter;
+
+  ASSERT_EQ(iter, m.end());
+}
+
+TYPED_TEST(IntervalMapTest, insert_single_exact_overlap) {
+  USING_NO_MERGE;
+  imap m;
+  vector<val> vals{gen(5), gen(5), gen(5)};
+  m.insert(0, 5, gen(5));
+  m.insert(5, 5, vals[1]);
+  m.insert(10, 5, vals[2]);
+  m.insert(0, 5, vals[0]);
+
+  auto iter = m.begin();
+  ASSERT_EQ(iter.get_off(), 0u);
+  ASSERT_EQ(iter.get_len(), 5u);
+  ASSERT_EQ(iter.get_val(), vals[0]);
+  ++iter;
+
+  ASSERT_EQ(iter.get_off(), 5u);
+  ASSERT_EQ(iter.get_len(), 5u);
+  ASSERT_EQ(iter.get_val(), vals[1]);
+  ++iter;
+
+  ASSERT_EQ(iter.get_off(), 10u);
+  ASSERT_EQ(iter.get_len(), 5u);
+  ASSERT_EQ(iter.get_val(), vals[2]);
+  ++iter;
+
+  ASSERT_EQ(iter, m.end());
+}
+
+TYPED_TEST(IntervalMapTest, insert_single_exact_overlap_end) {
+  USING_NO_MERGE;
+  imap m;
+  vector<val> vals{gen(5), gen(5), gen(5)};
+  m.insert(0, 5, vals[0]);
+  m.insert(5, 5, vals[1]);
+  m.insert(10, 5, gen(5));
+  m.insert(10, 5, vals[2]);
+
+  auto iter = m.begin();
+  ASSERT_EQ(iter.get_off(), 0u);
+  ASSERT_EQ(iter.get_len(), 5u);
+  ASSERT_EQ(iter.get_val(), vals[0]);
+  ++iter;
+
+  ASSERT_EQ(iter.get_off(), 5u);
+  ASSERT_EQ(iter.get_len(), 5u);
+  ASSERT_EQ(iter.get_val(), vals[1]);
+  ++iter;
+
+  ASSERT_EQ(iter.get_off(), 10u);
+  ASSERT_EQ(iter.get_len(), 5u);
+  ASSERT_EQ(iter.get_val(), vals[2]);
+  ++iter;
+
+  ASSERT_EQ(iter, m.end());
+}
+
+TYPED_TEST(IntervalMapTest, erase) {
+  USING_NO_MERGE;
+  imap m;
+  vector<val> vals{gen(5), gen(5), gen(5)};
+  m.insert(0, 5, vals[0]);
+  m.insert(5, 5, vals[1]);
+  m.insert(10, 5, vals[2]);
+
+  m.erase(3, 5);
+
+  auto iter = m.begin();
+  ASSERT_EQ(iter.get_off(), 0u);
+  ASSERT_EQ(iter.get_len(), 3u);
+  ASSERT_EQ(iter.get_val(), split.split(0, 3, vals[0]));
+  ++iter;
+
+  ASSERT_EQ(iter.get_off(), 8u);
+  ASSERT_EQ(iter.get_len(), 2u);
+  ASSERT_EQ(iter.get_val(), split.split(3, 2, vals[1]));
+  ++iter;
+
+  ASSERT_EQ(iter.get_off(), 10u);
+  ASSERT_EQ(iter.get_len(), 5u);
+  ASSERT_EQ(iter.get_val(), vals[2]);
+  ++iter;
+
+  ASSERT_EQ(iter, m.end());
+}
+
+TYPED_TEST(IntervalMapTest, erase_exact) {
+  USING_NO_MERGE;
+  imap m;
+  vector<val> vals{gen(5), gen(5), gen(5)};
+  m.insert(0, 5, vals[0]);
+  m.insert(5, 5, vals[1]);
+  m.insert(10, 5, vals[2]);
+
+  m.erase(5, 5);
+
+  auto iter = m.begin();
+  ASSERT_EQ(iter.get_off(), 0u);
+  ASSERT_EQ(iter.get_len(), 5u);
+  ASSERT_EQ(iter.get_val(), vals[0]);
+  ++iter;
+
+  ASSERT_EQ(iter.get_off(), 10u);
+  ASSERT_EQ(iter.get_len(), 5u);
+  ASSERT_EQ(iter.get_val(), vals[2]);
+  ++iter;
+
+  ASSERT_EQ(iter, m.end());
+}
+
+TYPED_TEST(IntervalMapTest, get_containing_range) {
+  USING_NO_MERGE;
+  imap m;
+  vector<val> vals{gen(5), gen(5), gen(5), gen(5)};
+  m.insert(0, 5, vals[0]);
+  m.insert(10, 5, vals[1]);
+  m.insert(20, 5, vals[2]);
+  m.insert(30, 5, vals[3]);
+
+  auto rng = m.get_containing_range(5, 21);
+  auto iter = rng.first;
+
+  ASSERT_EQ(iter.get_off(), 10u);
+  ASSERT_EQ(iter.get_len(), 5u);
+  ASSERT_EQ(iter.get_val(), vals[1]);
+  ++iter;
+
+  ASSERT_EQ(iter.get_off(), 20u);
+  ASSERT_EQ(iter.get_len(), 5u);
+  ASSERT_EQ(iter.get_val(), vals[2]);
+  ++iter;
+
+  ASSERT_EQ(iter, rng.second);
+}
+
+TYPED_TEST(IntervalMapTest, merge) {
+  USING_WITH_MERGE;
+  imap m;
+  m.insert(10, 4, gen(4));
+  m.insert(11, 1, gen(1));
+
+  // Inserting 11~1 splits 10~4 into 10~1 and 12~2; with merging enabled
+  // the three adjacent extents must collapse back into a single 10~4.
+  ASSERT_EQ(m.ext_count(), 1u);
+  auto iter = m.begin();
+  ASSERT_EQ(iter.get_off(), 10u);
+  ASSERT_EQ(iter.get_len(), 4u);
+  ++iter;
+  ASSERT_EQ(iter, m.end());
+}