From 923f72d92733b216ffefc4bb8099d8e8e77f7c6e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 18 Aug 2008 13:57:04 -0700 Subject: [PATCH] osd: track byte range diffs between clones --- src/TODO | 3 +-- src/include/interval_set.h | 12 ++++++++++++ src/osd/ReplicatedPG.cc | 12 +++++++++++- src/osd/osd_types.h | 14 ++++++++++++++ 4 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/TODO b/src/TODO index b1e3da75f091d..4890b81ec2a0e 100644 --- a/src/TODO +++ b/src/TODO @@ -34,8 +34,7 @@ snaps on mds snaps on osd - garbage collection -- efficient recovery of clones - - include inter-clone diff intervals in SnapSet. +- efficient recovery of clones using the clone diff info userspace client - handle session STALE diff --git a/src/include/interval_set.h b/src/include/interval_set.h index 2b536020f391f..436074cb55f8b 100644 --- a/src/include/interval_set.h +++ b/src/include/interval_set.h @@ -199,6 +199,13 @@ class interval_set { } } } + + void swap(interval_set& other) { + m.swap(other.m); + int t = _size; + _size = other._size; + other._size = t; + } void erase(T val) { erase(val, 1); @@ -264,6 +271,11 @@ class interval_set { pa++; } } + void intersection_of(const interval_set& b) { + interval_set a; + a.m.swap(m); + intersection_of(a, b); + } void union_of(const interval_set &a, const interval_set &b) { assert(&a != this); diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index d4f4e219135bd..a4f0ccbc93fc9 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -745,6 +745,7 @@ void ReplicatedPG::prepare_transaction(ObjectStore::Transaction& t, osd_reqid_t } snapset.clones.push_back(coid.oid.snap); + snapset.clone_diffs[coid.oid.snap].swap(snapset.head_diffs); at_version.version++; } @@ -813,7 +814,9 @@ void ReplicatedPG::prepare_transaction(ObjectStore::Transaction& t, osd_reqid_t t.write(info.pgid.to_coll(), poid, offset, length, nbl); if (inc_lock) t.setattr(info.pgid.to_coll(), poid, "inc_lock", &inc_lock, 
sizeof(inc_lock)); snapset.head_exists = true; - snapset.head_diffs.insert(offset, length); + interval_set<__u64> ch; + ch.insert(offset, length); + snapset.head_diffs.union_of(ch); } break; @@ -821,6 +824,9 @@ void ReplicatedPG::prepare_transaction(ObjectStore::Transaction& t, osd_reqid_t { // zero t.zero(info.pgid.to_coll(), poid, offset, length); if (inc_lock) t.setattr(info.pgid.to_coll(), poid, "inc_lock", &inc_lock, sizeof(inc_lock)); + interval_set<__u64> ch; + ch.insert(offset, length); + snapset.head_diffs.union_of(ch); } break; @@ -828,12 +834,16 @@ void ReplicatedPG::prepare_transaction(ObjectStore::Transaction& t, osd_reqid_t { // truncate t.truncate(info.pgid.to_coll(), poid, length); if (inc_lock) t.setattr(info.pgid.to_coll(), poid, "inc_lock", &inc_lock, sizeof(inc_lock)); + interval_set<__u64> keep; + keep.insert(0, length); + snapset.head_diffs.intersection_of(keep); } break; case CEPH_OSD_OP_DELETE: { // delete t.remove(info.pgid.to_coll(), poid); + snapset.head_diffs.clear(); } break; diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 1797b74135b49..c915e9721caf6 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -18,6 +18,7 @@ #include "msg/msg_types.h" #include "include/types.h" #include "include/pobject.h" +#include "include/interval_set.h" /* osdreqid_t - caller name + incarnation# + tid to unique identify this request * use for metadata and osd ops. @@ -424,6 +425,13 @@ inline ostream& operator<<(ostream& out, OSDSuperblock& sb) // ------- +inline void encode(const interval_set<__u64>& s, bufferlist& bl) { + ::encode(s.m, bl); +} +inline void decode(interval_set<__u64>& s, bufferlist::iterator& bl) { + ::decode(s.m, bl); +} + /* * attached to object head. describes most recent snap context, and * set of existing clones. 
@@ -433,6 +441,8 @@ struct SnapSet { bool head_exists; vector<snapid_t> snaps; vector<snapid_t> clones; + interval_set<__u64> head_diffs; // subset of data that is "new" + map<snapid_t, interval_set<__u64> > clone_diffs; // diff to previous SnapSet() : head_exists(false) {} @@ -441,12 +451,16 @@ struct SnapSet { ::encode(head_exists, bl); ::encode(snaps, bl); ::encode(clones, bl); + ::encode(head_diffs, bl); + ::encode(clone_diffs, bl); } void decode(bufferlist::iterator& bl) { ::decode(seq, bl); ::decode(head_exists, bl); ::decode(snaps, bl); ::decode(clones, bl); + ::decode(head_diffs, bl); + ::decode(clone_diffs, bl); } }; WRITE_CLASS_ENCODER(SnapSet) -- 2.39.5