snaps.clear();
}
bool empty() { return seq == 0; }
+
+ void encode(bufferlist& bl) const {  // serialize this SnapContext into bl
+ ::encode(seq, bl);  // seq is written first; decode() reads the fields in this same order
+ ::encode(snaps, bl);  // followed by the snaps member
+ }
+ void decode(bufferlist::iterator& bl) {  // fill seq and snaps from the data at bl
+ ::decode(seq, bl);  // field order must mirror encode(): seq first
+ ::decode(snaps, bl);  // then snaps
+ }
};
+WRITE_CLASS_ENCODER(SnapContext)  // NOTE(review): presumably provides the free ::encode/::decode overloads for SnapContext (pick_read_snap calls ::decode(snapc, p)) -- confirm against the macro definition
inline ostream& operator<<(ostream& out, const SnapContext& snapc) {
return out << snapc.seq << "=" << snapc.snaps;
-
-// =======================
-// revisions
-
-
-/*
-int OSD::list_missing_revs(object_t oid, set<object_t>& revs, PG *pg)
-{
- int c = 0;
- oid.rev = 0;
-
- map<object_t,eversion_t>::iterator p = pg->missing.missing.lower_bound(oid);
- if (p == pg->missing.missing.end())
- return 0; // clearly not
-
- while (p->first.ino == oid.ino &&
- p->first.bno == oid.bno) {
- revs.insert(p->first);
- c++;
- }
- return c;
-}*/
-
-/*
-bool PG::pick_missing_object_rev(object_t& oid)
-{
- map<object_t,eversion_t>::iterator p = missing.missing.upper_bound(oid);
- if (p == missing.missing.end())
- return false; // clearly no candidate
-
- if (p->first.ino == oid.ino && p->first.bno == oid.bno) {
- oid = p->first; // yes! it's an upper bound revision for me.
- return true;
- }
- return false;
-}
-
-*/
-
-
-bool PG::pick_object_rev(pobject_t& poid, vector<snapid_t> &snapvec)
-{
- pobject_t t = poid;
- vector<snapid_t> csnap(1);
- int r;
-
- dout(10) << "pick_object_rev " << poid << " snapvec " << snapvec << dendl;
- snapid_t want = poid.oid.snap;
-
- for (int i=-1; i<(int)snapvec.size(); i++) {
- snapid_t last;
- if (i < 0)
- last = t.oid.snap = CEPH_NOSNAP;
- else
- last = t.oid.snap = snapvec[i];
- if (last < want) {
- dout(20) << "pick_object_rev stopping at clone " << t << ": last " << last << " < want " << want << dendl;
- return false;
- }
-
- if (last == CEPH_NOSNAP) {
- csnap.resize(1);
- csnap[0] = NOSNAP;
- } else {
- r = osd->store->getattr(info.pgid, t, "snaps", &csnap[0], 0);
- if (r == -ENOENT) {
- dout(20) << "pick_object_rev " << t << " dne" << dendl;
- continue;
- }
-
- csnap.resize(r / sizeof(csnap[0]));
- int r2 = osd->store->getattr(info.pgid, t, "snaps", &csnap[0], r);
- assert(r == r2);
- }
-
- snapid_t first = csnap[csnap.size()-1];
- dout(20) << "pick_object_rev ? " << t << " [" << first << "," << last << "] csnap " << csnap << dendl;
- assert(csnap[0] == last);
-
- if (first <= want) {
- dout(20) << "pick_object_rev " << t << " first " << first << " <= " << poid.oid.snap << " -- HIT" << dendl;
- poid = t;
- return true;
- }
-
- dout(20) << "pick_object_rev skipping clone " << t << ": first " << first << " > want " << want << dendl;
- while (i+1 < (int)snapvec.size() &&
- snapvec[i+1] > first) {
- i++;
- dout(20) << "pick_object_rev and snap " << snapvec[i] << dendl;
- }
- }
- return false;
-}
}
- bool pick_object_rev(pobject_t& oid, vector<snapid_t> &spanvec);
-
-
// abstract bits
virtual bool preprocess_op(MOSDOp *op, utime_t now) { return false; }
// ========================================================================
// READS
+
+/*
+ * Redirect a read of poid (at snap poid.oid.snap) to the object that
+ * should serve it.
+ *
+ * The head object (snap == CEPH_NOSNAP) carries a "snapc" attribute holding
+ * an encoded SnapContext.  Candidates are examined in this order: the head
+ * first, then each entry of snapc.snaps.  Every clone carries a "snaps"
+ * attribute (an encoded vector<snapid_t>); its first element must equal the
+ * clone's own snap id ("last") and its final element is the earliest snap
+ * the clone covers ("first").  A candidate is a hit when first <= want.
+ *
+ * NOTE(review): the early "last < want" exit only makes sense if
+ * snapc.snaps is ordered newest-first -- confirm against the writer side.
+ *
+ * On success poid is overwritten with the chosen object and true is
+ * returned.  Returns false, leaving poid untouched, if the object doesn't
+ * (logically) exist at the requested snap.
+ */
+bool ReplicatedPG::pick_read_snap(pobject_t& poid)
+{
+  pobject_t head = poid;
+  head.oid.snap = CEPH_NOSNAP;
+
+  bufferptr bp;
+  int r = osd->store->getattr(info.pgid, head, "snapc", bp);
+  if (r < 0)
+    return false;  // if head doesn't exist, no snapped version will either.
+  bufferlist bl;
+  bl.push_back(bp);
+  bufferlist::iterator p = bl.begin();
+  SnapContext snapc;
+  ::decode(snapc, p);
+
+  dout(10) << "pick_read_snap " << poid << " snapc " << snapc << dendl;
+  snapid_t want = poid.oid.snap;
+  vector<snapid_t> csnap;
+  pobject_t t = poid;
+
+  // i == -1 stands for the head object; i >= 0 indexes snapc.snaps.
+  for (int i=-1; i<(int)snapc.snaps.size(); i++) {
+    snapid_t last;
+    if (i < 0)
+      last = t.oid.snap = CEPH_NOSNAP;
+    else
+      last = t.oid.snap = snapc.snaps[i];
+    if (last < want) {
+      dout(20) << "pick_read_snap stopping (DNE) at clone " << t
+               << ": last " << last << " < want " << want << dendl;
+      return false;
+    }
+
+    if (last == CEPH_NOSNAP) {
+      // The head has no "snaps" attribute to read; it covers only NOSNAP.
+      csnap.resize(1);
+      csnap[0] = CEPH_NOSNAP;
+    } else {
+      // Distinct names so the outer bp/bl/p are not shadowed.
+      bufferptr cbp;
+      r = osd->store->getattr(info.pgid, t, "snaps", cbp);
+      if (r < 0) {
+        dout(20) << "pick_read_snap " << t << " dne" << dendl;
+        continue;
+      }
+      bufferlist cbl;
+      cbl.push_back(cbp);
+      bufferlist::iterator cp = cbl.begin();
+      ::decode(csnap, cp);
+      dout(20) << "pick_read_snap " << t << " snaps " << csnap << dendl;
+    }
+
+    // An empty "snaps" attribute would make the element reads below index
+    // out of bounds (undefined behaviour); fail loudly instead.
+    assert(!csnap.empty());
+    snapid_t first = csnap.back();
+    dout(20) << "pick_read_snap ? " << t << " [" << first << "," << last
+             << "] csnap " << csnap << dendl;
+    assert(csnap.front() == last);
+
+    if (first <= want) {
+      dout(20) << "pick_read_snap " << t << " first " << first << " <= " << poid.oid.snap
+               << " -- HIT" << dendl;
+      poid = t;
+      return true;
+    }
+
+    dout(20) << "pick_read_snap skipping clone " << t
+             << ": first " << first << " > want " << want << dendl;
+    // Step past every snap newer than this clone's first snap; the for
+    // loop's own i++ then moves on to the next candidate.
+    while (i+1 < (int)snapc.snaps.size() &&
+           snapc.snaps[i+1] > first) {
+      i++;
+      dout(20) << "pick_read_snap and snap " << snapc.snaps[i] << dendl;
+    }
+  }
+  return false;
+}
+
+
void ReplicatedPG::op_read(MOSDOp *op)
{
object_t oid = op->get_oid();
long r = 0;
// do it.
- if (poid.oid.snap && !pick_object_rev(poid, op->get_snaps())) {
+ if (poid.oid.snap && !pick_read_snap(poid)) {
// we have no revision for this request.
r = -EEXIST;
goto done;
bool do_recovery();
void do_peer_recovery();
-
+ bool pick_read_snap(pobject_t& poid);  // on success rewrites poid to the object that serves the requested snap; false if it doesn't (logically) exist
void op_read(MOSDOp *op);
void op_modify(MOSDOp *op);