From de094224975a1dc7a0342b191b43554af64649b9 Mon Sep 17 00:00:00 2001 From: Colin Patrick McCabe Date: Mon, 22 Nov 2010 15:56:06 -0800 Subject: [PATCH] osd: object_info_t: add lost field We can now permanently mark objects as lost by setting the lost bit in their object_info_t. Rev the object_info_t struct. get_object_context: re-arrange this so that we're always setting the lost bit. Also avoid some unnecessary steps. Signed-off-by: Colin McCabe --- src/osd/PG.cc | 42 ++++++++++++++++++++++++++++++++++++++--- src/osd/PG.h | 2 +- src/osd/ReplicatedPG.cc | 37 ++++++++++++++++++++++++------------ src/osd/ReplicatedPG.h | 8 ++++---- src/osd/osd_types.h | 12 +++++++++--- 5 files changed, 78 insertions(+), 23 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index db2d1943c8f42..495c46403928d 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1007,6 +1007,10 @@ void PG::mark_all_unfound_as_lost() { dout(3) << __func__ << dendl; + dout(30) << __func__ << ": log before:\n"; + log.print(*_dout); + *_dout << dendl; + // Find out what to delete map::iterator m = missing.missing.begin(); map::iterator mend = missing.missing.end(); @@ -1030,17 +1034,36 @@ void PG::mark_all_unfound_as_lost() } } - // Remove deleted elements from missing. + // Remove deleted elements from missing, + // and add LOST log entries for them. + eversion_t v = info.last_update; + v.epoch = osd->osdmap->get_epoch(); + utime_t mtime = g_clock.now(); std::set ::iterator d = del.begin(); while (d != dend) { sobject_t lost_soid(**d); - // TODO: some kind of bit that we set inside the object store - dout(10) << __func__ << ": marked " << lost_soid << " as lost!" 
<< dendl; + map::iterator ms = missing.missing.find(lost_soid); + assert(ms != mend); + v.version++; + Log::Entry e(Log::Entry::LOST, lost_soid, v, ms->second.need, osd_reqid_t(), mtime); + log.add(e); + + dout(10) << __func__ << ": created event " << e << dendl; missing.missing.erase(lost_soid); del.erase(d++); } + + info.last_update = v; + + dout(30) << __func__ << ": log after:\n"; + log.print(*_dout); + *_dout << dendl; + + // Send out the PG log to all replicas + // So that they know what is lost + share_pg_log(); } void PG::clear_prior() @@ -3326,6 +3349,19 @@ void PG::share_pg_info() } } +void PG::share_pg_log() +{ + dout(10) << __func__ << dendl; + + // share PG::Log with replicas + for (unsigned i=1; ilog = log; + m->missing = missing; + osd->cluster_messenger->send_message(m, osd->osdmap->get_cluster_inst(peer)); + } +} unsigned int PG::Missing::num_missing() const { diff --git a/src/osd/PG.h b/src/osd/PG.h index 6808ec4851cb1..19ee8ce06a818 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -987,7 +987,7 @@ public: void queue_snap_trim(); void share_pg_info(); - + void share_pg_log(); // abstract bits virtual void do_op(MOSDOp *op) = 0; diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 368801e883754..54f1c92934f76 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -1692,11 +1692,10 @@ void ReplicatedPG::make_writeable(OpContext *ctx) snaps[i] = snapc.snaps[i]; // prepare clone - object_info_t static_snap_oi(coid, oi.oloc); + object_info_t static_snap_oi(coid, oi.oloc, oi.lost); object_info_t *snap_oi; if (is_primary()) { - ctx->clone_obc = new ObjectContext(coid, oi.oloc); - ctx->clone_obc->obs.exists = true; + ctx->clone_obc = new ObjectContext(static_snap_oi, true, NULL); ctx->clone_obc->get(); register_object_context(ctx->clone_obc); snap_oi = &ctx->clone_obc->obs.oi; @@ -2288,10 +2287,19 @@ ReplicatedPG::ObjectContext *ReplicatedPG::get_object_context(const sobject_t& s // check disk bufferlist bv; int r = 
osd->store->getattr(coll_t(info.pgid), soid, OI_ATTR, bv); - if (r < 0 && !can_create) - return 0; // -ENOENT! - - obc = new ObjectContext(soid, oloc); + if (r < 0) { + if (!can_create) + return NULL; // -ENOENT! + object_info_t oi(soid, oloc, false); + obc = new ObjectContext(oi, false, NULL); + } + else { + object_info_t oi(bv); + SnapSetContext *ssc = NULL; + if (can_create) + ssc = get_snapset_context(soid.oid, true); + obc = new ObjectContext(oi, true, ssc); + } if (can_create) obc->obs.ssc = get_snapset_context(soid.oid, true); @@ -2535,10 +2543,15 @@ void ReplicatedPG::sub_op_modify(MOSDSubOp *op) } else { // do op - ObjectState obs(op->poid, op->oloc); - obs.oi.version = op->old_version; - obs.oi.size = op->old_size; - obs.exists = op->old_exists; + assert(0); + + // TODO: this is severely broken because we don't know whether this object is really lost or + // not. We just always assume that it's not right now. + // Also, we're taking the address of a variable on the stack. + object_info_t oi(soid, op->oloc, false); + oi.version = op->old_version; + oi.size = op->old_size; + ObjectState obs(oi, op->old_exists, NULL); rm->ctx = new OpContext(op, op->reqid, op->ops, &obs, this); @@ -3730,7 +3743,7 @@ int ReplicatedPG::recover_primary(int max) ObjectContext *headobc = get_object_context(head, OLOC_BLANK, false); - object_info_t oi(soid, headobc->obs.oi.oloc); + object_info_t oi(soid, headobc->obs.oi.oloc, headobc->obs.oi.lost); oi.version = latest->version; oi.prior_version = latest->prior_version; ::decode(oi.snaps, latest->snaps); diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 433ad9f4ef8e4..a3ad3b73878e2 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -72,8 +72,8 @@ public: bool exists; SnapSetContext *ssc; // may be null - ObjectState(const sobject_t& s, const object_locator_t& ol) : - oi(s, ol), exists(false), ssc(NULL) {} + ObjectState(const object_info_t &oi_, bool exists_, SnapSetContext *ssc_) + : oi(oi_), 
exists(exists_), ssc(ssc_) {} }; @@ -233,8 +233,8 @@ public: Cond cond; int unstable_writes, readers, writers_waiting, readers_waiting; - ObjectContext(const sobject_t& s, const object_locator_t& ol) : - ref(0), registered(false), obs(s, ol), + ObjectContext(const object_info_t &oi_, bool exists_, SnapSetContext *ssc_) + : ref(0), registered(false), obs(oi_, exists_, ssc_), lock("ReplicatedPG::ObjectContext::lock"), unstable_writes(0), readers(0), writers_waiting(0), readers_waiting(0) {} diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 20c7bb439d2cb..3005a83b79eb9 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1294,6 +1294,7 @@ struct object_info_t { uint64_t size; utime_t mtime; + bool lost; osd_reqid_t wrlock_by; // [head] vector snaps; // [clone] @@ -1310,7 +1311,7 @@ struct object_info_t { } void encode(bufferlist& bl) const { - const __u8 v = 2; + const __u8 v = 3; ::encode(v, bl); ::encode(soid, bl); ::encode(oloc, bl); @@ -1325,6 +1326,7 @@ struct object_info_t { ::encode(snaps, bl); ::encode(truncate_seq, bl); ::encode(truncate_size, bl); + ::encode(lost, bl); } void decode(bufferlist::iterator& bl) { __u8 v; @@ -1343,15 +1345,17 @@ struct object_info_t { ::decode(snaps, bl); ::decode(truncate_seq, bl); ::decode(truncate_size, bl); + if (v >= 3) + ::decode(lost, bl); } void decode(bufferlist& bl) { bufferlist::iterator p = bl.begin(); decode(p); } - object_info_t(const sobject_t& s, const object_locator_t& o) : + object_info_t(const sobject_t& s, const object_locator_t& o, bool lost_) : soid(s), size(0), - truncate_seq(0), truncate_size(0) {} + lost(lost_), truncate_seq(0), truncate_size(0) {} object_info_t(bufferlist& bl) { decode(bl); } @@ -1366,6 +1370,8 @@ inline ostream& operator<<(ostream& out, const object_info_t& oi) { out << " wrlock_by=" << oi.wrlock_by; else out << " " << oi.snaps; + if (oi.lost) + out << " LOST"; out << ")"; return out; } -- 2.39.5