From 13b9dc708426c35436ba58388fc54f04f066f03a Mon Sep 17 00:00:00 2001 From: Zhiqiang Wang Date: Fri, 1 Aug 2014 16:09:50 +0800 Subject: [PATCH] osd: add local_mtime to struct object_info_t This fixes a bug that occurs when the clocks of the OSDs and clients are not synchronized (especially when a client is ahead of the OSD): when the cache tier dirty ratio reaches the threshold, the agent skips the flush work because it thinks the object is too young. Signed-off-by: Zhiqiang Wang --- src/osd/ReplicatedPG.cc | 11 ++++++++++- src/osd/osd_types.cc | 10 +++++++++- src/osd/osd_types.h | 1 + 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 1d3af34c33ffc..8e17882b0e43b 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -5147,6 +5147,7 @@ void ReplicatedPG::finish_ctx(OpContext *ctx, int log_op_type, bool maintain_ssc dout(20) << __func__ << " " << soid << " " << ctx << " op " << pg_log_entry_t::get_op_name(log_op_type) << dendl; + utime_t now = ceph_clock_now(cct); // snapset bufferlist bss; @@ -5205,6 +5206,7 @@ void ReplicatedPG::finish_ctx(OpContext *ctx, int log_op_type, bool maintain_ssc ctx->snapset_obc->obs.oi.version = ctx->at_version; ctx->snapset_obc->obs.oi.last_reqid = ctx->reqid; ctx->snapset_obc->obs.oi.mtime = ctx->mtime; + ctx->snapset_obc->obs.oi.local_mtime = now; bufferlist bv(sizeof(ctx->new_obs.oi)); ::encode(ctx->snapset_obc->obs.oi, bv); @@ -5245,6 +5247,7 @@ void ReplicatedPG::finish_ctx(OpContext *ctx, int log_op_type, bool maintain_ssc if (ctx->mtime != utime_t()) { ctx->new_obs.oi.mtime = ctx->mtime; dout(10) << " set mtime to " << ctx->new_obs.oi.mtime << dendl; + ctx->new_obs.oi.local_mtime = now; } else { dout(10) << " mtime unchanged at " << ctx->new_obs.oi.mtime << dendl; } @@ -11278,7 +11281,13 @@ bool ReplicatedPG::agent_maybe_flush(ObjectContextRef& obc) } utime_t now = ceph_clock_now(NULL); - if (obc->obs.oi.mtime + utime_t(pool.info.cache_min_flush_age, 0) > now) { + 
utime_t ob_local_mtime; + if (obc->obs.oi.local_mtime != utime_t()) { + ob_local_mtime = obc->obs.oi.local_mtime; + } else { + ob_local_mtime = obc->obs.oi.mtime; + } + if (ob_local_mtime + utime_t(pool.info.cache_min_flush_age, 0) > now) { dout(20) << __func__ << " skip (too young) " << obc->obs.oi << dendl; osd->logger->inc(l_osd_agent_skip); return false; diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 9cde9dcfe89eb..faa98e2364c47 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -3682,6 +3682,7 @@ void object_info_t::copy_user_bits(const object_info_t& other) // these bits are copied from head->clone. size = other.size; mtime = other.mtime; + local_mtime = other.local_mtime; last_reqid = other.last_reqid; truncate_seq = other.truncate_seq; truncate_size = other.truncate_size; @@ -3713,7 +3714,7 @@ void object_info_t::encode(bufferlist& bl) const ++i) { old_watchers.insert(make_pair(i->first.second, i->second)); } - ENCODE_START(13, 8, bl); + ENCODE_START(14, 8, bl); ::encode(soid, bl); ::encode(myoloc, bl); //Retained for compatibility ::encode(category, bl); @@ -3738,6 +3739,7 @@ void object_info_t::encode(bufferlist& bl) const ::encode(watchers, bl); __u32 _flags = flags; ::encode(_flags, bl); + ::encode(local_mtime, bl); ENCODE_FINISH(bl); } @@ -3816,6 +3818,11 @@ void object_info_t::decode(bufferlist::iterator& bl) ::decode(_flags, bl); flags = (flag_t)_flags; } + if (struct_v >= 14) { + ::decode(local_mtime, bl); + } else { + local_mtime = utime_t(); + } DECODE_FINISH(bl); } @@ -3831,6 +3838,7 @@ void object_info_t::dump(Formatter *f) const f->dump_unsigned("user_version", user_version); f->dump_unsigned("size", size); f->dump_stream("mtime") << mtime; + f->dump_stream("local_mtime") << local_mtime; f->dump_unsigned("lost", (int)is_lost()); f->dump_unsigned("flags", (int)flags); f->dump_stream("wrlock_by") << wrlock_by; diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 93d4195bb9fa2..209d204d0e44b 100644 --- 
a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -2590,6 +2590,7 @@ struct object_info_t { uint64_t size; utime_t mtime; + utime_t local_mtime; // mtime according to the local OSD clock // note: these are currently encoded into a total 16 bits; see // encode()/decode() for the weirdness. -- 2.39.5