From: lishuhao Date: Sun, 14 Apr 2019 08:26:04 +0000 (+0800) Subject: osd: add ObjectCleanRegions for pg log X-Git-Tag: v15.1.0~2697^2~13 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=28af7820f2cdf71e0cfd5fef95a34bd4cc2b1e80;p=ceph.git osd: add ObjectCleanRegions for pg log we use merge ObjectCleanRegions to calculate the data that needs recovery when merge log Signed-off-by: Ning Yao Signed-off-by: lishuhao --- diff --git a/src/osd/PGLog.h b/src/osd/PGLog.h index 0ad9899d505..3a20ae09c7b 100644 --- a/src/osd/PGLog.h +++ b/src/osd/PGLog.h @@ -1425,11 +1425,12 @@ public: for (list::reverse_iterator i = log.log.rbegin(); i != log.log.rend(); ++i) { - if (!debug_verify_stored_missing && i->version <= info.last_complete) break; if (i->soid > info.last_backfill) continue; if (i->is_error()) continue; + if (!i->is_delete()) + missing.merge(*i); if (did.count(i->soid)) continue; did.insert(i->soid); @@ -1446,7 +1447,9 @@ public: object_info_t oi(bv); if (oi.version < i->version) { ldpp_dout(dpp, 15) << "read_log_and_missing missing " << *i - << " (have " << oi.version << ")" << dendl; + << " (have " << oi.version << ")" + << " clean_regions " << i->clean_regions << dendl; + if (debug_verify_stored_missing) { auto miter = missing.get_items().find(i->soid); ceph_assert(miter != missing.get_items().end()); @@ -1456,7 +1459,8 @@ public: ceph_assert(miter->second.have == oi.version || miter->second.have == eversion_t()); checked.insert(i->soid); } else { - missing.add(i->soid, i->version, oi.version, i->is_delete()); + missing.add(i->soid, i->version, oi.version, i->is_delete(), false); + missing.merge(*i); } } } else { @@ -1474,7 +1478,8 @@ public: } checked.insert(i->soid); } else { - missing.add(i->soid, i->version, eversion_t(), i->is_delete()); + missing.add(i->soid, i->version, eversion_t(), i->is_delete(), false); + missing.merge(*i); } } } diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 1b6971b332b..58b6f931568 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -4419,7 +4419,7 @@ void pg_log_entry_t::decode_with_checksum(ceph::buffer::list::const_iterator& p) void pg_log_entry_t::encode(ceph::buffer::list &bl) const { - ENCODE_START(12, 4, bl); + ENCODE_START(13, 4, bl); encode(op, bl); encode(soid, bl); encode(version, bl); @@ -4448,6 +4448,7 @@ void pg_log_entry_t::encode(ceph::buffer::list &bl) const encode(return_code, bl); if (!extra_reqids.empty()) encode(extra_reqid_return_codes, bl); + encode(clean_regions, bl); ENCODE_FINISH(bl); } @@ -4509,6 +4510,10 @@ void pg_log_entry_t::decode(ceph::buffer::list::const_iterator &bl) decode(return_code, bl); if (struct_v >= 12 && !extra_reqids.empty()) decode(extra_reqid_return_codes, bl); + if (struct_v >= 13) + ::decode(clean_regions, bl); + else + clean_regions.mark_fully_dirty(); DECODE_FINISH(bl); } @@ -4556,6 +4561,11 @@ void pg_log_entry_t::dump(Formatter *f) const mod_desc.dump(f); f->close_section(); } + { + f->open_object_section("clean_regions"); + clean_regions.dump(f); + f->close_section(); + } } void pg_log_entry_t::generate_test_instances(list& o) @@ -4587,6 +4597,7 @@ ostream& operator<<(ostream& out, const pg_log_entry_t& e) } out << " snaps " << snaps; } + out << " ObjectCleanRegions " << e.clean_regions; return out; } @@ -4835,7 +4846,8 @@ ostream& operator<<(ostream& out, const pg_missing_item& i) out << i.need; if (i.have != eversion_t()) out << "(" << i.have << ")"; - out << " flags = " << i.flag_str(); + out << " flags = " << i.flag_str() + << " " << i.clean_regions; return out; } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 83addc9463a..57054e55ea5 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -3753,7 +3753,7 @@ private: bool new_object; int32_t max_num_intervals; - /** + /** * trim the number of intervals if clean_offsets.num_intervals() * exceeds the given upbound max_num_intervals * etc. max_num_intervals=2, clean_offsets:{[5~10], [20~5]} @@ -3775,7 +3775,7 @@ public: return clean_offsets == orc.clean_offsets && clean_omap == orc.clean_omap && max_num_intervals == orc.max_num_intervals; } - void merge(const ObjectCleanRegions &other); + void merge(const ObjectCleanRegions &other); void mark_data_region_dirty(uint64_t offset, uint64_t len); void mark_omap_dirty(); void mark_object_new(); @@ -3784,7 +3784,7 @@ public: bool omap_is_dirty() const; bool object_is_exist() const; - void encode(bufferlist &bl) const; + void encode(bufferlist &bl) const; void decode(bufferlist::const_iterator &bl); void dump(Formatter *f) const; static void generate_test_instances(list& o); @@ -3855,6 +3855,7 @@ struct pg_log_entry_t { __s32 op; bool invalid_hash; // only when decoding sobject_t based entries bool invalid_pool; // only when decoding pool-less hobject based entries + ObjectCleanRegions clean_regions; pg_log_entry_t() : user_version(0), return_code(0), op(0), @@ -4201,48 +4202,77 @@ inline std::ostream& operator<<(std::ostream& out, const pg_log_t& log) */ struct pg_missing_item { eversion_t need, have; + ObjectCleanRegions clean_regions; enum missing_flags_t { FLAG_NONE = 0, FLAG_DELETE = 1, } flags; pg_missing_item() : flags(FLAG_NONE) {} explicit pg_missing_item(eversion_t n) : need(n), flags(FLAG_NONE) {} // have no old version - pg_missing_item(eversion_t n, eversion_t h, bool is_delete=false) : need(n), have(h) { + pg_missing_item(eversion_t n, eversion_t h, bool is_delete=false, bool old_style = false,bool new_object = false) : + need(n), have(h) { + if (old_style) + clean_regions.mark_fully_dirty(); + if (new_object) + clean_regions.mark_object_new(); set_delete(is_delete); } void encode(ceph::buffer::list& bl, uint64_t features) const { using ceph::encode; - if (HAVE_FEATURE(features, OSD_RECOVERY_DELETES)) { - // encoding a zeroed eversion_t to differentiate between this and - // legacy unversioned encoding - a need value of 0'0 is not - // possible. This can be replaced with the legacy encoding - // macros post-luminous. - eversion_t e; - encode(e, bl); - encode(need, bl); - encode(have, bl); + // encoding a zeroed eversion_t to differentiate between OSD_RECOVERY_DELETES、 + // SERVER_NAUTILUS and legacy unversioned encoding - a need value of 0'0 is not + // possible. This can be replaced with the legacy encoding + + bool have_recovery_deletes = HAVE_FEATURE(features, OSD_RECOVERY_DELETES); + bool have_server_nautilus = HAVE_FEATURE(features, SERVER_NAUTILUS); + + if (have_recovery_deletes) + encode(eversion_t(), bl); + if (have_server_nautilus) + encode(eversion_t(-1, -1), bl); + + encode(need, bl); + encode(have, bl); + + if (have_recovery_deletes) encode(static_cast(flags), bl); - } else { - // legacy unversioned encoding - encode(need, bl); - encode(have, bl); - } + if (have_server_nautilus) + encode(clean_regions, bl); } void decode(ceph::buffer::list::const_iterator& bl) { using ceph::decode; eversion_t e; decode(e, bl); - if (e != eversion_t()) { - // legacy encoding, this is the need value - need = e; + + if(e == eversion_t()) { + eversion_t l; + decode(l, bl); + if(l == eversion_t(-1, -1)) { + // support all + decode(need, bl); + decode(have, bl); + uint8_t f; + decode(f, bl); + flags = static_cast(f); + decode(clean_regions, bl); + } else { + // support OSD_RECOVERY_DELETES + need = l; + decode(have, bl); + uint8_t f; + decode(f, bl); + flags = static_cast(f); + } + } else if (e == eversion_t(-1, -1)) { + // support NAUTILUS + decode(need, bl); decode(have, bl); + decode(clean_regions, bl); } else { - decode(need, bl); + // legacy encoding + need = e; decode(have, bl); - uint8_t f; - decode(f, bl); - flags = static_cast(f); } } @@ -4266,6 +4296,7 @@ struct pg_missing_item { f->dump_stream("need") << need; f->dump_stream("have") << have; f->dump_stream("flags") << flag_str(); + f->dump_stream("clean_regions") << clean_regions; } static void generate_test_instances(std::list& o) { o.push_back(new pg_missing_item); @@ -4275,6 +4306,8 @@ struct pg_missing_item { o.push_back(new pg_missing_item); o.back()->need = eversion_t(3, 5); o.back()->have = eversion_t(3, 4); + o.back()->clean_regions.mark_data_region_dirty(4096, 8192); + o.back()->clean_regions.mark_omap_dirty(); o.back()->flags = FLAG_DELETE; } bool operator==(const pg_missing_item &rhs) const { @@ -4369,6 +4402,11 @@ public: bool have_missing() const override { return !missing.empty(); } + void merge(const pg_log_entry_t& e) { + auto miter = missing.find(e.soid); + if (miter != missing.end() && miter->second.have != eversion_t() && e.version > miter->second.have) + miter->second.clean_regions.merge(e.clean_regions); + } bool is_missing(const hobject_t& oid, pg_missing_item *out = nullptr) const override { auto iter = missing.find(oid); if (iter == missing.end()) @@ -4413,31 +4451,43 @@ public: if (e.prior_version == eversion_t() || e.is_clone()) { // new object. if (is_missing_divergent_item) { // use iterator - rmissing.erase((missing_it->second).need.version); - missing_it->second = item(e.version, eversion_t(), e.is_delete()); // .have = nil - } else // create new element in missing map - missing[e.soid] = item(e.version, eversion_t(), e.is_delete()); // .have = nil + rmissing.erase((missing_it->second).need.version); + // .have = nil + missing_it->second = item(e.version, eversion_t(), e.is_delete()); + missing_it->second.clean_regions.mark_fully_dirty(); + } else { + // create new element in missing map + // .have = nil + missing[e.soid] = item(e.version, eversion_t(), e.is_delete()); + missing[e.soid].clean_regions = e.clean_regions; + missing[e.soid].clean_regions.mark_fully_dirty(); + } } else if (is_missing_divergent_item) { // already missing (prior). rmissing.erase((missing_it->second).need.version); (missing_it->second).need = e.version; // leave .have unchanged. - missing_it->second.set_delete(e.is_delete()); + (missing_it->second).set_delete(e.is_delete()); + (missing_it->second).clean_regions.merge(e.clean_regions); } else { // not missing, we must have prior_version (if any) ceph_assert(!is_missing_divergent_item); missing[e.soid] = item(e.version, e.prior_version, e.is_delete()); + missing[e.soid].clean_regions = e.clean_regions; } rmissing[e.version.version] = e.soid; tracker.changed(e.soid); } void revise_need(hobject_t oid, eversion_t need, bool is_delete) { - if (missing.count(oid)) { - rmissing.erase(missing[oid].need.version); - missing[oid].need = need; // no not adjust .have - missing[oid].set_delete(is_delete); + auto p = missing.find(oid); + if (p != missing.end()) { + rmissing.erase((p->second).need.version); + (p->second).need = need; // no not adjust .have + (p->second).set_delete(is_delete); + (p->second).clean_regions.mark_fully_dirty(); } else { missing[oid] = item(need, eversion_t(), is_delete); + missing[oid].clean_regions.mark_fully_dirty(); } rmissing[need.version] = oid; @@ -4445,15 +4495,16 @@ public: } void revise_have(hobject_t oid, eversion_t have) { - if (missing.count(oid)) { + auto p = missing.find(oid); + if (p != missing.end()) { tracker.changed(oid); - missing[oid].have = have; + (p->second).have = have; } } void add(const hobject_t& oid, eversion_t need, eversion_t have, - bool is_delete) { - missing[oid] = item(need, have, is_delete); + bool is_delete, bool make_dirty = true) { + missing[oid] = item(need, have, is_delete, make_dirty, have == eversion_t()); rmissing[need.version] = oid; tracker.changed(oid); } @@ -4509,16 +4560,16 @@ public: rmissing.clear(); } - void encode(ceph::buffer::list &bl) const { - ENCODE_START(4, 2, bl); - encode(missing, bl, may_include_deletes ? CEPH_FEATURE_OSD_RECOVERY_DELETES : 0); + void encode(ceph::buffer::list &bl, uint64_t features) const { + ENCODE_START(5, 2, bl) + encode(missing, bl, features); encode(may_include_deletes, bl); ENCODE_FINISH(bl); } void decode(ceph::buffer::list::const_iterator &bl, int64_t pool = -1) { for (auto const &i: missing) tracker.changed(i.first); - DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl); + DECODE_START_LEGACY_COMPAT_LEN(5, 2, 2, bl); decode(missing, bl); if (struct_v >= 4) { decode(may_include_deletes, bl); @@ -4643,7 +4694,7 @@ template void encode( const pg_missing_set &c, ceph::buffer::list &bl, uint64_t features=0) { ENCODE_DUMP_PRE(); - c.encode(bl); + c.encode(bl, features); ENCODE_DUMP_POST(cl); } template diff --git a/src/tools/ceph-dencoder/types.h b/src/tools/ceph-dencoder/types.h index f43bcdf3d76..6b595c5b441 100644 --- a/src/tools/ceph-dencoder/types.h +++ b/src/tools/ceph-dencoder/types.h @@ -95,7 +95,7 @@ TYPE(pg_log_entry_t) TYPE(pg_log_dup_t) TYPE(pg_log_t) TYPE_FEATUREFUL(pg_missing_item) -TYPE(pg_missing_t) +TYPE_FEATUREFUL(pg_missing_t) TYPE(pg_nls_response_t) TYPE(pg_ls_response_t) TYPE(object_copy_cursor_t)