From b5381c22a7b0d8d4c14cd8feb3a115f7d80d5d81 Mon Sep 17 00:00:00 2001 From: Haomai Wang Date: Tue, 22 Jul 2014 11:27:02 +0800 Subject: [PATCH] ReplicatedPG: Make pull and push op use sparse read If the user enables the fiemap feature on the OSD side, there are three ways to make reads/writes sparse: 1. normal sparse read/write op 2. clone op 3. recover op If any of these ops doesn't support the sparse path, it destroys the advantage of this approach and writes zero-filled holes into the sparse file. The clone op already supports sparse write; this commit implements sparse read/write for the recover op. Signed-off-by: Haomai Wang --- src/osd/ReplicatedPG.cc | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 5f3a7117a8609..82b6b9d4965c0 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -8588,6 +8588,7 @@ void ReplicatedBackend::submit_push_data( get_parent()->on_local_recover_start(recovery_info.soid, t); t->remove(get_temp_coll(t), recovery_info.soid); t->touch(target_coll, recovery_info.soid); + t->truncate(target_coll, recovery_info.soid, recovery_info.size); t->omap_setheader(target_coll, recovery_info.soid, omap_header); } uint64_t off = 0; @@ -8934,9 +8935,30 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info, } if (available > 0) { - out_op->data_included.span_of(recovery_info.copy_subset, - progress.data_recovered_to, - available); + if (!recovery_info.copy_subset.empty()) { + interval_set copy_subset = recovery_info.copy_subset; + bufferlist bl; + int r = store->fiemap(coll, recovery_info.soid, 0, + copy_subset.range_end(), bl); + if (r >= 0) { + interval_set fiemap_included; + map m; + bufferlist::iterator iter = bl.begin(); + ::decode(m, iter); + map::iterator miter; + for (miter = m.begin(); miter != m.end(); ++miter) { + fiemap_included.insert(miter->first, miter->second); + } + + copy_subset.intersection_of(fiemap_included); + } + 
out_op->data_included.span_of(copy_subset, progress.data_recovered_to, + available); + if (out_op->data_included.empty()) // zero filled section, skip to end! + new_progress.data_recovered_to = recovery_info.copy_subset.range_end(); + else + new_progress.data_recovered_to = out_op->data_included.range_end(); + } } else { out_op->data_included.clear(); } @@ -8957,9 +8979,6 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info, out_op->data.claim_append(bit); } - if (!out_op->data_included.empty()) - new_progress.data_recovered_to = out_op->data_included.range_end(); - if (new_progress.is_complete(recovery_info)) { new_progress.data_complete = true; if (stat) @@ -9160,7 +9179,7 @@ void ReplicatedBackend::handle_pull(pg_shard_t peer, PullOp &op, PushOp *reply) recovery_info.size = st.st_size; recovery_info.copy_subset.clear(); if (st.st_size) - recovery_info.copy_subset.insert(0, st.st_size); + recovery_info.copy_subset.insert(0, st.st_size); assert(recovery_info.clone_subset.empty()); } -- 2.39.5