From: Haomai Wang Date: Tue, 22 Jul 2014 03:27:02 +0000 (+0800) Subject: ReplicatedPG: Make pull and push op use sparse read X-Git-Tag: v0.91~53^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=b5381c22a7b0d8d4c14cd8feb3a115f7d80d5d81;p=ceph.git ReplicatedPG: Make pull and push op use sparse read If the user enables the fiemap feature on the OSD side, there are three ways to make reads/writes sparse: 1. normal sparse read/write op 2. clone op 3. recover op If any of these ops does not support sparse I/O, it defeats the advantage of the others by writing zero-filled holes into the sparse file. The clone op already supports sparse writes; this commit implements sparse read/write for the recover op. Signed-off-by: Haomai Wang --- diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 5f3a7117a8609..82b6b9d4965c0 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -8588,6 +8588,7 @@ void ReplicatedBackend::submit_push_data( get_parent()->on_local_recover_start(recovery_info.soid, t); t->remove(get_temp_coll(t), recovery_info.soid); t->touch(target_coll, recovery_info.soid); + t->truncate(target_coll, recovery_info.soid, recovery_info.size); t->omap_setheader(target_coll, recovery_info.soid, omap_header); } uint64_t off = 0; @@ -8934,9 +8935,30 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info, } if (available > 0) { - out_op->data_included.span_of(recovery_info.copy_subset, - progress.data_recovered_to, - available); + if (!recovery_info.copy_subset.empty()) { + interval_set copy_subset = recovery_info.copy_subset; + bufferlist bl; + int r = store->fiemap(coll, recovery_info.soid, 0, + copy_subset.range_end(), bl); + if (r >= 0) { + interval_set fiemap_included; + map m; + bufferlist::iterator iter = bl.begin(); + ::decode(m, iter); + map::iterator miter; + for (miter = m.begin(); miter != m.end(); ++miter) { + fiemap_included.insert(miter->first, miter->second); + } + + copy_subset.intersection_of(fiemap_included); + } + 
out_op->data_included.span_of(copy_subset, progress.data_recovered_to, + available); + if (out_op->data_included.empty()) // zero filled section, skip to end! + new_progress.data_recovered_to = recovery_info.copy_subset.range_end(); + else + new_progress.data_recovered_to = out_op->data_included.range_end(); + } } else { out_op->data_included.clear(); } @@ -8957,9 +8979,6 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info, out_op->data.claim_append(bit); } - if (!out_op->data_included.empty()) - new_progress.data_recovered_to = out_op->data_included.range_end(); - if (new_progress.is_complete(recovery_info)) { new_progress.data_complete = true; if (stat) @@ -9160,7 +9179,7 @@ void ReplicatedBackend::handle_pull(pg_shard_t peer, PullOp &op, PushOp *reply) recovery_info.size = st.st_size; recovery_info.copy_subset.clear(); if (st.st_size) - recovery_info.copy_subset.insert(0, st.st_size); + recovery_info.copy_subset.insert(0, st.st_size); assert(recovery_info.clone_subset.empty()); }