ReplicatedPG: Make pull and push op use sparse read 2137/head
author Haomai Wang <haomaiwang@gmail.com>
Tue, 22 Jul 2014 03:27:02 +0000 (11:27 +0800)
committer Haomai Wang <haomaiwang@gmail.com>
Mon, 15 Dec 2014 15:05:25 +0000 (23:05 +0800)
If the user enables the fiemap feature on the osd side, there are three kinds of
ops involved in sparse read/write:

1. normal sparse read/write op
2. clone op
3. recover op

If any of these ops does not handle sparseness, the benefit is lost and zero-filled
holes get written into the sparse file. The clone op already supports sparse writes;
this commit implements sparse read/write for the recover op.

Signed-off-by: Haomai Wang <haomaiwang@gmail.com>
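
The push-side change in build_push_op boils down to: query the object store's
allocated extents via fiemap, intersect that extent map with
recovery_info.copy_subset, and include only the result in data_included, so holes
are skipped instead of being read and pushed as zeros. Below is a minimal
standalone sketch of that intersection step; a plain std::map<offset, length>
stands in for Ceph's interval_set<uint64_t>, and every name in it is illustrative
rather than the actual ReplicatedBackend code.

// Sketch only (not the actual ReplicatedBackend code): intersect the extents
// reported by fiemap with the recovery copy_subset, as build_push_op now does,
// using a plain std::map<offset, length> in place of Ceph's interval_set.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <map>

using Extents = std::map<uint64_t, uint64_t>;  // offset -> length, sorted, non-overlapping

// Intersection of two sorted, non-overlapping extent maps.
Extents intersect(const Extents &a, const Extents &b)
{
  Extents out;
  auto ia = a.begin(), ib = b.begin();
  while (ia != a.end() && ib != b.end()) {
    uint64_t start = std::max(ia->first, ib->first);
    uint64_t end = std::min(ia->first + ia->second, ib->first + ib->second);
    if (start < end)
      out[start] = end - start;
    // advance whichever extent ends first
    if (ia->first + ia->second < ib->first + ib->second)
      ++ia;
    else
      ++ib;
  }
  return out;
}

int main()
{
  // copy_subset: the whole 4MB object still has to be recovered.
  Extents copy_subset = {{0, 4ULL << 20}};
  // What fiemap might report for a sparse object: data at 0..64K and 2M..2M+128K.
  Extents fiemap_extents = {{0, 64ULL << 10}, {2ULL << 20, 128ULL << 10}};

  // Only the allocated extents end up in data_included; the hole between them
  // is never read or pushed as zeros.
  for (const auto &[off, len] : intersect(copy_subset, fiemap_extents))
    std::cout << "push extent: off=" << off << " len=" << len << "\n";
  return 0;
}

With those inputs the sketch prints two extents (offset 0 / length 65536 and offset
2097152 / length 131072). The idea on the receiving side is that submit_push_data
only writes the pushed ranges, while the new truncate call in the first hunk below
makes sure the target object still ends up with its full logical size.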
src/osd/ReplicatedPG.cc

index 5f3a7117a8609aee5a720429fe4dc3f358969aff..82b6b9d4965c07a61d4ae2a166acf19dcb23f72a 100644 (file)
@@ -8588,6 +8588,7 @@ void ReplicatedBackend::submit_push_data(
     get_parent()->on_local_recover_start(recovery_info.soid, t);
     t->remove(get_temp_coll(t), recovery_info.soid);
     t->touch(target_coll, recovery_info.soid);
+    t->truncate(target_coll, recovery_info.soid, recovery_info.size);
     t->omap_setheader(target_coll, recovery_info.soid, omap_header);
   }
   uint64_t off = 0;
@@ -8934,9 +8935,30 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info,
   }
 
   if (available > 0) {
-    out_op->data_included.span_of(recovery_info.copy_subset,
-                                progress.data_recovered_to,
-                                available);
+    if (!recovery_info.copy_subset.empty()) {
+      interval_set<uint64_t> copy_subset = recovery_info.copy_subset;
+      bufferlist bl;
+      int r = store->fiemap(coll, recovery_info.soid, 0,
+                            copy_subset.range_end(), bl);
+      if (r >= 0)  {
+        interval_set<uint64_t> fiemap_included;
+        map<uint64_t, uint64_t> m;
+        bufferlist::iterator iter = bl.begin();
+        ::decode(m, iter);
+        map<uint64_t, uint64_t>::iterator miter;
+        for (miter = m.begin(); miter != m.end(); ++miter) {
+          fiemap_included.insert(miter->first, miter->second);
+        }
+
+        copy_subset.intersection_of(fiemap_included);
+      }
+      out_op->data_included.span_of(copy_subset, progress.data_recovered_to,
+                                    available);
+      if (out_op->data_included.empty()) // zero filled section, skip to end!
+        new_progress.data_recovered_to = recovery_info.copy_subset.range_end();
+      else
+        new_progress.data_recovered_to = out_op->data_included.range_end();
+    }
   } else {
     out_op->data_included.clear();
   }
@@ -8957,9 +8979,6 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info,
     out_op->data.claim_append(bit);
   }
 
-  if (!out_op->data_included.empty())
-    new_progress.data_recovered_to = out_op->data_included.range_end();
-
   if (new_progress.is_complete(recovery_info)) {
     new_progress.data_complete = true;
     if (stat)
@@ -9160,7 +9179,7 @@ void ReplicatedBackend::handle_pull(pg_shard_t peer, PullOp &op, PushOp *reply)
       recovery_info.size = st.st_size;
       recovery_info.copy_subset.clear();
       if (st.st_size)
-       recovery_info.copy_subset.insert(0, st.st_size);
+        recovery_info.copy_subset.insert(0, st.st_size);
       assert(recovery_info.clone_subset.empty());
     }