]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
ReplicatedPG: Add omap to recovery
authorSamuel Just <samuel.just@dreamhost.com>
Mon, 13 Feb 2012 01:58:50 +0000 (17:58 -0800)
committerSamuel Just <samuel.just@dreamhost.com>
Thu, 1 Mar 2012 18:11:42 +0000 (10:11 -0800)
Signed-off-by: Samuel Just <samuel.just@dreamhost.com>
src/osd/ReplicatedPG.cc
src/osd/ReplicatedPG.h
src/osd/osd_types.h

index d675f57fa3208f893cdc053326bfcc3585d286ea..2b177637dc072135a7b07da6c8e085c75248a5db 100644 (file)
@@ -4187,6 +4187,7 @@ int ReplicatedPG::pull(const hobject_t& soid, eversion_t v)
   recovery_info.version = v;
   ObjectRecoveryProgress progress;
   progress.data_complete = false;
+  progress.omap_complete = false;
   progress.data_recovered_to = 0;
   progress.first = true;
   assert(!pulling.count(soid));
@@ -4308,6 +4309,7 @@ void ReplicatedPG::push_start(
   pi.recovery_progress.first = true;
   pi.recovery_progress.data_recovered_to = 0;
   pi.recovery_progress.data_complete = 0;
+  pi.recovery_progress.omap_complete = 0;
 
   ObjectRecoveryProgress new_progress;
   send_push(peer, pi.recovery_info, pi.recovery_progress, &new_progress);
@@ -4351,6 +4353,7 @@ void ReplicatedPG::submit_push_data(
   const interval_set<uint64_t> &intervals_included,
   bufferlist data_included,
   map<string, bufferptr> &attrs,
+  map<string, bufferlist> &omap_entries,
   ObjectStore::Transaction *t)
 {
   if (first) {
@@ -4368,6 +4371,8 @@ void ReplicatedPG::submit_push_data(
     off += p.get_len();
   }
 
+  t->omap_setkeys(coll_t::TEMP_COLL, recovery_info.soid,
+                 omap_entries);
   t->setattrs(coll_t::TEMP_COLL, recovery_info.soid,
              attrs);
 }
@@ -4532,6 +4537,7 @@ void ReplicatedPG::handle_pull_response(OpRequest *op)
   Context *onreadable_sync = 0;
   submit_push_data(pi.recovery_info, first,
                   data_included, data, m->attrset,
+                  m->omap_entries,
                   t);
 
   if (complete) {
@@ -4592,7 +4598,8 @@ void ReplicatedPG::handle_push(OpRequest *op)
   bufferlist data;
   m->claim_data(data);
   bool first = m->current_progress.first;
-  bool complete = m->recovery_progress.data_complete;
+  bool complete = m->recovery_progress.data_complete &&
+    m->recovery_progress.omap_complete;
   ObjectStore::Transaction *t = new ObjectStore::Transaction;
   Context *onreadable = new ObjectStore::C_DeleteTransaction(t);
   Context *onreadable_sync = 0;
@@ -4601,6 +4608,7 @@ void ReplicatedPG::handle_push(OpRequest *op)
                   m->data_included,
                   data,
                   m->attrset,
+                  m->omap_entries,
                   t);
   if (complete)
     submit_push_complete(m->recovery_info,
@@ -4666,10 +4674,28 @@ int ReplicatedPG::send_push(int peer,
     new_progress.first = false;
   }
 
+  uint64_t available = g_conf->osd_recovery_max_chunk;
+  if (!progress.omap_complete) {
+    ObjectMap::ObjectMapIterator iter =
+      osd->store->get_omap_iterator(coll,
+                                   recovery_info.soid);
+    for (iter->upper_bound(progress.omap_recovered_to);
+        iter->valid();
+        iter->next()) {
+      if (available < (iter->key().size() + iter->value().length()))
+       break;
+      subop->omap_entries.insert(make_pair(iter->key(), iter->value()));
+      available -= (iter->key().size() + iter->value().length());
+    }
+    if (!iter->valid())
+      new_progress.omap_complete = true;
+    else
+      new_progress.omap_recovered_to = iter->key();
+  }
 
   subop->data_included.span_of(recovery_info.copy_subset,
                               progress.data_recovered_to,
-                              g_conf->osd_recovery_max_chunk);
+                              available);
 
   for (interval_set<uint64_t>::iterator p = subop->data_included.begin();
        p != subop->data_included.end();
index 7deeee3e95790e9b207298bd08f3d8ce55a52c92..7531840203689ee32276ecc0da6ccbe0136d59a8 100644 (file)
@@ -581,6 +581,7 @@ protected:
                        const interval_set<uint64_t> &intervals_included,
                        bufferlist data_included,
                        map<string, bufferptr> &attrs,
+                       map<string, bufferlist> &omap_entries,
                        ObjectStore::Transaction *t);
   void submit_push_complete(ObjectRecoveryInfo &recovery_info,
                            ObjectStore::Transaction *t);
index 667bdb2b49813302a7dba3ac16f57fade22121f2..4e2c0f14a63e00925f3a73189d62efa43cfbf2f0 100644 (file)
@@ -1651,7 +1651,10 @@ struct ObjectRecoveryProgress {
       data_complete(false), omap_complete(false) { }
 
   bool is_complete(const ObjectRecoveryInfo& info) const {
-    return data_recovered_to >= (info.copy_subset.empty() ? 0 : info.copy_subset.range_end());
+    return (data_recovered_to >= (
+      info.copy_subset.empty() ?
+      0 : info.copy_subset.range_end())) &&
+      omap_complete;
   }
 
   static void generate_test_instances(list<ObjectRecoveryProgress*>& o);