]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd : populate the needs_recovery_map source fast when only one peer has missing 3919/head
authorGuang Yang <yguang@yahoo-inc.com>
Tue, 10 Mar 2015 07:06:09 +0000 (07:06 +0000)
committerGuang Yang <yguang@yahoo-inc.com>
Fri, 10 Apr 2015 06:03:59 +0000 (06:03 +0000)
The most common case for peering is that one OSD/host goes down for some time while
the OSD(s) are still in. Once they are brought up again, the primary OSD needs to populate
the recovery map and add the recovery sources. If there are N replicas and M missing objects,
the current complexity of the routine is N*M*logN.

This patch detects whether only one peer has missing objects; if so, it goes through a fast
routine and populates the map with a complexity of N*logN.

Fixes: 9558
Signed-off-by: Guang Yang <yguang@yahoo-inc.com>
src/osd/PG.cc
src/osd/PG.h

index 7447ecca9638970fbcfd975a53b0ca83513cf74d..566bd1fb459d77c4b270b57b573b4b5009bc7c97 100644 (file)
@@ -453,6 +453,18 @@ bool PG::MissingLoc::readable_with_acting(
   return (*is_readable)(have_acting);
 }
 
+/**
+ * Register every shard in @p sources as a location for each object that is
+ * currently in needs_recovery_map.
+ *
+ * For each key of needs_recovery_map, all of @p sources are inserted into
+ * missing_loc[key]; the shards are also recorded in missing_loc_sources.
+ * No per-object check is made here, so the caller is responsible for
+ * passing only shards that can serve every object being recovered.
+ *
+ * @param sources [in] shards usable as a recovery source for all objects
+ */
+void PG::MissingLoc::add_batch_sources_info(
+  const set<pg_shard_t> &sources)
+{
+  dout(10) << __func__ << ": adding sources in batch " << sources.size() << dendl;
+  for (map<hobject_t, pg_missing_t::item>::const_iterator i = needs_recovery_map.begin();
+      i != needs_recovery_map.end();
+      ++i) {
+    missing_loc[i->first].insert(sources.begin(), sources.end());
+  }
+  // Recording the sources does not depend on the object being visited, so do
+  // it once instead of once per object. The guard keeps the previous behavior
+  // of leaving missing_loc_sources untouched when there is nothing to recover.
+  // NOTE(review): assumes missing_loc_sources is a set (unique keys), so one
+  // insert is equivalent to the repeated inserts -- confirm against PG.h.
+  if (!needs_recovery_map.empty())
+    missing_loc_sources.insert(sources.begin(), sources.end());
+}
+
 bool PG::MissingLoc::add_source_info(
   pg_shard_t fromosd,
   const pg_info_t &oinfo,
@@ -1652,14 +1664,19 @@ void PG::activate(ObjectStore::Transaction& t,
     }
 
     // Set up missing_loc
+    set<pg_shard_t> complete_shards;
     for (set<pg_shard_t>::iterator i = actingbackfill.begin();
         i != actingbackfill.end();
         ++i) {
       if (*i == get_primary()) {
-       missing_loc.add_active_missing(pg_log.get_missing());
+       missing_loc.add_active_missing(missing);
+        if (!missing.have_missing())
+          complete_shards.insert(*i);
       } else {
        assert(peer_missing.count(*i));
        missing_loc.add_active_missing(peer_missing[*i]);
+        if (!peer_missing[*i].have_missing() && peer_info[*i].last_backfill == hobject_t::get_max())
+          complete_shards.insert(*i);
       }
     }
     // If necessary, create might_have_unfound to help us find our unfound objects.
@@ -1667,19 +1684,27 @@ void PG::activate(ObjectStore::Transaction& t,
     // past intervals.
     might_have_unfound.clear();
     if (needs_recovery()) {
-      missing_loc.add_source_info(pg_whoami, info, pg_log.get_missing(), ctx->handle);
-      for (set<pg_shard_t>::iterator i = actingbackfill.begin();
-          i != actingbackfill.end();
-          ++i) {
-       if (*i == pg_whoami) continue;
-       dout(10) << __func__ << ": adding " << *i << " as a source" << dendl;
-       assert(peer_missing.count(*i));
-       assert(peer_info.count(*i));
-       missing_loc.add_source_info(
-         *i,
-         peer_info[*i],
-         peer_missing[*i],
-          ctx->handle);
+      // If only one shard has missing objects, take a shortcut and add all the
+      // other shards as recovery sources. This is considered safe since the PGLogs
+      // have been merged locally, and it covers the vast majority of use cases,
+      // such as one OSD/host being down for a while for hardware repair.
+      if (complete_shards.size() + 1 == actingbackfill.size()) {
+        missing_loc.add_batch_sources_info(complete_shards);
+      } else {
+        missing_loc.add_source_info(pg_whoami, info, pg_log.get_missing(), ctx->handle);
+        for (set<pg_shard_t>::iterator i = actingbackfill.begin();
+            i != actingbackfill.end();
+            ++i) {
+         if (*i == pg_whoami) continue;
+         dout(10) << __func__ << ": adding " << *i << " as a source" << dendl;
+         assert(peer_missing.count(*i));
+         assert(peer_info.count(*i));
+         missing_loc.add_source_info(
+           *i,
+           peer_info[*i],
+           peer_missing[*i],
+            ctx->handle);
+        }
       }
       for (map<pg_shard_t, pg_missing_t>::iterator i = peer_missing.begin();
           i != peer_missing.end();
index 870fdf22e2632306a593eb18d22725cc8b4d8a52..abcbe1b8005501258056b00e376602a941bfac84 100644 (file)
@@ -442,6 +442,11 @@ public:
       ThreadPool::TPHandle* handle  ///< [in] ThreadPool handle
       ); ///< @return whether a new object location was discovered
 
+    /// Adds the given shards as recovery sources for all objects in one batch
+    void add_batch_sources_info(
+      const set<pg_shard_t> &sources  ///< [in] a set of sources which can be used for all objects
+      );
+
     /// Uses osdmap to update structures for now down sources
     void check_recovery_sources(const OSDMapRef osdmap);