git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: don't mark objs as lost unless we're active
author Colin Patrick McCabe <cmccabe@alumni.cmu.edu>
Tue, 23 Nov 2010 22:30:06 +0000 (14:30 -0800)
committer Colin Patrick McCabe <cmccabe@alumni.cmu.edu>
Tue, 30 Nov 2010 23:47:09 +0000 (15:47 -0800)
We don't have enough information to mark objects as lost until we
activate the PG. might_have_unfound isn't even built until PG::activate.

Signed-off-by: Colin McCabe <colinm@hq.newdream.net>
src/osd/OSD.cc
src/test/test_common.sh
src/test/test_lost.sh

index 90af2959d6f5cceee13382cb1bfd34cc6260e268..744e8a75e41f13edf39834080d11de3de46c9042 100644 (file)
@@ -1880,49 +1880,62 @@ void OSD::handle_command(MMonCommand *m)
     ss << g_conf.name << " stopped profiler";
     logclient.log(LOG_INFO, ss);
   }
-  else if (m->cmd.size() == 2 && m->cmd[0] == "dump_missing") {
-    const string &file_name(m->cmd[1]);
-    std::ofstream fout(file_name.c_str());
-    if (!fout.is_open()) {
-      stringstream ss;
-      ss << "failed to open file '" << file_name << "'";
-      logclient.log(LOG_INFO, ss);
-      goto done;
-    }
+  else if (m->cmd.size() > 1 && m->cmd[0] == "debug") {
+    if (m->cmd.size() == 3 && m->cmd[1] == "dump_missing") {
+      const string &file_name(m->cmd[2]);
+      std::ofstream fout(file_name.c_str());
+      if (!fout.is_open()) {
+       stringstream ss;
+       ss << "failed to open file '" << file_name << "'";
+       logclient.log(LOG_INFO, ss);
+       goto done;
+      }
 
-    std::set <pg_t> keys;
-    for (hash_map<pg_t, PG*>::const_iterator pg_map_e = pg_map.begin();
-        pg_map_e != pg_map.end(); ++pg_map_e) {
-      keys.insert(pg_map_e->first);
-    }
+      std::set <pg_t> keys;
+      for (hash_map<pg_t, PG*>::const_iterator pg_map_e = pg_map.begin();
+          pg_map_e != pg_map.end(); ++pg_map_e) {
+       keys.insert(pg_map_e->first);
+      }
 
-    fout << "*** osd " << whoami << ": dump_missing ***" << std::endl;
-    for (std::set <pg_t>::iterator p = keys.begin();
-        p != keys.end(); ++p) {
-      hash_map<pg_t, PG*>::iterator q = pg_map.find(*p);
-      assert(q != pg_map.end());
-      PG *pg = q->second;
-      pg->lock();
+      fout << "*** osd " << whoami << ": dump_missing ***" << std::endl;
+      for (std::set <pg_t>::iterator p = keys.begin();
+          p != keys.end(); ++p) {
+       hash_map<pg_t, PG*>::iterator q = pg_map.find(*p);
+       assert(q != pg_map.end());
+       PG *pg = q->second;
+       pg->lock();
 
-      fout << *pg << std::endl;
-      std::map<sobject_t, PG::Missing::item>::iterator mend = pg->missing.missing.end();
-      std::map<sobject_t, PG::Missing::item>::iterator m = pg->missing.missing.begin();
-      for (; m != mend; ++m) {
-       fout << m->first << " -> " << m->second << std::endl;
-       map<sobject_t, set<int> >::const_iterator mli =
-         pg->missing_loc.find(m->first);
-       if (mli == pg->missing_loc.end())
-         continue;
-       const set<int> &mls(mli->second);
-       if (mls.empty())
-         continue;
-       fout << "missing_loc: " << mls << std::endl;
+       fout << *pg << std::endl;
+       std::map<sobject_t, PG::Missing::item>::iterator mend = pg->missing.missing.end();
+       std::map<sobject_t, PG::Missing::item>::iterator m = pg->missing.missing.begin();
+       for (; m != mend; ++m) {
+         fout << m->first << " -> " << m->second << std::endl;
+         map<sobject_t, set<int> >::const_iterator mli =
+           pg->missing_loc.find(m->first);
+         if (mli == pg->missing_loc.end())
+           continue;
+         const set<int> &mls(mli->second);
+         if (mls.empty())
+           continue;
+         fout << "missing_loc: " << mls << std::endl;
+       }
+       pg->unlock();
+       fout << std::endl;
       }
-      pg->unlock();
-      fout << std::endl;
+
+      fout.close();
     }
+    else if (m->cmd.size() == 3 && m->cmd[1] == "kick_recovery_wq") {
+      g_conf.osd_recovery_delay_start = atoi(m->cmd[2].c_str());
+      stringstream ss;
+      ss << "kicking recovery queue. set osd_recovery_delay_start to "
+        << g_conf.osd_recovery_delay_start;
+      logclient.log(LOG_INFO, ss);
 
-    fout.close();
+      defer_recovery_until = g_clock.now();
+      defer_recovery_until += g_conf.osd_recovery_delay_start;
+      recovery_wq._kick();
+    }
   }
   else dout(0) << "unrecognized command! " << m->cmd << dendl;
 
@@ -3079,7 +3092,7 @@ void OSD::activate_map(ObjectStore::Transaction& t, list<Context*>& tfin)
     if (g_conf.osd_check_for_log_corruption)
       pg->check_log_for_corruption(store);
 
-    if (pg->is_primary() &&
+    if (pg->is_active() && pg->is_primary() &&
        (pg->missing.num_missing() > pg->missing_loc.size())) {
       if (pg->all_unfound_are_lost(osdmap)) {
        pg->mark_all_unfound_as_lost();
index 0c437dc9e02643dc02e3026fed7d97660a49abe0..f34fda5ffd34b0fb5ec819c63e76b370d8567daf 100755 (executable)
@@ -130,7 +130,7 @@ start_recovery() {
         CEPH_NUM_OSD=$1
         osd=0
         while [ $osd -lt $CEPH_NUM_OSD ]; do
-                ./ceph osd tell $osd injectargs 'osd recovery delay start = 0'
+                ./ceph osd tell $osd debug kick_recovery_wq 0
                 osd=$((osd+1))
         done
 }
index 6279dcc797735e5848c8edeaae887c8864a8cf47..c93f1a656a5b0ea9120b08fb30cd5d945fcb89b9 100755 (executable)
@@ -9,7 +9,7 @@ source "`dirname $0`/test_common.sh"
 
 # Functions
 my_write_objects() {
-        write_objects $1 $2 10 1000000
+        write_objects $1 $2 200 4000
 }
 
 setup() {
@@ -49,6 +49,10 @@ recovery1_impl() {
         [ $? -eq 1 ] || die "Failed to see unfound objects."
         echo "Got unfound objects."
 
+        restart_osd 0
+       sleep 20
+       start_recovery 2
+
         # Turn on recovery and wait for it to complete.
        poll_cmd "./ceph pg debug unfound_objects_exist" FALSE 3 120
         [ $? -eq 1 ] || die "Failed to recover unfound objects."