osd/PeeringState: restart peering on any previous down acting member coming back (33106/head)
author    xie xingguo <xie.xingguo@zte.com.cn>
          Fri, 14 Feb 2020 10:26:52 +0000 (18:26 +0800)
committer Yan Jun <yan.jun8@zte.com.cn>
          Fri, 21 Feb 2020 09:52:52 +0000 (17:52 +0800)
One of our customers wants to verify the data safety of Ceph while scaling
the cluster up, and the test case looks like this (sketched as CLI commands
after the list):
- keep checking the status of a specified pg, whose up set is [1, 2, 3]
- add more osds: up [1, 2, 3] -> up [1, 4, 5], acting = [1, 2, 3], backfill_targets = [4, 5],
  pg is remapped
- stop osd.2: up [1, 4, 5], acting = [1, 3], backfill_targets = [4, 5], pg is undersized
- restart osd.2: acting stays unchanged, since osd.2 now belongs to neither the
  current up set nor the acting set, leaving the pg stuck undersized for a long
  time until all backfill targets complete
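
A minimal CLI sketch of the same sequence (pool/pg ids are hypothetical; the
standalone test added below automates these steps, using pg-upmap in place of
actually adding OSDs):

    # assumes a 3-replica pool whose pg 1.0 currently maps to up/acting [1,2,3]
    ceph pg 1.0 query | jq '.up, .acting'

    # remap the pg as a cluster expansion would; osd.4/osd.5 become backfill targets
    ceph osd pg-upmap 1.0 1 4 5

    # stop osd.2 (a previous acting member); acting shrinks to [1,3], pg goes undersized
    systemctl stop ceph-osd@2

    # bring osd.2 back; before this fix acting stays [1,3], since osd.2 is in
    # neither the current up set nor the acting set, and the pg stays
    # undersized/DEGRADED until backfill to osd.4 and osd.5 completes
    systemctl start ceph-osd@2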

This does not pose any critical problem -- we will eventually get that pg back
to active+clean -- but the long-lived DEGRADED warnings keep bothering our
customer, who cares about data safety more than anything else.

The right way to achieve the above goal is for:

boost::statechart::result PeeringState::Active::react(const MNotifyRec& notevt)

to check whether the newly booted node could be validly chosen for the acting
set and, if so, request a new pg_temp mapping. The new pg_temp mapping will then
trigger a real interval change that gets rid of the DEGRADED warning.
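
A quick way to observe the new behavior on a live cluster (a sketch; the log
path and osd id are hypothetical, and the log line is the one added by this
commit, which the new standalone test greps for):

    # the primary should log the new message when the down member comes back
    grep "got notify from previous acting member" /var/log/ceph/ceph-osd.1.log

    # the requested pg_temp mapping appears in the osdmap, and goes away again
    # once the new interval finishes peering and backfill completes
    ceph osd dump | grep pg_temp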

Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
Signed-off-by: Yan Jun <yan.jun8@zte.com.cn>
qa/standalone/osd/repeer-on-acting-back.sh [new file with mode: 0755]
src/osd/PeeringState.cc
src/osd/PeeringState.h

diff --git a/qa/standalone/osd/repeer-on-acting-back.sh b/qa/standalone/osd/repeer-on-acting-back.sh
new file mode 100755
index 0000000..af406ef
--- /dev/null
+++ b/qa/standalone/osd/repeer-on-acting-back.sh
@@ -0,0 +1,129 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2020  ZTE Corporation <contact@zte.com.cn>
+#
+# Author: xie xingguo <xie.xingguo@zte.com.cn>
+# Author: Yan Jun <yan.jun8@zte.com.cn>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+    local dir=$1
+    shift
+
+    export poolname=test
+    export testobjects=100
+    export loglen=12
+    export trim=$(expr $loglen / 2)
+    export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+    CEPH_ARGS+="--mon-host=$CEPH_MON "
+    # so we will not force auth_log_shard to be acting_primary
+    CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 "
+    # use small pg_log settings, so we always do backfill instead of recovery
+    CEPH_ARGS+="--osd_min_pg_log_entries=$loglen --osd_max_pg_log_entries=$loglen --osd_pg_log_trim_min=$trim "
+
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        setup $dir || return 1
+        $func $dir || return 1
+        teardown $dir || return 1
+    done
+}
+
+
+function TEST_repeer_on_down_acting_member_coming_back() {
+    local dir=$1
+    local dummyfile='/etc/fstab'
+
+    local num_osds=6
+    local osds="$(seq 0 $(expr $num_osds - 1))"
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    for i in $osds
+    do
+      run_osd $dir $i || return 1
+    done
+
+    create_pool $poolname 1 1
+    ceph osd pool set $poolname size 3
+    ceph osd pool set $poolname min_size 2
+    local poolid=$(ceph pg dump pools -f json | jq '.pool_stats' | jq '.[].poolid')
+    local pgid=$poolid.0
+
+    # enable required feature-bits for upmap
+    ceph osd set-require-min-compat-client luminous
+    # reset up to [1,2,3]
+    ceph osd pg-upmap $pgid 1 2 3 || return 1
+
+    flush_pg_stats || return 1
+    wait_for_clean || return 1
+
+    echo "writing initial objects"
+    # write a bunch of objects
+    for i in $(seq 1 $testobjects)
+    do
+      rados -p $poolname put existing_$i $dummyfile
+    done
+
+    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+    # reset up to [1,4,5]
+    ceph osd pg-upmap $pgid 1 4 5 || return 1
+
+    # wait for peering to complete
+    sleep 2
+
+    # make sure osd.2 belongs to current acting set
+    ceph pg $pgid query | jq '.acting' | grep 2 || return 1
+
+    # kill osd.2
+    kill_daemons $dir KILL osd.2 || return 1
+    ceph osd down osd.2
+
+    # again, wait for peering to complete
+    sleep 2
+
+    # osd.2 should have been moved out from acting set
+    ceph pg $pgid query | jq '.acting' | grep 2 && return 1
+
+    # bring up osd.2
+    activate_osd $dir 2 || return 1
+    wait_for_osd up 2
+
+    # again, wait for peering to complete
+    sleep 2
+
+    # primary should be able to re-add osd.2 into acting
+    ceph pg $pgid query | jq '.acting' | grep 2 || return 1
+
+    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
+
+    if ! grep -q "Active: got notify from previous acting member.*, requesting pg_temp change" $(find $dir -name '*osd*log')
+    then
+            echo failure
+            return 1
+    fi
+    echo "success"
+
+    delete_pool $poolname
+    kill_daemons $dir || return 1
+}
+
+main repeer-on-acting-back "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh repeer-on-acting-back.sh"
+# End:
diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc
index aa9e047c9337125748a6788e4c3c41183259e8bb..342fe8e637bb4c12ab0ea07e30e4e3861c9a79d2 100644
--- a/src/osd/PeeringState.cc
+++ b/src/osd/PeeringState.cc
@@ -1996,7 +1996,8 @@ void PeeringState::choose_async_recovery_replicated(
  */
 bool PeeringState::choose_acting(pg_shard_t &auth_log_shard_id,
                                 bool restrict_to_up_acting,
-                                bool *history_les_bound)
+                                bool *history_les_bound,
+                                bool request_pg_temp_change_only)
 {
   map<pg_shard_t, pg_info_t> all_info(peer_info.begin(), peer_info.end());
   all_info[pg_whoami] = info;
@@ -2105,6 +2106,8 @@ bool PeeringState::choose_acting(pg_shard_t &auth_log_shard_id,
     }
     return false;
   }
+  if (request_pg_temp_change_only)
+    return true;
   want_acting.clear();
   acting_recovery_backfill = want_acting_backfill;
   psdout(10) << "acting_recovery_backfill is "
@@ -5653,6 +5656,16 @@ boost::statechart::result PeeringState::Active::react(const MNotifyRec& notevt)
       ps->discover_all_missing(
        context<PeeringMachine>().get_recovery_ctx().msgs);
     }
+    // check if it is a previous down acting member that's coming back.
+    // if so, request pg_temp change to trigger a new interval transition
+    pg_shard_t auth_log_shard;
+    bool history_les_bound = false;
+    ps->choose_acting(auth_log_shard, false, &history_les_bound, true);
+    if (!ps->want_acting.empty() && ps->want_acting != ps->acting) {
+      psdout(10) << "Active: got notify from previous acting member "
+                 << notevt.from << ", requesting pg_temp change"
+                 << dendl;
+    }
   }
   return discard_event();
 }
diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h
index 327e35eeb975a03f3bd2c768197873a6af831bb2..0de153e48d86f5c8a4d661c88fc140012e12dfc8 100644
--- a/src/osd/PeeringState.h
+++ b/src/osd/PeeringState.h
@@ -1556,7 +1556,8 @@ public:
   bool recoverable(const vector<int> &want) const;
   bool choose_acting(pg_shard_t &auth_log_shard,
                     bool restrict_to_up_acting,
-                    bool *history_les_bound);
+                    bool *history_les_bound,
+                    bool request_pg_temp_change_only = false);
 
   bool search_for_missing(
     const pg_info_t &oinfo, const pg_missing_t &omissing,