git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
Revert "Merge pull request #47092 from dparmar18/wip-dparmar-cephadm-simple-1" 47732/head
author Patrick Donnelly <pdonnell@redhat.com>
Mon, 22 Aug 2022 17:09:58 +0000 (13:09 -0400)
committer Patrick Donnelly <pdonnell@redhat.com>
Mon, 22 Aug 2022 17:10:27 +0000 (13:10 -0400)
This reverts commit 1c4da3dbd20d9683fe681cd7083478809cbb19b9, reversing
changes made to ee1e163b1e69c4db558cdf6b857c7c82cd4820d8.

Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
35 files changed:
PendingReleaseNotes
doc/cephadm/upgrade.rst
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/% [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/.qa [deleted symlink]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/bluestore-bitmap.yaml [deleted symlink]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/centos_8.stream_container_tools.yaml [deleted symlink]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/conf [deleted symlink]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/% [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/.qa [deleted symlink]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/ignorelist_health.yaml [deleted symlink]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/ignorelist_wrongly_marked_down.yaml [deleted symlink]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/pg-warn.yaml [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/syntax.yaml [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/roles.yaml [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/% [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/.qa [deleted symlink]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/0-from/.qa [deleted symlink]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/0-from/pacific.yaml [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/0-from/v16.2.4.yaml [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/% [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/.qa [deleted symlink]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/0-create.yaml [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/1-ranks/.qa [deleted symlink]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/1-ranks/1.yaml [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/2-allow_standby_replay/.qa [deleted symlink]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/2-allow_standby_replay/no.yaml [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/3-inline/no.yaml [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/3-inline/yes.yaml [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/4-verify.yaml [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/2-client.yaml [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/3-upgrade-with-workload.yaml [deleted file]
qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/4-verify.yaml [deleted file]
qa/tasks/fs.py
src/pybind/mgr/cephadm/upgrade.py
src/pybind/mgr/orchestrator/module.py

diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index ddc6be10d149f89d3e56d5c43f22ec33f39f3e1c..7e8da3f8ba1658981bb7319f1108c67b76408332 100644 (file)
@@ -99,3 +99,4 @@ of the feature, refer this link on how to perform it:
 https://docs.ceph.com/en/quincy/cephadm/upgrade/#staggered-upgrade
 Relevant tracker: https://tracker.ceph.com/issues/55715
 
+  Relevant tracker: https://tracker.ceph.com/issues/5614
diff --git a/doc/cephadm/upgrade.rst b/doc/cephadm/upgrade.rst
index 8e62af61e440be1934107c95e654ee0f83036c9d..221f212449f792df0ffbceec5b94831910f37212 100644 (file)
@@ -48,31 +48,6 @@ The automated upgrade process follows Ceph best practices.  For example:
 Starting the upgrade
 ====================
 
-.. note::
-   .. note::
-      `Staggered Upgrade`_ of the mons/mgrs may be necessary to have access
-      to this new feature.
-
-   Cephadm by default reduces `max_mds` to `1`. This can be disruptive for large
-   scale CephFS deployments because the cluster cannot quickly reduce active MDS(s)
-   to `1` and a single active MDS cannot easily handle the load of all clients
-   even for a short time. Therefore, to upgrade MDS(s) without reducing `max_mds`,
-   the `fail_fs` option can to be set to `true` (default value is `false`) prior
-   to initiating the upgrade:
-
-   .. prompt:: bash #
-
-      ceph config set mgr mgr/orchestrator/fail_fs true
-
-   This would:
-               #. Fail CephFS filesystems, bringing active MDS daemon(s) to
-                  `up:standby` state.
-
-               #. Upgrade MDS daemons safely.
-
-               #. Bring CephFS filesystems back up, bringing the state of active
-                  MDS daemon(s) from `up:standby` to `up:active`.
-
 Before you use cephadm to upgrade Ceph, verify that all hosts are currently online and that your cluster is healthy by running the following command:
 
 .. prompt:: bash #
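
For context on the note removed above: a minimal sketch of the three-step sequence it describes (fail the filesystem, upgrade the MDS daemons, mark the filesystem joinable again), assembled from the mon commands that appear later in this diff. The helper names `check_mon_command` and `upgrade_mds_daemons` are placeholders, not cephadm's actual entry points.

# Illustrative only: the fail_fs sequence from the removed note, expressed
# with the mon commands used elsewhere in this diff. `check_mon_command` and
# `upgrade_mds_daemons` are placeholder callables, not cephadm's real API.
def upgrade_mds_with_fail_fs(check_mon_command, upgrade_mds_daemons, fs_name: str) -> None:
    # 1. Fail the filesystem; active MDS daemons drop to up:standby.
    check_mon_command({'prefix': 'fs fail', 'fs_name': fs_name})
    # 2. Upgrade the (now standby) MDS daemons safely.
    upgrade_mds_daemons()
    # 3. Make the filesystem joinable again; MDS daemons return to up:active.
    check_mon_command({'prefix': 'fs set', 'fs_name': fs_name,
                       'var': 'joinable', 'val': 'true'})
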
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/% b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/%
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/.qa b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/.qa
deleted file mode 120000 (symlink)
index a602a03..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../.qa/
\ No newline at end of file
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/bluestore-bitmap.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/bluestore-bitmap.yaml
deleted file mode 120000 (symlink)
index fb603bc..0000000
+++ /dev/null
@@ -1 +0,0 @@
-.qa/cephfs/objectstore-ec/bluestore-bitmap.yaml
\ No newline at end of file
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/centos_8.stream_container_tools.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/centos_8.stream_container_tools.yaml
deleted file mode 120000 (symlink)
index 7a86f96..0000000
+++ /dev/null
@@ -1 +0,0 @@
-.qa/distros/podman/centos_8.stream_container_tools.yaml
\ No newline at end of file
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/conf b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/conf
deleted file mode 120000 (symlink)
index 6d47129..0000000
+++ /dev/null
@@ -1 +0,0 @@
-.qa/cephfs/conf/
\ No newline at end of file
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/% b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/%
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/.qa b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/.qa
deleted file mode 120000 (symlink)
index a602a03..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../.qa/
\ No newline at end of file
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/ignorelist_health.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/ignorelist_health.yaml
deleted file mode 120000 (symlink)
index 5cb891a..0000000
+++ /dev/null
@@ -1 +0,0 @@
-.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/ignorelist_wrongly_marked_down.yaml
deleted file mode 120000 (symlink)
index f317cb7..0000000
+++ /dev/null
@@ -1 +0,0 @@
-.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/pg-warn.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/pg-warn.yaml
deleted file mode 100644 (file)
index 4ae54a4..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-overrides:
-  ceph:
-    conf:
-      global:
-        mon pg warn min per osd: 0
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/syntax.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/overrides/syntax.yaml
deleted file mode 100644 (file)
index 84d5d43..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-overrides:
-  kclient:
-      syntax: 'v1'
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/roles.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/roles.yaml
deleted file mode 100644 (file)
index bce4ecd..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-roles:
-- - host.a
-  - client.0
-  - osd.0
-  - osd.1
-  - osd.2
-- - host.b
-  - client.1
-  - osd.3
-  - osd.4
-  - osd.5
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/% b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/%
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/.qa b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/.qa
deleted file mode 120000 (symlink)
index a602a03..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../.qa/
\ No newline at end of file
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/0-from/.qa b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/0-from/.qa
deleted file mode 120000 (symlink)
index a602a03..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../.qa/
\ No newline at end of file
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/0-from/pacific.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/0-from/pacific.yaml
deleted file mode 100644 (file)
index 67c27ba..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-meta:
-- desc: |
-   setup ceph/pacific
-
-tasks:
-- install:
-    branch: pacific
-    exclude_packages:
-      - ceph-volume
-- print: "**** done install task..."
-- cephadm:
-    image: quay.io/ceph/daemon-base:latest-pacific
-    roleless: true
-    cephadm_branch: pacific
-    cephadm_git_url: https://github.com/ceph/ceph
-    conf:
-      osd:
-        #set config option for which cls modules are allowed to be loaded / used
-        osd_class_load_list: "*"
-        osd_class_default_list: "*"
-- print: "**** done end installing pacific cephadm ..."
-- cephadm.shell:
-    host.a:
-      - ceph config set mgr mgr/cephadm/use_repo_digest true --force
-- print: "**** done cephadm.shell ceph config set mgr..."
-- cephadm.shell:
-    host.a:
-      - ceph orch status
-      - ceph orch ps
-      - ceph orch ls
-      - ceph orch host ls
-      - ceph orch device ls
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/0-from/v16.2.4.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/0-from/v16.2.4.yaml
deleted file mode 100644 (file)
index c732d69..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-meta:
-- desc: |
-   setup ceph/pacific v16.2.4
-
-tasks:
-# Disable metrics sending by kclient as it may crash (assert) a v16.2.4 MDS
-- pexec:
-    clients:
-      - sudo modprobe -r ceph
-      - sudo modprobe ceph disable_send_metrics=on
-- install:
-    tag: v16.2.4
-    exclude_packages:
-      - ceph-volume
-- print: "**** done install task..."
-- cephadm:
-    roleless: true
-    image: quay.io/ceph/ceph:v16.2.4
-    cephadm_branch: v16.2.4
-    cephadm_git_url: https://github.com/ceph/ceph
-    # needed for v16.2.4 due to --skip-admin-label
-    avoid_pacific_features: true
-- print: "**** done starting v16.2.4"
-- cephadm.shell:
-    host.a:
-      - ceph orch status
-      - ceph orch ps
-      - ceph orch ls
-      - ceph orch host ls
-      - ceph orch device ls
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/% b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/%
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/.qa b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/.qa
deleted file mode 120000 (symlink)
index a602a03..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../.qa/
\ No newline at end of file
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/0-create.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/0-create.yaml
deleted file mode 100644 (file)
index 5ee0022..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-tasks:
-- cephadm.shell:
-    host.a:
-      - ceph fs volume create cephfs --placement=4
-      - ceph fs dump
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/1-ranks/.qa b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/1-ranks/.qa
deleted file mode 120000 (symlink)
index a602a03..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../.qa/
\ No newline at end of file
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/1-ranks/1.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/1-ranks/1.yaml
deleted file mode 100644 (file)
index fcd3b1e..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-tasks:
-- cephadm.shell:
-    host.a:
-      - ceph fs set cephfs max_mds 2
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/2-allow_standby_replay/.qa b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/2-allow_standby_replay/.qa
deleted file mode 120000 (symlink)
index a602a03..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../.qa/
\ No newline at end of file
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/2-allow_standby_replay/no.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/2-allow_standby_replay/no.yaml
deleted file mode 100644 (file)
index 3dbc810..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-tasks:
-- cephadm.shell:
-    host.a:
-      - ceph fs set cephfs allow_standby_replay false
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/3-inline/no.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/3-inline/no.yaml
deleted file mode 100644 (file)
index 107f30e..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-tasks:
-- cephadm.shell:
-    host.a:
-      - ceph fs set cephfs inline_data false
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/3-inline/yes.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/3-inline/yes.yaml
deleted file mode 100644 (file)
index 246ed71..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-tasks:
-- cephadm.shell:
-    host.a:
-      - ceph fs set cephfs inline_data true --yes-i-really-really-mean-it
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/4-verify.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/1-volume/4-verify.yaml
deleted file mode 100644 (file)
index e71365a..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-tasks:
-- cephadm.shell:
-    host.a:
-      - ceph fs dump
-      - ceph --format=json fs dump | jq -e ".filesystems | length == 1"
-      - while ! ceph --format=json mds versions | jq -e ". | add == 4"; do sleep 1; done
-- fs.pre_upgrade_save:
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/2-client.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/2-client.yaml
deleted file mode 100644 (file)
index 92b9dda..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-tasks:
-- kclient:
-- print: "**** done client"
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/3-upgrade-with-workload.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/3-upgrade-with-workload.yaml
deleted file mode 100644 (file)
index 876cffd..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-tasks:
-- parallel:
-  - upgrade-tasks
-  - workload-tasks
-
-upgrade-tasks:
-  sequential:
-  - cephadm.shell:
-      env: [sha1]
-      host.a:
-        - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force
-        - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force
-        - ceph config set global log_to_journald false --force
-        - ceph orch ps
-        - ceph versions
-        - ceph -s
-        - ceph orch ls
-        - ceph orch daemon redeploy "mgr.$(ceph mgr dump -f json | jq .standbys | jq .[] | jq -r .name)" --image quay.ceph.io/ceph-ci/ceph:$sha1
-        - ceph orch ps --refresh
-        - sleep 300
-        - ceph orch ps
-        - ceph versions
-        - ceph -s
-        - ceph versions | jq -e '.mgr | length == 2'
-        - ceph mgr fail
-        - sleep 180
-        - ceph orch daemon redeploy "mgr.$(ceph mgr dump -f json | jq .standbys | jq .[] | jq -r .name)" --image quay.ceph.io/ceph-ci/ceph:$sha1
-        - ceph orch ps --refresh
-        - sleep 180
-        - ceph orch ps
-        - ceph versions
-        - ceph -s
-        - ceph mgr fail
-        - sleep 300
-        - ceph orch ps
-        - ceph versions
-        - ceph -s
-        - ceph versions | jq -e '.mgr | length == 1'
-        - ceph mgr fail
-        - sleep 180
-        - ceph orch ps
-        - ceph versions
-        - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types mgr
-        - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done
-        - ceph versions | jq -e '.mgr | length == 1'
-        - ceph versions | jq -e '.mgr | keys' | grep $sha1
-        - ceph versions | jq -e '.overall | length == 2'
-        - ceph orch upgrade check quay.ceph.io/ceph-ci/ceph:$sha1 | jq -e '.up_to_date | length == 2'
-        - ceph orch ps --refresh
-        - sleep 180
-        - ceph config set mgr mgr/orchestrator/fail_fs true
-        - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1
-  - cephadm.shell:
-      env: [sha1]
-      host.a:
-        - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph fs dump; ceph orch upgrade status ; sleep 30 ; done
-        - ceph orch ps
-        - ceph versions
-        - echo "wait for servicemap items w/ changing names to refresh"
-        - sleep 60
-        - ceph orch ps
-        - ceph health detail
-        - ceph orch upgrade status
-        - ceph versions
-        - ceph versions | jq -e '.overall | length == 1'
-        - ceph versions | jq -e '.overall | keys' | grep $sha1
-
-workload-tasks:
-  sequential:
-  - workunit:
-      clients:
-        all:
-          - suites/fsstress.sh
diff --git a/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/4-verify.yaml b/qa/suites/fs/upgrade/upgrade_without_reducing_max_mds/tasks/4-verify.yaml
deleted file mode 100644 (file)
index c2b657e..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-tasks:
-- cephadm.shell:
-    host.a:
-      - ceph fs dump
-- fs.post_upgrade_checks:
diff --git a/qa/tasks/fs.py b/qa/tasks/fs.py
index 7e62c80318726538c615d02202498f828dde4652..f7a9330e29b50541e97bac6ff3aea3601ba2c9f8 100644 (file)
@@ -11,7 +11,6 @@ log = logging.getLogger(__name__)
 
 # Everything up to CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
 CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1<<5)
-CEPH_MDSMAP_NOT_JOINABLE = (1 << 0)
 CEPH_MDSMAP_LAST = CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
 UPGRADE_FLAGS_MASK = ((CEPH_MDSMAP_LAST<<1) - 1)
 def pre_upgrade_save(ctx, config):
@@ -60,35 +59,21 @@ def post_upgrade_checks(ctx, config):
         epoch = mdsmap['epoch']
         pre_upgrade_epoch = fs_state['epoch']
         assert pre_upgrade_epoch < epoch
-        multiple_max_mds = fs_state['max_mds'] > 1
+        should_decrease_max_mds = fs_state['max_mds'] > 1
         did_decrease_max_mds = False
         should_disable_allow_standby_replay = fs_state['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
         did_disable_allow_standby_replay = False
-        did_fail_fs = False
         for i in range(pre_upgrade_epoch+1, mdsmap['epoch']):
             old_status = mdsc.status(epoch=i)
             old_fs = old_status.get_fsmap(fscid)
             old_mdsmap = old_fs['mdsmap']
-            if not multiple_max_mds \
-                    and (old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE):
-                raise RuntimeError('mgr is failing fs when there is only one '
-                                   f'rank in epoch {i}.')
-            if multiple_max_mds \
-                    and (old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE) \
-                    and old_mdsmap['max_mds'] == 1:
-                raise RuntimeError('mgr is failing fs as well the max_mds '
-                                   f'is reduced in epoch {i}')
-            if old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE:
-                log.debug(f"max_mds not reduced in epoch {i} as fs was failed "
-                          "for carrying out rapid multi-rank mds upgrade")
-                did_fail_fs = True
-            if multiple_max_mds and old_mdsmap['max_mds'] == 1:
+            if should_decrease_max_mds and old_mdsmap['max_mds'] == 1:
                 log.debug(f"max_mds reduced in epoch {i}")
                 did_decrease_max_mds = True
             if should_disable_allow_standby_replay and not (old_mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY):
                 log.debug(f"allow_standby_replay disabled in epoch {i}")
                 did_disable_allow_standby_replay = True
-        assert not multiple_max_mds or did_fail_fs or did_decrease_max_mds
+        assert not should_decrease_max_mds or did_decrease_max_mds
         assert not should_disable_allow_standby_replay or did_disable_allow_standby_replay
 
 
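Read contiguously, the restored `post_upgrade_checks` logic in qa/tasks/fs.py reduces to the sketch below. It assumes the per-epoch mdsmaps have already been fetched (the real task pulls them through the MDS cluster status helper); it is an illustration of the check, not the teuthology task itself.

# Sketch of the restored check, not the real teuthology task.
CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1 << 5)

def check_upgrade_history(fs_state, old_mdsmaps):
    should_decrease_max_mds = fs_state['max_mds'] > 1
    should_disable_asr = bool(fs_state['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY)
    did_decrease_max_mds = False
    did_disable_asr = False
    for old_mdsmap in old_mdsmaps:  # one mdsmap per epoch since pre_upgrade_save
        if should_decrease_max_mds and old_mdsmap['max_mds'] == 1:
            did_decrease_max_mds = True
        if should_disable_asr and not (old_mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY):
            did_disable_asr = True
    # max_mds must have been reduced (if it was >1) and standby-replay disabled
    # (if it was on) at some point during the upgrade.
    assert not should_decrease_max_mds or did_decrease_max_mds
    assert not should_disable_asr or did_disable_asr
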
diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py
index b7ad4a8b66eb3cc3711f318b3be692537c2c58e0..c2cc0aff9775a7429d21eeb4672800d9f8d11439 100644 (file)
@@ -2,7 +2,7 @@ import json
 import logging
 import time
 import uuid
-from typing import TYPE_CHECKING, Optional, Dict, List, Tuple, Any, cast
+from typing import TYPE_CHECKING, Optional, Dict, List, Tuple, Any
 
 import orchestrator
 from cephadm.registry import Registry
@@ -20,7 +20,6 @@ logger = logging.getLogger(__name__)
 
 # from ceph_fs.h
 CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1 << 5)
-CEPH_MDSMAP_NOT_JOINABLE = (1 << 0)
 
 
 def normalize_image_digest(digest: str, default_registry: str) -> str:
@@ -59,7 +58,6 @@ class UpgradeState:
                  target_version: Optional[str] = None,
                  error: Optional[str] = None,
                  paused: Optional[bool] = None,
-                 fail_fs: bool = False,
                  fs_original_max_mds: Optional[Dict[str, int]] = None,
                  fs_original_allow_standby_replay: Optional[Dict[str, bool]] = None,
                  daemon_types: Optional[List[str]] = None,
@@ -78,7 +76,6 @@ class UpgradeState:
         self.fs_original_max_mds: Optional[Dict[str, int]] = fs_original_max_mds
         self.fs_original_allow_standby_replay: Optional[Dict[str,
                                                              bool]] = fs_original_allow_standby_replay
-        self.fail_fs = fail_fs
         self.daemon_types = daemon_types
         self.hosts = hosts
         self.services = services
@@ -92,7 +89,6 @@ class UpgradeState:
             'target_id': self.target_id,
             'target_digests': self.target_digests,
             'target_version': self.target_version,
-            'fail_fs': self.fail_fs,
             'fs_original_max_mds': self.fs_original_max_mds,
             'fs_original_allow_standby_replay': self.fs_original_allow_standby_replay,
             'error': self.error,
@@ -303,8 +299,6 @@ class CephadmUpgrade:
 
     def upgrade_start(self, image: str, version: str, daemon_types: Optional[List[str]] = None,
                       hosts: Optional[List[str]] = None, services: Optional[List[str]] = None, limit: Optional[int] = None) -> str:
-        fail_fs_value = cast(bool, self.mgr.get_module_option_ex(
-            'orchestrator', 'fail_fs', False))
         if self.mgr.mode != 'root':
             raise OrchestratorError('upgrade is not supported in %s mode' % (
                 self.mgr.mode))
@@ -342,7 +336,6 @@ class CephadmUpgrade:
         self.upgrade_state = UpgradeState(
             target_name=target_name,
             progress_id=str(uuid.uuid4()),
-            fail_fs=fail_fs_value,
             daemon_types=daemon_types,
             hosts=hosts,
             services=services,
@@ -619,43 +612,27 @@ class CephadmUpgrade:
 
             # scale down this filesystem?
             if mdsmap["max_mds"] > 1:
-                if self.upgrade_state.fail_fs:
-                    if not (mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE) and \
-                            len(mdsmap['up']) > 0:
-                        self.mgr.log.info(f'Upgrade: failing fs {fs_name} for '
-                                          f'rapid multi-rank mds upgrade')
-                        ret, out, err = self.mgr.check_mon_command({
-                            'prefix': 'fs fail',
-                            'fs_name': fs_name
-                        })
-                        if ret != 0:
-                            continue_upgrade = False
-                    continue
-                else:
-                    self.mgr.log.info('Upgrade: Scaling down filesystem %s' % (
-                        fs_name
-                    ))
-                    if fscid not in self.upgrade_state.fs_original_max_mds:
-                        self.upgrade_state.fs_original_max_mds[fscid] = \
-                            mdsmap['max_mds']
-                        self._save_upgrade_state()
-                    ret, out, err = self.mgr.check_mon_command({
-                        'prefix': 'fs set',
-                        'fs_name': fs_name,
-                        'var': 'max_mds',
-                        'val': '1',
-                    })
-                    continue_upgrade = False
-                    continue
+                self.mgr.log.info('Upgrade: Scaling down filesystem %s' % (
+                    fs_name
+                ))
+                if fscid not in self.upgrade_state.fs_original_max_mds:
+                    self.upgrade_state.fs_original_max_mds[fscid] = mdsmap['max_mds']
+                    self._save_upgrade_state()
+                ret, out, err = self.mgr.check_mon_command({
+                    'prefix': 'fs set',
+                    'fs_name': fs_name,
+                    'var': 'max_mds',
+                    'val': '1',
+                })
+                continue_upgrade = False
+                continue
 
-            if not self.upgrade_state.fail_fs:
-                if not (mdsmap['in'] == [0] and len(mdsmap['up']) <= 1):
-                    self.mgr.log.info(
-                        'Upgrade: Waiting for fs %s to scale down to reach 1 MDS' % (
-                            fs_name))
-                    time.sleep(10)
-                    continue_upgrade = False
-                    continue
+            if not (mdsmap['in'] == [0] and len(mdsmap['up']) <= 1):
+                self.mgr.log.info(
+                    'Upgrade: Waiting for fs %s to scale down to reach 1 MDS' % (fs_name))
+                time.sleep(10)
+                continue_upgrade = False
+                continue
 
             if len(mdsmap['up']) == 0:
                 self.mgr.log.warning(
@@ -799,15 +776,7 @@ class CephadmUpgrade:
                     return False, to_upgrade
 
             if d.daemon_type == 'mds' and self._enough_mds_for_ok_to_stop(d):
-                # when fail_fs is set to true, all MDS daemons will be moved to
-                # up:standby state, so Cephadm won't be able to upgrade due to
-                # this check and and will warn with "It is NOT safe to stop
-                # mds.<daemon_name> at this time: one or more filesystems is
-                # currently degraded", therefore we bypass this check for that
-                # case.
-                assert self.upgrade_state is not None
-                if not self.upgrade_state.fail_fs \
-                        and not self._wait_for_ok_to_stop(d, known_ok_to_stop):
+                if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
                     return False, to_upgrade
 
             to_upgrade.append(d_entry)
@@ -953,25 +922,7 @@ class CephadmUpgrade:
 
     def _complete_mds_upgrade(self) -> None:
         assert self.upgrade_state is not None
-        if self.upgrade_state.fail_fs:
-            for fs in self.mgr.get("fs_map")['filesystems']:
-                fs_name = fs['mdsmap']['fs_name']
-                self.mgr.log.info('Upgrade: Setting filesystem '
-                                  f'{fs_name} Joinable')
-                try:
-                    ret, _, err = self.mgr.check_mon_command({
-                        'prefix': 'fs set',
-                        'fs_name': fs_name,
-                        'var': 'joinable',
-                        'val': 'true',
-                    })
-                except Exception as e:
-                    logger.error("Failed to set fs joinable "
-                                 f"true due to {e}")
-                    raise OrchestratorError("Failed to set"
-                                            "fs joinable true"
-                                            f"due to {e}")
-        elif self.upgrade_state.fs_original_max_mds:
+        if self.upgrade_state.fs_original_max_mds:
             for fs in self.mgr.get("fs_map")['filesystems']:
                 fscid = fs["id"]
                 fs_name = fs['mdsmap']['fs_name']
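
The '+' lines above restore cephadm's original MDS handling: remember the filesystem's max_mds, scale it down to a single rank, and only proceed once rank 0 is alone. A simplified sketch of that flow follows; the mgr handle and upgrade state are passed in explicitly, so the parameter wiring is illustrative rather than the actual CephadmUpgrade code.

# Simplified from the '+' lines above; parameter wiring is illustrative.
def scale_down_fs_for_upgrade(mgr, upgrade_state, fscid, fs_name, mdsmap) -> bool:
    """Return True once it is safe to upgrade this filesystem's MDS daemons."""
    if mdsmap['max_mds'] > 1:
        # Remember the original value so it can be restored after the upgrade.
        if fscid not in upgrade_state.fs_original_max_mds:
            upgrade_state.fs_original_max_mds[fscid] = mdsmap['max_mds']
        mgr.check_mon_command({'prefix': 'fs set', 'fs_name': fs_name,
                               'var': 'max_mds', 'val': '1'})
        return False  # wait for the scale-down to take effect
    # Proceed only when rank 0 is the sole 'in' rank and at most one MDS is up.
    return mdsmap['in'] == [0] and len(mdsmap['up']) <= 1
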
diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py
index 753ae6b7cfcf7646b280a6cd214534393c4e8db3..a11d87cb08c2b3fba6a6bdb8c51ab796c70777f4 100644 (file)
@@ -213,13 +213,7 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
             desc='Orchestrator backend',
             enum_allowed=['cephadm', 'rook', 'test_orchestrator'],
             runtime=True,
-        ),
-        Option(
-            'fail_fs',
-            type='bool',
-            default=False,
-            desc='Fail filesystem for rapid multi-rank mds upgrade'
-        ),
+        )
     ]
     NATIVE_OPTIONS = []  # type: List[dict]
 
@@ -345,9 +339,6 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
     def _select_orchestrator(self) -> str:
         return cast(str, self.get_module_option("orchestrator"))
 
-    def _get_fail_fs_value(self) -> bool:
-        return bool(self.get_module_option("fail_fs"))
-
     @_cli_write_command('orch host add')
     def _add_host(self,
                   hostname: str,
@@ -1493,12 +1484,6 @@ Usage:
         self._set_backend('')
         assert self._select_orchestrator() is None
         self._set_backend(old_orch)
-        old_fs_fail_value = self._get_fail_fs_value()
-        self.set_module_option("fail_fs", True)
-        assert self._get_fail_fs_value() is True
-        self.set_module_option("fail_fs", False)
-        assert self._get_fail_fs_value() is False
-        self.set_module_option("fail_fs", old_fs_fail_value)
 
         e1 = self.remote('selftest', 'remote_from_orchestrator_cli_self_test', "ZeroDivisionError")
         try:
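
The module.py hunks remove the `fail_fs` option and its self-test. For reference, a minimal sketch of the MgrModule option pattern the removed code relied on; the example class is hypothetical, while the Option fields and the `get_module_option` call mirror the removed lines.

# Hypothetical module: the Option fields and get_module_option call mirror the
# removed fail_fs lines, but this class is not part of the Ceph tree.
from mgr_module import MgrModule, Option

class ExampleOrchModule(MgrModule):
    MODULE_OPTIONS = [
        Option('fail_fs', type='bool', default=False,
               desc='Fail filesystem for rapid multi-rank mds upgrade'),
    ]

    def _get_fail_fs_value(self) -> bool:
        # Returns the configured value, falling back to the declared default.
        return bool(self.get_module_option('fail_fs'))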