https://docs.ceph.com/en/quincy/cephadm/upgrade/#staggered-upgrade
Relevant tracker: https://tracker.ceph.com/issues/55715
+ Relevant tracker: https://tracker.ceph.com/issues/5614
Starting the upgrade
====================
-.. note::
-
- `Staggered Upgrade`_ of the mons/mgrs may be necessary to have access
- to this new feature.
-
- Cephadm by default reduces `max_mds` to `1`. This can be disruptive for
- large-scale CephFS deployments because the cluster cannot quickly reduce
- active MDS(s) to `1`, and a single active MDS cannot easily handle the load
- of all clients even for a short time. Therefore, to upgrade MDS(s) without
- reducing `max_mds`, the `fail_fs` option can be set to `true` (default
- value is `false`) prior to initiating the upgrade:
-
- .. prompt:: bash #
-
- ceph config set mgr mgr/orchestrator/fail_fs true
-
- This would:
- #. Fail CephFS filesystems, bringing active MDS daemon(s) to
- `up:standby` state.
-
- #. Upgrade MDS daemons safely.
-
- #. Bring CephFS filesystems back up, bringing the state of active
- MDS daemon(s) from `up:standby` to `up:active`.
-
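While a `fail_fs`-based upgrade runs, the resulting MDS state transitions can
be observed directly (a minimal check, assuming a filesystem named `cephfs`):

.. prompt:: bash #

   ceph fs status cephfs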
Before you use cephadm to upgrade Ceph, verify that all hosts are currently online and that your cluster is healthy by running the following command:
.. prompt:: bash #

   ceph -s
+++ /dev/null
-../.qa/
\ No newline at end of file
+++ /dev/null
-.qa/cephfs/objectstore-ec/bluestore-bitmap.yaml
\ No newline at end of file
+++ /dev/null
-.qa/distros/podman/centos_8.stream_container_tools.yaml
\ No newline at end of file
+++ /dev/null
-.qa/cephfs/conf/
\ No newline at end of file
+++ /dev/null
-../.qa/
\ No newline at end of file
+++ /dev/null
-.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file
+++ /dev/null
-.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file
+++ /dev/null
-overrides:
- ceph:
- conf:
- global:
- mon pg warn min per osd: 0
+++ /dev/null
-overrides:
- kclient:
- syntax: 'v1'
+++ /dev/null
-roles:
-- - host.a
- - client.0
- - osd.0
- - osd.1
- - osd.2
-- - host.b
- - client.1
- - osd.3
- - osd.4
- - osd.5
+++ /dev/null
-../.qa/
\ No newline at end of file
+++ /dev/null
-../.qa/
\ No newline at end of file
+++ /dev/null
-meta:
-- desc: |
- setup ceph/pacific
-
-tasks:
-- install:
- branch: pacific
- exclude_packages:
- - ceph-volume
-- print: "**** done install task..."
-- cephadm:
- image: quay.io/ceph/daemon-base:latest-pacific
- roleless: true
- cephadm_branch: pacific
- cephadm_git_url: https://github.com/ceph/ceph
- conf:
- osd:
- # set config option for which cls modules are allowed to be loaded / used
- osd_class_load_list: "*"
- osd_class_default_list: "*"
-- print: "**** done end installing pacific cephadm ..."
-- cephadm.shell:
- host.a:
- - ceph config set mgr mgr/cephadm/use_repo_digest true --force
-- print: "**** done cephadm.shell ceph config set mgr..."
-- cephadm.shell:
- host.a:
- - ceph orch status
- - ceph orch ps
- - ceph orch ls
- - ceph orch host ls
- - ceph orch device ls
+++ /dev/null
-meta:
-- desc: |
- setup ceph/pacific v16.2.4
-
-tasks:
-# Disable metrics sending by kclient as it may crash (assert) a v16.2.4 MDS
-- pexec:
- clients:
- - sudo modprobe -r ceph
- - sudo modprobe ceph disable_send_metrics=on
-- install:
- tag: v16.2.4
- exclude_packages:
- - ceph-volume
-- print: "**** done install task..."
-- cephadm:
- roleless: true
- image: quay.io/ceph/ceph:v16.2.4
- cephadm_branch: v16.2.4
- cephadm_git_url: https://github.com/ceph/ceph
- # needed for v16.2.4 due to --skip-admin-label
- avoid_pacific_features: true
-- print: "**** done starting v16.2.4"
-- cephadm.shell:
- host.a:
- - ceph orch status
- - ceph orch ps
- - ceph orch ls
- - ceph orch host ls
- - ceph orch device ls
+++ /dev/null
-../.qa/
\ No newline at end of file
+++ /dev/null
-tasks:
-- cephadm.shell:
- host.a:
- - ceph fs volume create cephfs --placement=4
- - ceph fs dump
+++ /dev/null
-../.qa/
\ No newline at end of file
+++ /dev/null
-tasks:
-- cephadm.shell:
- host.a:
- - ceph fs set cephfs max_mds 2
+++ /dev/null
-../.qa/
\ No newline at end of file
+++ /dev/null
-tasks:
-- cephadm.shell:
- host.a:
- - ceph fs set cephfs allow_standby_replay false
+++ /dev/null
-tasks:
-- cephadm.shell:
- host.a:
- - ceph fs set cephfs inline_data false
+++ /dev/null
-tasks:
-- cephadm.shell:
- host.a:
- - ceph fs set cephfs inline_data true --yes-i-really-really-mean-it
+++ /dev/null
-tasks:
-- cephadm.shell:
- host.a:
- - ceph fs dump
- - ceph --format=json fs dump | jq -e ".filesystems | length == 1"
- - while ! ceph --format=json mds versions | jq -e ". | add == 4"; do sleep 1; done
-- fs.pre_upgrade_save:
+++ /dev/null
-tasks:
-- kclient:
-- print: "**** done client"
+++ /dev/null
-tasks:
-- parallel:
- - upgrade-tasks
- - workload-tasks
-
-upgrade-tasks:
- sequential:
- - cephadm.shell:
- env: [sha1]
- host.a:
- - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force
- - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force
- - ceph config set global log_to_journald false --force
- - ceph orch ps
- - ceph versions
- - ceph -s
- - ceph orch ls
- - ceph orch daemon redeploy "mgr.$(ceph mgr dump -f json | jq .standbys | jq .[] | jq -r .name)" --image quay.ceph.io/ceph-ci/ceph:$sha1
- - ceph orch ps --refresh
- - sleep 300
- - ceph orch ps
- - ceph versions
- - ceph -s
- - ceph versions | jq -e '.mgr | length == 2'
- - ceph mgr fail
- - sleep 180
- - ceph orch daemon redeploy "mgr.$(ceph mgr dump -f json | jq .standbys | jq .[] | jq -r .name)" --image quay.ceph.io/ceph-ci/ceph:$sha1
- - ceph orch ps --refresh
- - sleep 180
- - ceph orch ps
- - ceph versions
- - ceph -s
- - ceph mgr fail
- - sleep 300
- - ceph orch ps
- - ceph versions
- - ceph -s
- - ceph versions | jq -e '.mgr | length == 1'
- - ceph mgr fail
- - sleep 180
- - ceph orch ps
- - ceph versions
- - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types mgr
- - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done
- - ceph versions | jq -e '.mgr | length == 1'
- - ceph versions | jq -e '.mgr | keys' | grep $sha1
- - ceph versions | jq -e '.overall | length == 2'
- - ceph orch upgrade check quay.ceph.io/ceph-ci/ceph:$sha1 | jq -e '.up_to_date | length == 2'
- - ceph orch ps --refresh
- - sleep 180
- - ceph config set mgr mgr/orchestrator/fail_fs true
- - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1
- - cephadm.shell:
- env: [sha1]
- host.a:
- - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph fs dump; ceph orch upgrade status ; sleep 30 ; done
- - ceph orch ps
- - ceph versions
- - echo "wait for servicemap items w/ changing names to refresh"
- - sleep 60
- - ceph orch ps
- - ceph health detail
- - ceph orch upgrade status
- - ceph versions
- - ceph versions | jq -e '.overall | length == 1'
- - ceph versions | jq -e '.overall | keys' | grep $sha1
-
-workload-tasks:
- sequential:
- - workunit:
- clients:
- all:
- - suites/fsstress.sh
+++ /dev/null
-tasks:
-- cephadm.shell:
- host.a:
- - ceph fs dump
-- fs.post_upgrade_checks:
# Everything up to CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1<<5)
-CEPH_MDSMAP_NOT_JOINABLE = (1 << 0)
CEPH_MDSMAP_LAST = CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
UPGRADE_FLAGS_MASK = ((CEPH_MDSMAP_LAST<<1) - 1)
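As a worked example, with the last tracked flag at bit 5 the mask covers bits
0 through 5 inclusive:

    # Worked example of the mask arithmetic above (values per ceph_fs.h).
    CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1 << 5)         # 0b100000 == 32
    CEPH_MDSMAP_LAST = CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
    UPGRADE_FLAGS_MASK = ((CEPH_MDSMAP_LAST << 1) - 1)  # 0b111111 == 63
    assert UPGRADE_FLAGS_MASK == 0x3f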
def pre_upgrade_save(ctx, config):
epoch = mdsmap['epoch']
pre_upgrade_epoch = fs_state['epoch']
assert pre_upgrade_epoch < epoch
- multiple_max_mds = fs_state['max_mds'] > 1
+ should_decrease_max_mds = fs_state['max_mds'] > 1
did_decrease_max_mds = False
should_disable_allow_standby_replay = fs_state['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
did_disable_allow_standby_replay = False
- did_fail_fs = False
for i in range(pre_upgrade_epoch+1, mdsmap['epoch']):
old_status = mdsc.status(epoch=i)
old_fs = old_status.get_fsmap(fscid)
old_mdsmap = old_fs['mdsmap']
- if not multiple_max_mds \
- and (old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE):
- raise RuntimeError('mgr is failing fs when there is only one '
- f'rank in epoch {i}.')
- if multiple_max_mds \
- and (old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE) \
- and old_mdsmap['max_mds'] == 1:
- raise RuntimeError('mgr is failing fs as well as reducing '
- f'max_mds in epoch {i}')
- if old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE:
- log.debug(f"max_mds not reduced in epoch {i} as the fs was "
- "failed to carry out a rapid multi-rank mds upgrade")
- did_fail_fs = True
- if multiple_max_mds and old_mdsmap['max_mds'] == 1:
+ if should_decrease_max_mds and old_mdsmap['max_mds'] == 1:
log.debug(f"max_mds reduced in epoch {i}")
did_decrease_max_mds = True
if should_disable_allow_standby_replay and not (old_mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY):
log.debug(f"allow_standby_replay disabled in epoch {i}")
did_disable_allow_standby_replay = True
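# Each assertion below encodes an implication: if the pre-upgrade state
# required a change (max_mds > 1, or standby-replay enabled), then some
# epoch in the scanned range must show that change actually happening.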
- assert not multiple_max_mds or did_fail_fs or did_decrease_max_mds
+ assert not should_decrease_max_mds or did_decrease_max_mds
assert not should_disable_allow_standby_replay or did_disable_allow_standby_replay
import logging
import time
import uuid
-from typing import TYPE_CHECKING, Optional, Dict, List, Tuple, Any, cast
+from typing import TYPE_CHECKING, Optional, Dict, List, Tuple, Any
import orchestrator
from cephadm.registry import Registry
# from ceph_fs.h
CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1 << 5)
-CEPH_MDSMAP_NOT_JOINABLE = (1 << 0)
def normalize_image_digest(digest: str, default_registry: str) -> str:
target_version: Optional[str] = None,
error: Optional[str] = None,
paused: Optional[bool] = None,
- fail_fs: bool = False,
fs_original_max_mds: Optional[Dict[str, int]] = None,
fs_original_allow_standby_replay: Optional[Dict[str, bool]] = None,
daemon_types: Optional[List[str]] = None,
self.fs_original_max_mds: Optional[Dict[str, int]] = fs_original_max_mds
self.fs_original_allow_standby_replay: Optional[Dict[str,
bool]] = fs_original_allow_standby_replay
- self.fail_fs = fail_fs
self.daemon_types = daemon_types
self.hosts = hosts
self.services = services
'target_id': self.target_id,
'target_digests': self.target_digests,
'target_version': self.target_version,
- 'fail_fs': self.fail_fs,
'fs_original_max_mds': self.fs_original_max_mds,
'fs_original_allow_standby_replay': self.fs_original_allow_standby_replay,
'error': self.error,
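After the revert, the serialized upgrade state no longer carries a `fail_fs`
key. A minimal sketch of the persisted dict, limited to the fields visible in
this hunk (all values are illustrative, not taken from a real cluster):

    # Illustrative UpgradeState.to_json() snapshot; only keys shown in the
    # hunk above are included, and every value here is made up.
    example_state = {
        'target_name': 'quay.io/ceph/ceph:v17.2.6',
        'target_id': None,
        'target_digests': None,
        'target_version': '17.2.6',
        'fs_original_max_mds': {'1': 2},
        'fs_original_allow_standby_replay': {'1': False},
        'error': None,
    }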
def upgrade_start(self, image: str, version: str, daemon_types: Optional[List[str]] = None,
hosts: Optional[List[str]] = None, services: Optional[List[str]] = None, limit: Optional[int] = None) -> str:
- fail_fs_value = cast(bool, self.mgr.get_module_option_ex(
- 'orchestrator', 'fail_fs', False))
if self.mgr.mode != 'root':
raise OrchestratorError('upgrade is not supported in %s mode' % (
self.mgr.mode))
self.upgrade_state = UpgradeState(
target_name=target_name,
progress_id=str(uuid.uuid4()),
- fail_fs=fail_fs_value,
daemon_types=daemon_types,
hosts=hosts,
services=services,
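A sketch of how a staggered upgrade might reach this entry point (assumptions:
`upgrade` is the CephadmUpgrade instance, and the image tag and limit are
illustrative; see the staggered-upgrade link in the header for the
corresponding CLI flags):

    # Hypothetical call site, mirroring something like:
    #   ceph orch upgrade start --image quay.io/ceph/ceph:v17.2.6 \
    #       --daemon-types mgr --limit 2
    upgrade.upgrade_start(
        image='quay.io/ceph/ceph:v17.2.6',
        version='',               # image is pinned, so no version is given
        daemon_types=['mgr'],
        hosts=None,
        services=None,
        limit=2,
    )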
# scale down this filesystem?
if mdsmap["max_mds"] > 1:
- if self.upgrade_state.fail_fs:
- if not (mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE) and \
- len(mdsmap['up']) > 0:
- self.mgr.log.info(f'Upgrade: failing fs {fs_name} for '
- f'rapid multi-rank mds upgrade')
- ret, out, err = self.mgr.check_mon_command({
- 'prefix': 'fs fail',
- 'fs_name': fs_name
- })
- if ret != 0:
- continue_upgrade = False
- continue
- else:
- self.mgr.log.info('Upgrade: Scaling down filesystem %s' % (
- fs_name
- ))
- if fscid not in self.upgrade_state.fs_original_max_mds:
- self.upgrade_state.fs_original_max_mds[fscid] = \
- mdsmap['max_mds']
- self._save_upgrade_state()
- ret, out, err = self.mgr.check_mon_command({
- 'prefix': 'fs set',
- 'fs_name': fs_name,
- 'var': 'max_mds',
- 'val': '1',
- })
- continue_upgrade = False
- continue
+ self.mgr.log.info('Upgrade: Scaling down filesystem %s' % (
+ fs_name
+ ))
+ if fscid not in self.upgrade_state.fs_original_max_mds:
+ self.upgrade_state.fs_original_max_mds[fscid] = mdsmap['max_mds']
+ self._save_upgrade_state()
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'fs set',
+ 'fs_name': fs_name,
+ 'var': 'max_mds',
+ 'val': '1',
+ })
+ continue_upgrade = False
+ continue
- if not self.upgrade_state.fail_fs:
- if not (mdsmap['in'] == [0] and len(mdsmap['up']) <= 1):
- self.mgr.log.info(
- 'Upgrade: Waiting for fs %s to scale down to reach 1 MDS' % (
- fs_name))
- time.sleep(10)
- continue_upgrade = False
- continue
+ if not (mdsmap['in'] == [0] and len(mdsmap['up']) <= 1):
+ self.mgr.log.info(
+ 'Upgrade: Waiting for fs %s to scale down to reach 1 MDS' % (fs_name))
+ time.sleep(10)
+ continue_upgrade = False
+ continue
if len(mdsmap['up']) == 0:
self.mgr.log.warning(
return False, to_upgrade
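Condensed, the restored per-filesystem gate above is a pure predicate (a
sketch; `mdsmap` stands for the decoded MDSMap dict of one filesystem):

    def mds_scale_down_complete(mdsmap: dict) -> bool:
        # The upgrade may touch MDS daemons only once rank 0 is the sole
        # 'in' rank and at most one MDS remains up.
        return mdsmap['in'] == [0] and len(mdsmap['up']) <= 1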
if d.daemon_type == 'mds' and self._enough_mds_for_ok_to_stop(d):
- # when fail_fs is set to true, all MDS daemons will be moved to
- # up:standby state, so Cephadm won't be able to upgrade due to
- # this check and will warn with "It is NOT safe to stop
- # mds.<daemon_name> at this time: one or more filesystems is
- # currently degraded", therefore we bypass this check for that
- # case.
- assert self.upgrade_state is not None
- if not self.upgrade_state.fail_fs \
- and not self._wait_for_ok_to_stop(d, known_ok_to_stop):
+ if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
return False, to_upgrade
to_upgrade.append(d_entry)
def _complete_mds_upgrade(self) -> None:
assert self.upgrade_state is not None
- if self.upgrade_state.fail_fs:
- for fs in self.mgr.get("fs_map")['filesystems']:
- fs_name = fs['mdsmap']['fs_name']
- self.mgr.log.info('Upgrade: Setting filesystem '
- f'{fs_name} Joinable')
- try:
- ret, _, err = self.mgr.check_mon_command({
- 'prefix': 'fs set',
- 'fs_name': fs_name,
- 'var': 'joinable',
- 'val': 'true',
- })
- except Exception as e:
- logger.error("Failed to set fs joinable "
- f"true due to {e}")
- raise OrchestratorError("Failed to set"
- "fs joinable true"
- f"due to {e}")
- elif self.upgrade_state.fs_original_max_mds:
+ if self.upgrade_state.fs_original_max_mds:
for fs in self.mgr.get("fs_map")['filesystems']:
fscid = fs["id"]
fs_name = fs['mdsmap']['fs_name']
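The remainder of this path is cut off in the hunk; presumably it restores each
filesystem's saved `max_mds`. A sketch of that restore step, mirroring the
scale-down command earlier in this file (an assumption, not the verbatim code):

    # Presumed restore step: undo the pre-upgrade scale-down using the
    # value stashed in fs_original_max_mds at upgrade start.
    ret, out, err = self.mgr.check_mon_command({
        'prefix': 'fs set',
        'fs_name': fs_name,
        'var': 'max_mds',
        'val': str(self.upgrade_state.fs_original_max_mds[fscid]),
    })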
desc='Orchestrator backend',
enum_allowed=['cephadm', 'rook', 'test_orchestrator'],
runtime=True,
- ),
- Option(
- 'fail_fs',
- type='bool',
- default=False,
- desc='Fail filesystem for rapid multi-rank mds upgrade'
- ),
+ )
]
NATIVE_OPTIONS = [] # type: List[dict]
def _select_orchestrator(self) -> str:
return cast(str, self.get_module_option("orchestrator"))
- def _get_fail_fs_value(self) -> bool:
- return bool(self.get_module_option("fail_fs"))
-
@_cli_write_command('orch host add')
def _add_host(self,
hostname: str,
self._set_backend('')
assert self._select_orchestrator() is None
self._set_backend(old_orch)
- old_fs_fail_value = self._get_fail_fs_value()
- self.set_module_option("fail_fs", True)
- assert self._get_fail_fs_value() is True
- self.set_module_option("fail_fs", False)
- assert self._get_fail_fs_value() is False
- self.set_module_option("fail_fs", old_fs_fail_value)
e1 = self.remote('selftest', 'remote_from_orchestrator_cli_self_test', "ZeroDivisionError")
try: