From: Adam King Date: Mon, 3 Jul 2023 18:33:34 +0000 (-0400) Subject: mgr/cephadm: fix rgw spec migration with simple specs X-Git-Tag: v19.0.0~925^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=1860ef83877c76c51a20aac48036bd9590572cc2;p=ceph.git mgr/cephadm: fix rgw spec migration with simple specs The rgw spec migration code, intended to formalize the rgw_frontend_type spec option, doesn't work with simple specs i.e. service_type: rgw service_id: rgw.1 service_name: rgw.rgw.1 placement: label: rgw because the migration code assumes there will always be a "spec" section inside the spec. This is the case for more involved rgw specs such as service_type: rgw service_id: foo placement: label: rgw count_per_host: 2 spec: rgw_realm: myrealm rgw_zone: myzone rgw_frontend_type: "beast" rgw_frontend_port: 5000 which is what the migration is actually concerned about (verification of the rgw_frontend_type in these specs). In the case where the spec is more simple, we should just leave the spec alone and move on. Unfortunately the current code assumes the field will always be there and hits an unhandled KeyError when trying to migrate the more simple specs. This causes the cephadm module to crash shortly after starting an upgrade to a version that includes this migration and it's very difficult to find the root cause. This can be worked around by adding fields to the rgw spec before upgrade so the "spec" field exists in the spec and the migration works as intended. This commit fixes the migration in the simple case as well as adding testing for that case to both the unit tests and orch/cephadm teuthology upgrade tests Fixes: https://tracker.ceph.com/issues/61889 Signed-off-by: Adam King --- diff --git a/qa/suites/orch/cephadm/upgrade/3-upgrade/simple.yaml b/qa/suites/orch/cephadm/upgrade/3-upgrade/simple.yaml index 70fe3f444f991..f10a49beafe7c 100644 --- a/qa/suites/orch/cephadm/upgrade/3-upgrade/simple.yaml +++ b/qa/suites/orch/cephadm/upgrade/3-upgrade/simple.yaml @@ -8,6 +8,8 @@ tasks: - radosgw-admin zone create --rgw-zonegroup=default --rgw-zone=z --master --default - radosgw-admin period update --rgw-realm=r --commit - ceph orch apply rgw foo --realm r --zone z --placement=2 --port=8000 + # simple rgw spec (will have no "spec" field) to make sure that works with rgw spec migration + - ceph orch apply rgw smpl # setup iscsi - ceph osd pool create foo - rbd pool init foo diff --git a/src/pybind/mgr/cephadm/migrations.py b/src/pybind/mgr/cephadm/migrations.py index 2e8587abfc613..52a8605bc1d14 100644 --- a/src/pybind/mgr/cephadm/migrations.py +++ b/src/pybind/mgr/cephadm/migrations.py @@ -370,6 +370,11 @@ class Migrations: return RGWSpec.from_json(new_spec) def rgw_spec_needs_migration(self, spec: Dict[Any, Any]) -> bool: + if 'spec' not in spec: + # if users allowed cephadm to set up most of the + # attributes, it's possible there is no "spec" section + # inside the spec. In that case, no migration is needed + return False return 'rgw_frontend_type' in spec['spec'] \ and spec['spec']['rgw_frontend_type'] is not None \ and spec['spec']['rgw_frontend_type'].strip() not in ['beast', 'civetweb'] diff --git a/src/pybind/mgr/cephadm/tests/test_migration.py b/src/pybind/mgr/cephadm/tests/test_migration.py index ed4c8fc42fa95..1f1d32e8b40ce 100644 --- a/src/pybind/mgr/cephadm/tests/test_migration.py +++ b/src/pybind/mgr/cephadm/tests/test_migration.py @@ -1,4 +1,5 @@ import json +import pytest from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec from ceph.utils import datetime_to_str, datetime_now @@ -259,26 +260,43 @@ def test_migrate_set_sane_value(cephadm_module: CephadmOrchestrator): assert cephadm_module.migration_current == 0 +@pytest.mark.parametrize( + "rgw_spec_store_entry, should_migrate", + [ + ({ + 'spec': { + 'service_type': 'rgw', + 'service_name': 'rgw.foo', + 'service_id': 'foo', + 'placement': { + 'hosts': ['host1'] + }, + 'spec': { + 'rgw_frontend_type': 'beast tcp_nodelay=1 request_timeout_ms=65000 rgw_thread_pool_size=512', + 'rgw_frontend_port': '5000', + }, + }, + 'created': datetime_to_str(datetime_now()), + }, True), + ({ + 'spec': { + 'service_type': 'rgw', + 'service_name': 'rgw.foo', + 'service_id': 'foo', + 'placement': { + 'hosts': ['host1'] + }, + }, + 'created': datetime_to_str(datetime_now()), + }, False), + ] +) @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) -def test_migrate_rgw_spec(cephadm_module: CephadmOrchestrator): +def test_migrate_rgw_spec(cephadm_module: CephadmOrchestrator, rgw_spec_store_entry, should_migrate): with with_host(cephadm_module, 'host1'): cephadm_module.set_store( SPEC_STORE_PREFIX + 'rgw', - json.dumps({ - 'spec': { - 'service_type': 'rgw', - 'service_name': 'rgw.foo', - 'service_id': 'foo', - 'placement': { - 'hosts': ['host1'] - }, - 'spec': { - 'rgw_frontend_type': 'beast tcp_nodelay=1 request_timeout_ms=65000 rgw_thread_pool_size=512', - 'rgw_frontend_port': '5000', - }, - }, - 'created': datetime_to_str(datetime_now()), - }, sort_keys=True), + json.dumps(rgw_spec_store_entry, sort_keys=True), ) # make sure rgw_migration_queue is populated accordingly @@ -296,19 +314,27 @@ def test_migrate_rgw_spec(cephadm_module: CephadmOrchestrator): cephadm_module.migration.migrate() assert cephadm_module.migration_current == LAST_MIGRATION - # make sure the spec has been migrated and the the param=value entries - # that were part of the rgw_frontend_type are now in the new - # 'rgw_frontend_extra_args' list - assert 'rgw.foo' in cephadm_module.spec_store.all_specs - rgw_spec = cephadm_module.spec_store.all_specs['rgw.foo'] - assert dict(rgw_spec.to_json()) == {'service_type': 'rgw', - 'service_id': 'foo', - 'service_name': 'rgw.foo', - 'placement': {'hosts': ['host1']}, - 'spec': { - 'rgw_frontend_extra_args': ['tcp_nodelay=1', - 'request_timeout_ms=65000', - 'rgw_thread_pool_size=512'], - 'rgw_frontend_port': '5000', - 'rgw_frontend_type': 'beast', - }} + if should_migrate: + # make sure the spec has been migrated and the the param=value entries + # that were part of the rgw_frontend_type are now in the new + # 'rgw_frontend_extra_args' list + assert 'rgw.foo' in cephadm_module.spec_store.all_specs + rgw_spec = cephadm_module.spec_store.all_specs['rgw.foo'] + assert dict(rgw_spec.to_json()) == {'service_type': 'rgw', + 'service_id': 'foo', + 'service_name': 'rgw.foo', + 'placement': {'hosts': ['host1']}, + 'spec': { + 'rgw_frontend_extra_args': ['tcp_nodelay=1', + 'request_timeout_ms=65000', + 'rgw_thread_pool_size=512'], + 'rgw_frontend_port': '5000', + 'rgw_frontend_type': 'beast', + }} + else: + # in a real environment, we still expect the spec to be there, + # just untouched by the migration. For this test specifically + # though, the spec will only have ended up in the spec store + # if it was migrated, so we can use this to test the spec + # was untouched + assert 'rgw.foo' not in cephadm_module.spec_store.all_specs