From a43c7b3a235e3b518b674217edcf1dc43a46c09b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 24 Apr 2018 12:08:07 -0500 Subject: [PATCH] qa/suites/rados/thrash-old-clients: do some thrashing with jewel and luminous clients Signed-off-by: Sage Weil --- qa/suites/rados/thrash-old-clients/% | 0 .../2-size-2-min-size.yaml | 1 + .../3-size-2-min-size.yaml | 1 + .../thrash-old-clients/1-install/jewel.yaml | 7 ++++ .../1-install/luminous.yaml | 6 ++++ .../thrash-old-clients/backoff/normal.yaml | 0 .../thrash-old-clients/backoff/peering.yaml | 5 +++ .../backoff/peering_and_degraded.yaml | 6 ++++ qa/suites/rados/thrash-old-clients/ceph.yaml | 2 ++ qa/suites/rados/thrash-old-clients/clusters/+ | 0 .../clusters/openstack.yaml | 4 +++ .../clusters/two-plus-three.yaml | 13 +++++++ .../d-balancer/crush-compat.yaml | 6 ++++ .../thrash-old-clients/d-balancer/off.yaml | 0 qa/suites/rados/thrash-old-clients/msgr | 1 + .../msgr-failures/fastclose.yaml | 6 ++++ .../thrash-old-clients/msgr-failures/few.yaml | 7 ++++ .../msgr-failures/osd-delay.yaml | 9 +++++ qa/suites/rados/thrash-old-clients/rados.yaml | 1 + .../rados/thrash-old-clients/rocksdb.yaml | 1 + .../thrash-old-clients/thrashers/default.yaml | 24 +++++++++++++ .../thrash-old-clients/thrashers/mapgap.yaml | 26 ++++++++++++++ .../thrashers/morepggrow.yaml | 22 ++++++++++++ .../thrash-old-clients/thrashers/none.yaml | 0 .../thrash-old-clients/thrashers/pggrow.yaml | 24 +++++++++++++ .../thrash-old-clients/thrashosds-health.yaml | 1 + .../workloads/cache-snaps.yaml | 34 +++++++++++++++++++ .../workloads/radosbench.yaml | 33 ++++++++++++++++++ .../workloads/snaps-few-objects.yaml | 13 +++++++ 29 files changed, 253 insertions(+) create mode 100644 qa/suites/rados/thrash-old-clients/% create mode 120000 qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/2-size-2-min-size.yaml create mode 120000 qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/3-size-2-min-size.yaml create mode 100644 qa/suites/rados/thrash-old-clients/1-install/jewel.yaml create mode 100644 qa/suites/rados/thrash-old-clients/1-install/luminous.yaml create mode 100644 qa/suites/rados/thrash-old-clients/backoff/normal.yaml create mode 100644 qa/suites/rados/thrash-old-clients/backoff/peering.yaml create mode 100644 qa/suites/rados/thrash-old-clients/backoff/peering_and_degraded.yaml create mode 100644 qa/suites/rados/thrash-old-clients/ceph.yaml create mode 100644 qa/suites/rados/thrash-old-clients/clusters/+ create mode 100644 qa/suites/rados/thrash-old-clients/clusters/openstack.yaml create mode 100644 qa/suites/rados/thrash-old-clients/clusters/two-plus-three.yaml create mode 100644 qa/suites/rados/thrash-old-clients/d-balancer/crush-compat.yaml create mode 100644 qa/suites/rados/thrash-old-clients/d-balancer/off.yaml create mode 120000 qa/suites/rados/thrash-old-clients/msgr create mode 100644 qa/suites/rados/thrash-old-clients/msgr-failures/fastclose.yaml create mode 100644 qa/suites/rados/thrash-old-clients/msgr-failures/few.yaml create mode 100644 qa/suites/rados/thrash-old-clients/msgr-failures/osd-delay.yaml create mode 120000 qa/suites/rados/thrash-old-clients/rados.yaml create mode 120000 qa/suites/rados/thrash-old-clients/rocksdb.yaml create mode 100644 qa/suites/rados/thrash-old-clients/thrashers/default.yaml create mode 100644 qa/suites/rados/thrash-old-clients/thrashers/mapgap.yaml create mode 100644 qa/suites/rados/thrash-old-clients/thrashers/morepggrow.yaml create mode 100644 qa/suites/rados/thrash-old-clients/thrashers/none.yaml create mode 100644 qa/suites/rados/thrash-old-clients/thrashers/pggrow.yaml create mode 120000 qa/suites/rados/thrash-old-clients/thrashosds-health.yaml create mode 100644 qa/suites/rados/thrash-old-clients/workloads/cache-snaps.yaml create mode 100644 qa/suites/rados/thrash-old-clients/workloads/radosbench.yaml create mode 100644 qa/suites/rados/thrash-old-clients/workloads/snaps-few-objects.yaml diff --git a/qa/suites/rados/thrash-old-clients/% b/qa/suites/rados/thrash-old-clients/% new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/2-size-2-min-size.yaml b/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/2-size-2-min-size.yaml new file mode 120000 index 0000000000000..c429b07b99923 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/2-size-2-min-size.yaml @@ -0,0 +1 @@ +../../../../overrides/2-size-2-min-size.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/3-size-2-min-size.yaml b/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/3-size-2-min-size.yaml new file mode 120000 index 0000000000000..8d529f0a9ad90 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/3-size-2-min-size.yaml @@ -0,0 +1 @@ +../../../../overrides/3-size-2-min-size.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/1-install/jewel.yaml b/qa/suites/rados/thrash-old-clients/1-install/jewel.yaml new file mode 100644 index 0000000000000..b68ee3d30bc9d --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/1-install/jewel.yaml @@ -0,0 +1,7 @@ +tasks: +- install: + branch: jewel + exclude_packages: ['ceph-mgr','libcephfs2','libcephfs-devel','libcephfs-dev'] +- install.upgrade: + mon.a: + mon.b: diff --git a/qa/suites/rados/thrash-old-clients/1-install/luminous.yaml b/qa/suites/rados/thrash-old-clients/1-install/luminous.yaml new file mode 100644 index 0000000000000..1961c8914ec46 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/1-install/luminous.yaml @@ -0,0 +1,6 @@ +tasks: +- install: + branch: luminous +- install.upgrade: + mon.a: + mon.b: diff --git a/qa/suites/rados/thrash-old-clients/backoff/normal.yaml b/qa/suites/rados/thrash-old-clients/backoff/normal.yaml new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/rados/thrash-old-clients/backoff/peering.yaml b/qa/suites/rados/thrash-old-clients/backoff/peering.yaml new file mode 100644 index 0000000000000..66d06117ea224 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/backoff/peering.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd backoff on peering: true diff --git a/qa/suites/rados/thrash-old-clients/backoff/peering_and_degraded.yaml b/qa/suites/rados/thrash-old-clients/backoff/peering_and_degraded.yaml new file mode 100644 index 0000000000000..e6109906503bf --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/backoff/peering_and_degraded.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + osd: + osd backoff on peering: true + osd backoff on degraded: true diff --git a/qa/suites/rados/thrash-old-clients/ceph.yaml b/qa/suites/rados/thrash-old-clients/ceph.yaml new file mode 100644 index 0000000000000..5df450dc9565b --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/ceph.yaml @@ -0,0 +1,2 @@ +tasks: +- ceph: diff --git a/qa/suites/rados/thrash-old-clients/clusters/+ b/qa/suites/rados/thrash-old-clients/clusters/+ new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/rados/thrash-old-clients/clusters/openstack.yaml b/qa/suites/rados/thrash-old-clients/clusters/openstack.yaml new file mode 100644 index 0000000000000..b0f3b9b4da228 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rados/thrash-old-clients/clusters/two-plus-three.yaml b/qa/suites/rados/thrash-old-clients/clusters/two-plus-three.yaml new file mode 100644 index 0000000000000..fc86b982d2e2a --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/clusters/two-plus-three.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.a, mon.c, mgr.y, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mgr.x, osd.4, osd.5, osd.6, osd.7, client.1] +- [client.2] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/rados/thrash-old-clients/d-balancer/crush-compat.yaml b/qa/suites/rados/thrash-old-clients/d-balancer/crush-compat.yaml new file mode 100644 index 0000000000000..aa867660d8959 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/d-balancer/crush-compat.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + mon.a: + - while ! ceph balancer status ; do sleep 1 ; done + - ceph balancer mode crush-compat + - ceph balancer on diff --git a/qa/suites/rados/thrash-old-clients/d-balancer/off.yaml b/qa/suites/rados/thrash-old-clients/d-balancer/off.yaml new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/rados/thrash-old-clients/msgr b/qa/suites/rados/thrash-old-clients/msgr new file mode 120000 index 0000000000000..b29ecddaed7eb --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/msgr @@ -0,0 +1 @@ +../basic/msgr \ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/msgr-failures/fastclose.yaml b/qa/suites/rados/thrash-old-clients/msgr-failures/fastclose.yaml new file mode 100644 index 0000000000000..77fd730aff70f --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/msgr-failures/fastclose.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms tcp read timeout: 5 diff --git a/qa/suites/rados/thrash-old-clients/msgr-failures/few.yaml b/qa/suites/rados/thrash-old-clients/msgr-failures/few.yaml new file mode 100644 index 0000000000000..477bffe619bcf --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/msgr-failures/few.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + osd: + osd heartbeat use min delay socket: true diff --git a/qa/suites/rados/thrash-old-clients/msgr-failures/osd-delay.yaml b/qa/suites/rados/thrash-old-clients/msgr-failures/osd-delay.yaml new file mode 100644 index 0000000000000..a33ba89e14fb8 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/msgr-failures/osd-delay.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms inject delay type: osd + ms inject delay probability: .005 + ms inject delay max: 1 + ms inject internal delays: .002 diff --git a/qa/suites/rados/thrash-old-clients/rados.yaml b/qa/suites/rados/thrash-old-clients/rados.yaml new file mode 120000 index 0000000000000..b756e57bcf090 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/rados.yaml @@ -0,0 +1 @@ +../../../config/rados.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/rocksdb.yaml b/qa/suites/rados/thrash-old-clients/rocksdb.yaml new file mode 120000 index 0000000000000..f26e095f9860a --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/rocksdb.yaml @@ -0,0 +1 @@ +../../../mon_kv_backend/rocksdb.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/thrashers/default.yaml b/qa/suites/rados/thrash-old-clients/thrashers/default.yaml new file mode 100644 index 0000000000000..e8e2007f86d32 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/default.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + log-whitelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/thrash-old-clients/thrashers/mapgap.yaml b/qa/suites/rados/thrash-old-clients/thrashers/mapgap.yaml new file mode 100644 index 0000000000000..7b55097f77b6b --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/mapgap.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + log-whitelist: + - but it is still running + - objects unfound and apparently lost + - osd_map_cache_size + conf: + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 + osd: + osd map cache size: 1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd scrub during recovery: false + osd max backfills: 6 +tasks: +- thrashosds: + timeout: 1800 + chance_pgnum_grow: 0.25 + chance_pgpnum_fix: 0.25 + chance_test_map_discontinuity: 2 diff --git a/qa/suites/rados/thrash-old-clients/thrashers/morepggrow.yaml b/qa/suites/rados/thrash-old-clients/thrashers/morepggrow.yaml new file mode 100644 index 0000000000000..91d2173e87eaf --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/morepggrow.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + journal throttle high multiple: 2 + journal throttle max multiple: 10 + filestore queue throttle high multiple: 2 + filestore queue throttle max multiple: 10 + osd max backfills: 9 + log-whitelist: + - but it is still running + - objects unfound and apparently lost +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 3 + chance_pgpnum_fix: 1 +openstack: +- volumes: + size: 50 diff --git a/qa/suites/rados/thrash-old-clients/thrashers/none.yaml b/qa/suites/rados/thrash-old-clients/thrashers/none.yaml new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/rados/thrash-old-clients/thrashers/pggrow.yaml b/qa/suites/rados/thrash-old-clients/thrashers/pggrow.yaml new file mode 100644 index 0000000000000..8721fd1818839 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/pggrow.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + log-whitelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + filestore odsync write: true + osd max backfills: 2 + osd snap trim sleep: .5 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/thrash-old-clients/thrashosds-health.yaml b/qa/suites/rados/thrash-old-clients/thrashosds-health.yaml new file mode 120000 index 0000000000000..ebf7f34f39bcf --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashosds-health.yaml @@ -0,0 +1 @@ +../../../tasks/thrashosds-health.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/workloads/cache-snaps.yaml b/qa/suites/rados/thrash-old-clients/workloads/cache-snaps.yaml new file mode 100644 index 0000000000000..fc1f5b45cd103 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/workloads/cache-snaps.yaml @@ -0,0 +1,34 @@ +overrides: + ceph: + log-whitelist: + - must scrub before tier agent can activate +tasks: +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 3600 + - sudo ceph osd pool set cache target_max_objects 250 + - sudo ceph osd pool set cache min_read_recency_for_promote 2 +- rados: + clients: [client.2] + pools: [base] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + cache_flush: 50 + cache_try_flush: 50 + cache_evict: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/rados/thrash-old-clients/workloads/radosbench.yaml b/qa/suites/rados/thrash-old-clients/workloads/radosbench.yaml new file mode 100644 index 0000000000000..6a89a4e6ee4e5 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/workloads/radosbench.yaml @@ -0,0 +1,33 @@ +overrides: + ceph: + conf: + client.2: + debug ms: 1 + debug objecter: 20 + debug rados: 20 +tasks: +- full_sequential: + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 diff --git a/qa/suites/rados/thrash-old-clients/workloads/snaps-few-objects.yaml b/qa/suites/rados/thrash-old-clients/workloads/snaps-few-objects.yaml new file mode 100644 index 0000000000000..f0a5735a92cbb --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/workloads/snaps-few-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + clients: [client.2] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 -- 2.39.5