From: Shylesh Kumar Date: Mon, 21 May 2018 17:27:39 +0000 (+0530) Subject: thrash refactoring X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8cedda936afd5c847f6d45425ce25d88aae1d176;p=ceph.git thrash refactoring Signed-off-by: Shylesh Kumar --- diff --git a/qa/downstream-config/clusters/fixed-6.yaml b/qa/downstream-config/clusters/fixed-6.yaml index db8f36ca350..c65d7c3b221 100644 --- a/qa/downstream-config/clusters/fixed-6.yaml +++ b/qa/downstream-config/clusters/fixed-6.yaml @@ -1,10 +1,9 @@ roles: -- [mon.a, mon.c, mgr.y, osd.0, osd.1, osd.2, osd.3, client.0] -- [mon.b, mgr.x, client.1] +- [mon.a, mon.b, mgr.x, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.c, mgr.y, client.1] - [osd.4, osd.5] - [osd.6, osd.7] -- [client.2, client.3] -- [client.4, client.5] + openstack: - volumes: # attached to each instance count: 4 diff --git a/qa/suites/rados/downstream/singleton/all/admin_socket_objecter_requests.yaml b/qa/suites/rados/downstream/singleton/all/admin_socket_objecter_requests.yaml new file mode 100644 index 00000000000..d155cf8fbfe --- /dev/null +++ b/qa/suites/rados/downstream/singleton/all/admin_socket_objecter_requests.yaml @@ -0,0 +1,27 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x +- - osd.0 + - osd.1 + - osd.2 +- - client.0 + +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + conf: + client.0: + admin socket: /var/run/ceph/ceph-$name.asok +- radosbench: + clients: [client.0] + time: 150 +- admin_socket: + client.0: + objecter_requests: + test: "http://git.ceph.com/?p={repo};a=blob_plain;f=src/test/admin_socket/objecter_requests;hb={branch}" diff --git a/qa/suites/rados/downstream/singleton/all/pool-snaps-few-objects.yaml b/qa/suites/rados/downstream/singleton/all/pool-snaps-few-objects.yaml new file mode 100644 index 00000000000..c78fe177b81 --- /dev/null +++ b/qa/suites/rados/downstream/singleton/all/pool-snaps-few-objects.yaml @@ -0,0 +1,32 @@ +roles: +- - mon.a + - mon.b + - mon.c +- - mgr.x + - osd.0 + - osd.1 + - osd.2 +- - client.0 +openstack: + - volumes: + count: 3 + size: 10 +tasks: +- install: +- ceph: + conf: + osd: + osd deep scrub update digest min age: 0 +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + pool_snaps: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/downstream/singleton/all/snaps-few-objects.yaml b/qa/suites/rados/downstream/singleton/all/snaps-few-objects.yaml new file mode 100644 index 00000000000..96775266e75 --- /dev/null +++ b/qa/suites/rados/downstream/singleton/all/snaps-few-objects.yaml @@ -0,0 +1,32 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x +- - osd.0 + - osd.1 + - osd.2 +- - osd.3 + - osd.4 + - osd.5 +- - client.0 +openstack: + - volumes: + count: 3 + size: 10 + +tasks: +- install: +- ceph: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/downstream/thrash/% b/qa/suites/rados/downstream/thrash/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/downstream/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled b/qa/suites/rados/downstream/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled new file mode 120000 index 00000000000..a0d7915038f --- /dev/null +++ b/qa/suites/rados/downstream/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled @@ -0,0 +1 @@ +../../../../../overrides/2-size-2-min-size.yaml \ No newline at end of file diff --git a/qa/suites/rados/downstream/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml b/qa/suites/rados/downstream/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml new file mode 120000 index 00000000000..a5a5633f963 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml @@ -0,0 +1 @@ +../../../../../overrides/3-size-2-min-size.yaml \ No newline at end of file diff --git a/qa/suites/rados/downstream/thrash/1-pg-log-overrides/normal_pg_log.yaml b/qa/suites/rados/downstream/thrash/1-pg-log-overrides/normal_pg_log.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/downstream/thrash/1-pg-log-overrides/short_pg_log.yaml b/qa/suites/rados/downstream/thrash/1-pg-log-overrides/short_pg_log.yaml new file mode 120000 index 00000000000..1c7107e1c2c --- /dev/null +++ b/qa/suites/rados/downstream/thrash/1-pg-log-overrides/short_pg_log.yaml @@ -0,0 +1 @@ +../../../../../overrides/short_pg_log.yaml \ No newline at end of file diff --git a/qa/suites/rados/downstream/thrash/backoff/normal.yaml b/qa/suites/rados/downstream/thrash/backoff/normal.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/downstream/thrash/backoff/peering.yaml b/qa/suites/rados/downstream/thrash/backoff/peering.yaml new file mode 100644 index 00000000000..66d06117ea2 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/backoff/peering.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd backoff on peering: true diff --git a/qa/suites/rados/downstream/thrash/backoff/peering_and_degraded.yaml b/qa/suites/rados/downstream/thrash/backoff/peering_and_degraded.yaml new file mode 100644 index 00000000000..e6109906503 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/backoff/peering_and_degraded.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + osd: + osd backoff on peering: true + osd backoff on degraded: true diff --git a/qa/suites/rados/downstream/thrash/ceph.yaml b/qa/suites/rados/downstream/thrash/ceph.yaml new file mode 100644 index 00000000000..2030acb9083 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rados/downstream/thrash/clusters/+ b/qa/suites/rados/downstream/thrash/clusters/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/downstream/thrash/clusters/fixed-2.yaml b/qa/suites/rados/downstream/thrash/clusters/fixed-2.yaml new file mode 120000 index 00000000000..42415827f14 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/clusters/fixed-2.yaml @@ -0,0 +1 @@ +../../../../../downstream-config/clusters/fixed-2.yaml \ No newline at end of file diff --git a/qa/suites/rados/downstream/thrash/clusters/openstack.yaml b/qa/suites/rados/downstream/thrash/clusters/openstack.yaml new file mode 100644 index 00000000000..b0f3b9b4da2 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rados/downstream/thrash/d-require-luminous/at-end.yaml b/qa/suites/rados/downstream/thrash/d-require-luminous/at-end.yaml new file mode 100644 index 00000000000..f91ab63ebb7 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/d-require-luminous/at-end.yaml @@ -0,0 +1,33 @@ +# do not require luminous osds at mkfs time; only set flag at +# the end of the test run, then do a final scrub (to convert any +# legacy snapsets), and verify we are healthy. +tasks: +- full_sequential_finally: + - exec: + mon.a: + - ceph osd require-osd-release luminous + - ceph osd pool application enable base rados || true +# make sure osds have latest map + - rados -p rbd bench 5 write -b 4096 + - ceph.healthy: + - ceph.osd_scrub_pgs: + cluster: ceph + - exec: + mon.a: + - sleep 150 + - ceph osd dump | grep purged_snapdirs || echo "Failed grep purged_snapdirs" + - ceph pg dump -f json-pretty + - "ceph pg dump sum -f json-pretty | grep num_legacy_snapsets | head -1 | grep ': 0'" +overrides: + ceph: + conf: + global: + mon debug no require luminous: true + +# setting luminous triggers peering, which *might* trigger health alerts + log-whitelist: + - overall HEALTH_ + - \(PG_AVAILABILITY\) + - \(PG_DEGRADED\) + thrashosds: + chance_thrash_cluster_full: 0 diff --git a/qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs-balancer-crush-compat.yaml b/qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs-balancer-crush-compat.yaml new file mode 100644 index 00000000000..c855f70cac1 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs-balancer-crush-compat.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + conf: + mgr: + debug osd: 20 +tasks: +- exec: + retry: 20 + sleep_for_retry: 10 + mon.a: + - "ceph balancer status" +- exec: + retry: 20 + sleep_for_retry: 10 + mon.a: + - "ceph balancer mode crush-compat" + - "ceph balancer on" diff --git a/qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs-balancer-upmap.yaml b/qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs-balancer-upmap.yaml new file mode 100644 index 00000000000..62611d34311 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs-balancer-upmap.yaml @@ -0,0 +1,20 @@ +overrides: + ceph: + conf: + mgr: + debug osd: 20 +tasks: +- exec: + retry: 10 + sleep_for_retry: 20 + mon.a: + - "ceph balancer status" + - "ceph balancer mode upmap" + - "ceph balancer on" + +- exec: + retry: 10 + sleep_for_retry: 20 + mon.a: + - "ceph balancer mode upmap" + - "ceph balancer on" diff --git a/qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs.yaml b/qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/downstream/thrash/msgr b/qa/suites/rados/downstream/thrash/msgr new file mode 120000 index 00000000000..b29ecddaed7 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/msgr @@ -0,0 +1 @@ +../basic/msgr \ No newline at end of file diff --git a/qa/suites/rados/downstream/thrash/msgr-failures/fastclose.yaml b/qa/suites/rados/downstream/thrash/msgr-failures/fastclose.yaml new file mode 100644 index 00000000000..77fd730aff7 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/msgr-failures/fastclose.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms tcp read timeout: 5 diff --git a/qa/suites/rados/downstream/thrash/msgr-failures/few.yaml b/qa/suites/rados/downstream/thrash/msgr-failures/few.yaml new file mode 100644 index 00000000000..477bffe619b --- /dev/null +++ b/qa/suites/rados/downstream/thrash/msgr-failures/few.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + osd: + osd heartbeat use min delay socket: true diff --git a/qa/suites/rados/downstream/thrash/msgr-failures/osd-delay.yaml b/qa/suites/rados/downstream/thrash/msgr-failures/osd-delay.yaml new file mode 100644 index 00000000000..a33ba89e14f --- /dev/null +++ b/qa/suites/rados/downstream/thrash/msgr-failures/osd-delay.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms inject delay type: osd + ms inject delay probability: .005 + ms inject delay max: 1 + ms inject internal delays: .002 diff --git a/qa/suites/rados/downstream/thrash/objectstore b/qa/suites/rados/downstream/thrash/objectstore new file mode 120000 index 00000000000..071b204e60c --- /dev/null +++ b/qa/suites/rados/downstream/thrash/objectstore @@ -0,0 +1 @@ +../../../../downstream-config/objectstore/ \ No newline at end of file diff --git a/qa/suites/rados/downstream/thrash/rados.yaml b/qa/suites/rados/downstream/thrash/rados.yaml new file mode 120000 index 00000000000..b81af134b22 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/rados.yaml @@ -0,0 +1 @@ +../../../../downstream-config/config/rados.yaml \ No newline at end of file diff --git a/qa/suites/rados/downstream/thrash/rocksdb.yaml b/qa/suites/rados/downstream/thrash/rocksdb.yaml new file mode 120000 index 00000000000..2bddc204af3 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/rocksdb.yaml @@ -0,0 +1 @@ +../../../../downstream-config/mon_kv_backend/rocksdb.yaml \ No newline at end of file diff --git a/qa/suites/rados/downstream/thrash/thrashers/default.yaml b/qa/suites/rados/downstream/thrash/thrashers/default.yaml new file mode 100644 index 00000000000..9e2b5b188a1 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/thrashers/default.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-whitelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/downstream/thrash/thrashers/mapgap.yaml b/qa/suites/rados/downstream/thrash/thrashers/mapgap.yaml new file mode 100644 index 00000000000..8962ff1ebe1 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/thrashers/mapgap.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + log-whitelist: + - but it is still running + - objects unfound and apparently lost + - osd_map_cache_size + conf: + mon: + mon min osdmap epochs: 2 + osd: + osd map cache size: 1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd scrub during recovery: false + osd max backfills: 6 +tasks: +- thrashosds: + timeout: 1800 + chance_pgnum_grow: 0.25 + chance_pgpnum_fix: 0.25 + chance_test_map_discontinuity: 2 diff --git a/qa/suites/rados/downstream/thrash/thrashers/morepggrow.yaml b/qa/suites/rados/downstream/thrash/thrashers/morepggrow.yaml new file mode 100644 index 00000000000..91d2173e87e --- /dev/null +++ b/qa/suites/rados/downstream/thrash/thrashers/morepggrow.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + journal throttle high multiple: 2 + journal throttle max multiple: 10 + filestore queue throttle high multiple: 2 + filestore queue throttle max multiple: 10 + osd max backfills: 9 + log-whitelist: + - but it is still running + - objects unfound and apparently lost +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 3 + chance_pgpnum_fix: 1 +openstack: +- volumes: + size: 50 diff --git a/qa/suites/rados/downstream/thrash/thrashers/none.yaml b/qa/suites/rados/downstream/thrash/thrashers/none.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/downstream/thrash/thrashers/pggrow.yaml b/qa/suites/rados/downstream/thrash/thrashers/pggrow.yaml new file mode 100644 index 00000000000..2a8087f8b73 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/thrashers/pggrow.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-whitelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + filestore odsync write: true + osd max backfills: 2 + osd snap trim sleep: .5 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/downstream/thrash/thrashosds-health.yaml b/qa/suites/rados/downstream/thrash/thrashosds-health.yaml new file mode 120000 index 00000000000..e0426dbe499 --- /dev/null +++ b/qa/suites/rados/downstream/thrash/thrashosds-health.yaml @@ -0,0 +1 @@ +../../../../tasks/thrashosds-health.yaml \ No newline at end of file diff --git a/qa/suites/rados/downstream/thrash/workloads/radosbench-small-objects-write-fadvice-dn.yaml b/qa/suites/rados/downstream/thrash/workloads/radosbench-small-objects-write-fadvice-dn.yaml new file mode 100644 index 00000000000..5143c67887b --- /dev/null +++ b/qa/suites/rados/downstream/thrash/workloads/radosbench-small-objects-write-fadvice-dn.yaml @@ -0,0 +1,67 @@ +overrides: + ceph: + crush_tunables: jewel + conf: + mon: + mon osd initial require min compat client: jewel + client.0: + debug ms: 1 + debug objecter: 20 + debug rados: 20 +tasks: +- parallel: + - therados1 + - therados2 + - full_sequential +therados1: +- rados: + clients: [client.1] + ops: 400000 + objects: 1024 + max_seconds: 600 + max_in_flight: 64 + size: 16384 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 +therados2: +- rados: + clients: [client.1] + ops: 4000 + objects: 500 + write_fadvise_dontneed: true + op_weights: + write: 100 +full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 +