git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
thrash refactoring
author Shylesh Kumar <shylesh.mohan@gmail.com>
Mon, 21 May 2018 17:27:39 +0000 (22:57 +0530)
committer Shylesh Kumar <shmohan@redhat.com>
Tue, 25 Sep 2018 19:28:44 +0000 (12:28 -0700)
Signed-off-by: Shylesh Kumar <shylesh.mohan@gmail.com>
34 files changed:
qa/downstream-config/clusters/fixed-6.yaml
qa/suites/rados/downstream/singleton/all/admin_socket_objecter_requests.yaml [new file with mode: 0644]
qa/suites/rados/downstream/singleton/all/pool-snaps-few-objects.yaml [new file with mode: 0644]
qa/suites/rados/downstream/singleton/all/snaps-few-objects.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/% [new file with mode: 0644]
qa/suites/rados/downstream/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled [new symlink]
qa/suites/rados/downstream/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml [new symlink]
qa/suites/rados/downstream/thrash/1-pg-log-overrides/normal_pg_log.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/1-pg-log-overrides/short_pg_log.yaml [new symlink]
qa/suites/rados/downstream/thrash/backoff/normal.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/backoff/peering.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/backoff/peering_and_degraded.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/ceph.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/clusters/+ [new file with mode: 0644]
qa/suites/rados/downstream/thrash/clusters/fixed-2.yaml [new symlink]
qa/suites/rados/downstream/thrash/clusters/openstack.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/d-require-luminous/at-end.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs-balancer-crush-compat.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs-balancer-upmap.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/msgr [new symlink]
qa/suites/rados/downstream/thrash/msgr-failures/fastclose.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/msgr-failures/few.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/msgr-failures/osd-delay.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/objectstore [new symlink]
qa/suites/rados/downstream/thrash/rados.yaml [new symlink]
qa/suites/rados/downstream/thrash/rocksdb.yaml [new symlink]
qa/suites/rados/downstream/thrash/thrashers/default.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/thrashers/mapgap.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/thrashers/morepggrow.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/thrashers/none.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/thrashers/pggrow.yaml [new file with mode: 0644]
qa/suites/rados/downstream/thrash/thrashosds-health.yaml [new symlink]
qa/suites/rados/downstream/thrash/workloads/radosbench-small-objects-write-fadvice-dn.yaml [new file with mode: 0644]

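diff --git a/qa/downstream-config/clusters/fixed-6.yaml b/qa/downstream-config/clusters/fixed-6.yaml
index db8f36ca3507694e6a471bc9b34c7cb5fe1e58ab..c65d7c3b2211874de56f2d23ed1b37d548291370 100644 (file)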
@@ -1,10 +1,9 @@
 roles:
-- [mon.a, mon.c, mgr.y, osd.0, osd.1, osd.2, osd.3, client.0]
-- [mon.b, mgr.x, client.1]
+- [mon.a, mon.b, mgr.x, osd.0, osd.1, osd.2, osd.3, client.0]
+- [mon.c, mgr.y, client.1]
 - [osd.4, osd.5]
 - [osd.6, osd.7]
-- [client.2, client.3]
-- [client.4, client.5]
+
 openstack:
 - volumes: # attached to each instance
     count: 4
diff --git a/qa/suites/rados/downstream/singleton/all/admin_socket_objecter_requests.yaml b/qa/suites/rados/downstream/singleton/all/admin_socket_objecter_requests.yaml
new file mode 100644 (file)
index 0000000..d155cf8
--- /dev/null
@@ -0,0 +1,27 @@
+roles:
+- - mon.a
+  - mon.b
+  - mon.c
+  - mgr.x
+- - osd.0
+  - osd.1
+  - osd.2
+- - client.0
+
+openstack:
+  - volumes: # attached to each instance
+      count: 3
+      size: 10 # GB
+tasks:
+- install:
+- ceph:
+    conf:
+      client.0:
+        admin socket: /var/run/ceph/ceph-$name.asok
+- radosbench:
+    clients: [client.0]
+    time: 150
+- admin_socket:
+    client.0:
+      objecter_requests:
+        test: "http://git.ceph.com/?p={repo};a=blob_plain;f=src/test/admin_socket/objecter_requests;hb={branch}"
diff --git a/qa/suites/rados/downstream/singleton/all/pool-snaps-few-objects.yaml b/qa/suites/rados/downstream/singleton/all/pool-snaps-few-objects.yaml
new file mode 100644 (file)
index 0000000..c78fe17
--- /dev/null
@@ -0,0 +1,32 @@
+roles:
+- - mon.a
+  - mon.b
+  - mon.c
+- - mgr.x
+  - osd.0
+  - osd.1
+  - osd.2
+- - client.0
+openstack:
+  - volumes:
+      count: 3
+      size: 10
+tasks:
+- install:
+- ceph:
+    conf:
+      osd:
+        osd deep scrub update digest min age: 0
+- rados:
+    clients: [client.0]
+    ops: 4000
+    objects: 50
+    pool_snaps: true
+    op_weights:
+      read: 100
+      write: 100
+      delete: 50
+      snap_create: 50
+      snap_remove: 50
+      rollback: 50
+      copy_from: 50
diff --git a/qa/suites/rados/downstream/singleton/all/snaps-few-objects.yaml b/qa/suites/rados/downstream/singleton/all/snaps-few-objects.yaml
new file mode 100644 (file)
index 0000000..9677526
--- /dev/null
@@ -0,0 +1,32 @@
+roles:
+- - mon.a
+  - mon.b
+  - mon.c
+  - mgr.x
+- - osd.0
+  - osd.1
+  - osd.2
+- - osd.3
+  - osd.4
+  - osd.5
+- - client.0
+openstack:
+  - volumes:
+      count: 3
+      size: 10
+
+tasks:
+- install:
+- ceph:
+- rados:
+    clients: [client.0]
+    ops: 4000
+    objects: 50
+    op_weights:
+      read: 100
+      write: 100
+      delete: 50
+      snap_create: 50
+      snap_remove: 50
+      rollback: 50
+      copy_from: 50
diff --git a/qa/suites/rados/downstream/thrash/% b/qa/suites/rados/downstream/thrash/%
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/suites/rados/downstream/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled b/qa/suites/rados/downstream/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled
new file mode 120000 (symlink)
index 0000000..a0d7915
--- /dev/null
@@ -0,0 +1 @@
+../../../../../overrides/2-size-2-min-size.yaml
\ No newline at end of file
diff --git a/qa/suites/rados/downstream/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml b/qa/suites/rados/downstream/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml
new file mode 120000 (symlink)
index 0000000..a5a5633
--- /dev/null
@@ -0,0 +1 @@
+../../../../../overrides/3-size-2-min-size.yaml
\ No newline at end of file
diff --git a/qa/suites/rados/downstream/thrash/1-pg-log-overrides/normal_pg_log.yaml b/qa/suites/rados/downstream/thrash/1-pg-log-overrides/normal_pg_log.yaml
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/suites/rados/downstream/thrash/1-pg-log-overrides/short_pg_log.yaml b/qa/suites/rados/downstream/thrash/1-pg-log-overrides/short_pg_log.yaml
new file mode 120000 (symlink)
index 0000000..1c7107e
--- /dev/null
@@ -0,0 +1 @@
+../../../../../overrides/short_pg_log.yaml
\ No newline at end of file
diff --git a/qa/suites/rados/downstream/thrash/backoff/normal.yaml b/qa/suites/rados/downstream/thrash/backoff/normal.yaml
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/suites/rados/downstream/thrash/backoff/peering.yaml b/qa/suites/rados/downstream/thrash/backoff/peering.yaml
new file mode 100644 (file)
index 0000000..66d0611
--- /dev/null
@@ -0,0 +1,5 @@
+overrides:
+  ceph:
+    conf:
+      osd:
+        osd backoff on peering: true
diff --git a/qa/suites/rados/downstream/thrash/backoff/peering_and_degraded.yaml b/qa/suites/rados/downstream/thrash/backoff/peering_and_degraded.yaml
new file mode 100644 (file)
index 0000000..e610990
--- /dev/null
@@ -0,0 +1,6 @@
+overrides:
+  ceph:
+    conf:
+      osd:
+        osd backoff on peering: true
+        osd backoff on degraded: true
diff --git a/qa/suites/rados/downstream/thrash/ceph.yaml b/qa/suites/rados/downstream/thrash/ceph.yaml
new file mode 100644 (file)
index 0000000..2030acb
--- /dev/null
@@ -0,0 +1,3 @@
+tasks:
+- install:
+- ceph:
diff --git a/qa/suites/rados/downstream/thrash/clusters/+ b/qa/suites/rados/downstream/thrash/clusters/+
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/suites/rados/downstream/thrash/clusters/fixed-2.yaml b/qa/suites/rados/downstream/thrash/clusters/fixed-2.yaml
new file mode 120000 (symlink)
index 0000000..4241582
--- /dev/null
@@ -0,0 +1 @@
+../../../../../downstream-config/clusters/fixed-2.yaml
\ No newline at end of file
diff --git a/qa/suites/rados/downstream/thrash/clusters/openstack.yaml b/qa/suites/rados/downstream/thrash/clusters/openstack.yaml
new file mode 100644 (file)
index 0000000..b0f3b9b
--- /dev/null
@@ -0,0 +1,4 @@
+openstack:
+  - volumes: # attached to each instance
+      count: 4
+      size: 30 # GB
diff --git a/qa/suites/rados/downstream/thrash/d-require-luminous/at-end.yaml b/qa/suites/rados/downstream/thrash/d-require-luminous/at-end.yaml
new file mode 100644 (file)
index 0000000..f91ab63
--- /dev/null
@@ -0,0 +1,33 @@
+# do not require luminous osds at mkfs time; only set flag at
+# the end of the test run, then do a final scrub (to convert any
+# legacy snapsets), and verify we are healthy.
+tasks:
+- full_sequential_finally:
+  - exec:
+      mon.a:
+        - ceph osd require-osd-release luminous
+        - ceph osd pool application enable base rados || true
+        # make sure osds have latest map
+        - rados -p rbd bench 5 write -b 4096
+  - ceph.healthy:
+  - ceph.osd_scrub_pgs:
+      cluster: ceph
+  - exec:
+      mon.a:
+        - sleep 150
+        - ceph osd dump | grep purged_snapdirs || echo "Failed grep purged_snapdirs"
+        - ceph pg dump -f json-pretty
+        - "ceph pg dump sum -f json-pretty | grep num_legacy_snapsets | head -1 | grep ': 0'"
+overrides:
+  ceph:
+    conf:
+      global:
+        mon debug no require luminous: true
+
+    # setting luminous triggers peering, which *might* trigger health alerts
+    log-whitelist:
+      - overall HEALTH_
+      - \(PG_AVAILABILITY\)
+      - \(PG_DEGRADED\)
+  thrashosds:
+    chance_thrash_cluster_full: 0
diff --git a/qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs-balancer-crush-compat.yaml b/qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs-balancer-crush-compat.yaml
new file mode 100644 (file)
index 0000000..c855f70
--- /dev/null
@@ -0,0 +1,17 @@
+overrides:
+  ceph:
+    conf:
+      mgr:
+        debug osd: 20
+tasks:
+- exec:
+    retry: 20
+    sleep_for_retry: 10
+    mon.a:
+      - "ceph balancer status"
+- exec:
+    retry: 20
+    sleep_for_retry: 10
+    mon.a:
+      - "ceph balancer mode crush-compat"
+      - "ceph balancer on"
diff --git a/qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs-balancer-upmap.yaml b/qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs-balancer-upmap.yaml
new file mode 100644 (file)
index 0000000..62611d3
--- /dev/null
@@ -0,0 +1,20 @@
+overrides:
+  ceph:
+    conf:
+      mgr:
+        debug osd: 20
+tasks:
+- exec:
+    retry: 10
+    sleep_for_retry: 20
+    mon.a:
+      - "ceph balancer status"
+      - "ceph balancer mode upmap"
+      - "ceph balancer on"
+
+- exec:
+    retry: 10
+    sleep_for_retry: 20
+    mon.a:
+      - "ceph balancer mode upmap"
+      - "ceph balancer on"
diff --git a/qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs.yaml b/qa/suites/rados/downstream/thrash/d-require-luminous/at-mkfs.yaml
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/suites/rados/downstream/thrash/msgr b/qa/suites/rados/downstream/thrash/msgr
new file mode 120000 (symlink)
index 0000000..b29ecdd
--- /dev/null
@@ -0,0 +1 @@
+../basic/msgr
\ No newline at end of file
diff --git a/qa/suites/rados/downstream/thrash/msgr-failures/fastclose.yaml b/qa/suites/rados/downstream/thrash/msgr-failures/fastclose.yaml
new file mode 100644 (file)
index 0000000..77fd730
--- /dev/null
@@ -0,0 +1,6 @@
+overrides:
+  ceph:
+    conf:
+      global:
+        ms inject socket failures: 2500
+        ms tcp read timeout: 5
diff --git a/qa/suites/rados/downstream/thrash/msgr-failures/few.yaml b/qa/suites/rados/downstream/thrash/msgr-failures/few.yaml
new file mode 100644 (file)
index 0000000..477bffe
--- /dev/null
@@ -0,0 +1,7 @@
+overrides:
+  ceph:
+    conf:
+      global:
+        ms inject socket failures: 5000
+      osd:
+        osd heartbeat use min delay socket: true
diff --git a/qa/suites/rados/downstream/thrash/msgr-failures/osd-delay.yaml b/qa/suites/rados/downstream/thrash/msgr-failures/osd-delay.yaml
new file mode 100644 (file)
index 0000000..a33ba89
--- /dev/null
@@ -0,0 +1,9 @@
+overrides:
+  ceph:
+    conf:
+      global:
+        ms inject socket failures: 2500
+        ms inject delay type: osd
+        ms inject delay probability: .005
+        ms inject delay max: 1
+        ms inject internal delays: .002
diff --git a/qa/suites/rados/downstream/thrash/objectstore b/qa/suites/rados/downstream/thrash/objectstore
new file mode 120000 (symlink)
index 0000000..071b204
--- /dev/null
@@ -0,0 +1 @@
+../../../../downstream-config/objectstore/
\ No newline at end of file
diff --git a/qa/suites/rados/downstream/thrash/rados.yaml b/qa/suites/rados/downstream/thrash/rados.yaml
new file mode 120000 (symlink)
index 0000000..b81af13
--- /dev/null
@@ -0,0 +1 @@
+../../../../downstream-config/config/rados.yaml
\ No newline at end of file
diff --git a/qa/suites/rados/downstream/thrash/rocksdb.yaml b/qa/suites/rados/downstream/thrash/rocksdb.yaml
new file mode 120000 (symlink)
index 0000000..2bddc20
--- /dev/null
@@ -0,0 +1 @@
+../../../../downstream-config/mon_kv_backend/rocksdb.yaml
\ No newline at end of file
diff --git a/qa/suites/rados/downstream/thrash/thrashers/default.yaml b/qa/suites/rados/downstream/thrash/thrashers/default.yaml
new file mode 100644 (file)
index 0000000..9e2b5b1
--- /dev/null
@@ -0,0 +1,17 @@
+overrides:
+  ceph:
+    log-whitelist:
+    - but it is still running
+    - objects unfound and apparently lost
+    conf:
+      osd:
+        osd debug reject backfill probability: .3
+        osd scrub min interval: 60
+        osd scrub max interval: 120
+        osd max backfills: 3
+        osd snap trim sleep: 2
+tasks:
+- thrashosds:
+    timeout: 1200
+    chance_pgnum_grow: 1
+    chance_pgpnum_fix: 1
diff --git a/qa/suites/rados/downstream/thrash/thrashers/mapgap.yaml b/qa/suites/rados/downstream/thrash/thrashers/mapgap.yaml
new file mode 100644 (file)
index 0000000..8962ff1
--- /dev/null
@@ -0,0 +1,21 @@
+overrides:
+  ceph:
+    log-whitelist:
+    - but it is still running
+    - objects unfound and apparently lost
+    - osd_map_cache_size
+    conf:
+      mon:
+        mon min osdmap epochs: 2
+      osd:
+        osd map cache size: 1
+        osd scrub min interval: 60
+        osd scrub max interval: 120
+        osd scrub during recovery: false
+        osd max backfills: 6
+tasks:
+- thrashosds:
+    timeout: 1800
+    chance_pgnum_grow: 0.25
+    chance_pgpnum_fix: 0.25
+    chance_test_map_discontinuity: 2
diff --git a/qa/suites/rados/downstream/thrash/thrashers/morepggrow.yaml b/qa/suites/rados/downstream/thrash/thrashers/morepggrow.yaml
new file mode 100644 (file)
index 0000000..91d2173
--- /dev/null
@@ -0,0 +1,22 @@
+overrides:
+  ceph:
+    conf:
+      osd:
+        osd scrub min interval: 60
+        osd scrub max interval: 120
+        journal throttle high multiple: 2
+        journal throttle max multiple: 10
+        filestore queue throttle high multiple: 2
+        filestore queue throttle max multiple: 10
+        osd max backfills: 9
+    log-whitelist:
+    - but it is still running
+    - objects unfound and apparently lost
+tasks:
+- thrashosds:
+    timeout: 1200
+    chance_pgnum_grow: 3
+    chance_pgpnum_fix: 1
+openstack:
+- volumes:
+    size: 50
diff --git a/qa/suites/rados/downstream/thrash/thrashers/none.yaml b/qa/suites/rados/downstream/thrash/thrashers/none.yaml
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/suites/rados/downstream/thrash/thrashers/pggrow.yaml b/qa/suites/rados/downstream/thrash/thrashers/pggrow.yaml
new file mode 100644 (file)
index 0000000..2a8087f
--- /dev/null
@@ -0,0 +1,17 @@
+overrides:
+  ceph:
+    log-whitelist:
+    - but it is still running
+    - objects unfound and apparently lost
+    conf:
+      osd:
+        osd scrub min interval: 60
+        osd scrub max interval: 120
+        filestore odsync write: true
+        osd max backfills: 2
+        osd snap trim sleep: .5
+tasks:
+- thrashosds:
+    timeout: 1200
+    chance_pgnum_grow: 2
+    chance_pgpnum_fix: 1
diff --git a/qa/suites/rados/downstream/thrash/thrashosds-health.yaml b/qa/suites/rados/downstream/thrash/thrashosds-health.yaml
new file mode 120000 (symlink)
index 0000000..e0426db
--- /dev/null
@@ -0,0 +1 @@
+../../../../tasks/thrashosds-health.yaml
\ No newline at end of file
diff --git a/qa/suites/rados/downstream/thrash/workloads/radosbench-small-objects-write-fadvice-dn.yaml b/qa/suites/rados/downstream/thrash/workloads/radosbench-small-objects-write-fadvice-dn.yaml
new file mode 100644 (file)
index 0000000..5143c67
--- /dev/null
@@ -0,0 +1,67 @@
+overrides:
+  ceph:
+    crush_tunables: jewel
+    conf:
+      mon:
+        mon osd initial require min compat client: jewel
+      client.0:
+        debug ms: 1
+        debug objecter: 20
+        debug rados: 20
+tasks:
+- parallel:
+  - therados1
+  - therados2
+  - full_sequential
+therados1:
+- rados:
+    clients: [client.1]
+    ops: 400000
+    objects: 1024
+    max_seconds: 600
+    max_in_flight: 64
+    size: 16384
+    op_weights:
+      read: 100
+      write: 100
+      delete: 50
+      snap_create: 50
+      snap_remove: 50
+      rollback: 50
+      copy_from: 50
+      setattr: 25
+      rmattr: 25
+therados2:
+- rados:
+    clients: [client.1]
+    ops: 4000
+    objects: 500
+    write_fadvise_dontneed: true
+    op_weights:
+      write: 100
+full_sequential:
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+