From: Sage Weil Date: Wed, 30 Nov 2016 17:13:14 +0000 (-0500) Subject: rados/thrash*: vary osd_max_backfills, always >1 X-Git-Tag: v11.1.1~58^2^2~30^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9717bc34a241d18c6f5c5ad20fc940d72fd5ca74;p=ceph.git rados/thrash*: vary osd_max_backfills, always >1 Setting osd_max_backfills to 1 can lead to a copy-from vs backfill deadlock; see http://tracker.ceph.com/issues/18085 This effectively reverts commit 5e880228fd7f59063d22a51eb5488b369b0c8360, which aimed to catch bugs in recovery reservations. Signed-off-by: Sage Weil --- diff --git a/suites/rados/thrash-erasure-code-big/thrashers/default.yaml b/suites/rados/thrash-erasure-code-big/thrashers/default.yaml index e5728c969fe..5acfcc3ddd9 100644 --- a/suites/rados/thrash-erasure-code-big/thrashers/default.yaml +++ b/suites/rados/thrash-erasure-code-big/thrashers/default.yaml @@ -8,9 +8,9 @@ tasks: conf: osd: osd debug reject backfill probability: .3 - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 6 - thrashosds: timeout: 1200 chance_pgnum_grow: 1 diff --git a/suites/rados/thrash-erasure-code-big/thrashers/fastread.yaml b/suites/rados/thrash-erasure-code-big/thrashers/fastread.yaml index 1a6e900a6f1..5ded6ba7148 100644 --- a/suites/rados/thrash-erasure-code-big/thrashers/fastread.yaml +++ b/suites/rados/thrash-erasure-code-big/thrashers/fastread.yaml @@ -9,9 +9,9 @@ tasks: mon osd pool ec fast read: 1 osd: osd debug reject backfill probability: .3 - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 2 - thrashosds: timeout: 1200 chance_pgnum_grow: 1 diff --git a/suites/rados/thrash-erasure-code-big/thrashers/morepggrow.yaml b/suites/rados/thrash-erasure-code-big/thrashers/morepggrow.yaml index 99906ba2024..f09ec08cfa9 100644 --- a/suites/rados/thrash-erasure-code-big/thrashers/morepggrow.yaml +++ b/suites/rados/thrash-erasure-code-big/thrashers/morepggrow.yaml @@ -3,9 +3,9 @@ 
tasks: - ceph: conf: osd: - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 9 log-whitelist: - wrongly marked me down - objects unfound and apparently lost diff --git a/suites/rados/thrash-erasure-code-shec/thrashers/default.yaml b/suites/rados/thrash-erasure-code-shec/thrashers/default.yaml index e5728c969fe..2bece1f939b 100644 --- a/suites/rados/thrash-erasure-code-shec/thrashers/default.yaml +++ b/suites/rados/thrash-erasure-code-shec/thrashers/default.yaml @@ -8,9 +8,9 @@ tasks: conf: osd: osd debug reject backfill probability: .3 - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 3 - thrashosds: timeout: 1200 chance_pgnum_grow: 1 diff --git a/suites/rados/thrash-erasure-code/thrashers/default.yaml b/suites/rados/thrash-erasure-code/thrashers/default.yaml index fade054b1b7..211d99a64fb 100644 --- a/suites/rados/thrash-erasure-code/thrashers/default.yaml +++ b/suites/rados/thrash-erasure-code/thrashers/default.yaml @@ -7,9 +7,9 @@ tasks: conf: osd: osd debug reject backfill probability: .3 - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 2 - thrashosds: timeout: 1200 chance_pgnum_grow: 1 diff --git a/suites/rados/thrash-erasure-code/thrashers/fastread.yaml b/suites/rados/thrash-erasure-code/thrashers/fastread.yaml index 1a6e900a6f1..8f7c455c693 100644 --- a/suites/rados/thrash-erasure-code/thrashers/fastread.yaml +++ b/suites/rados/thrash-erasure-code/thrashers/fastread.yaml @@ -9,9 +9,9 @@ tasks: mon osd pool ec fast read: 1 osd: osd debug reject backfill probability: .3 - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 3 - thrashosds: timeout: 1200 chance_pgnum_grow: 1 diff --git a/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml b/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml index c37147fda22..bd448e27285 100644 --- 
a/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml +++ b/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml @@ -7,6 +7,7 @@ overrides: osd map cache size: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 5 tasks: - install: - ceph: diff --git a/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml b/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml index 9ba1b9e5867..3fe730673b6 100644 --- a/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml +++ b/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml @@ -3,9 +3,9 @@ tasks: - ceph: conf: osd: - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 9 log-whitelist: - wrongly marked me down - objects unfound and apparently lost diff --git a/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml b/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml index 744761d8cce..ecb239a061e 100644 --- a/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml +++ b/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml @@ -8,6 +8,7 @@ tasks: osd: osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 4 - thrashosds: timeout: 1200 chance_pgnum_grow: 2 diff --git a/suites/rados/thrash/thrashers/default.yaml b/suites/rados/thrash/thrashers/default.yaml index fabfc4f8c40..f5e432d3fca 100644 --- a/suites/rados/thrash/thrashers/default.yaml +++ b/suites/rados/thrash/thrashers/default.yaml @@ -7,9 +7,9 @@ tasks: conf: osd: osd debug reject backfill probability: .3 - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 3 - thrashosds: timeout: 1200 chance_pgnum_grow: 1 diff --git a/suites/rados/thrash/thrashers/mapgap.yaml b/suites/rados/thrash/thrashers/mapgap.yaml index 7854f8356d0..5c59340c7eb 100644 --- a/suites/rados/thrash/thrashers/mapgap.yaml +++ b/suites/rados/thrash/thrashers/mapgap.yaml @@ -8,6 +8,7 @@ overrides: osd scrub min interval: 60 osd scrub max 
interval: 120 osd scrub during recovery: false + osd max backfills: 6 tasks: - install: - ceph: diff --git a/suites/rados/thrash/thrashers/morepggrow.yaml b/suites/rados/thrash/thrashers/morepggrow.yaml index 20c84b1f1bb..a22f80c5845 100644 --- a/suites/rados/thrash/thrashers/morepggrow.yaml +++ b/suites/rados/thrash/thrashers/morepggrow.yaml @@ -3,13 +3,13 @@ tasks: - ceph: conf: osd: - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 journal throttle high multiple: 2 journal throttle max multiple: 10 filestore queue throttle high multiple: 2 filestore queue throttle max multiple: 10 + osd max backfills: 9 log-whitelist: - wrongly marked me down - objects unfound and apparently lost diff --git a/suites/rados/thrash/thrashers/pggrow.yaml b/suites/rados/thrash/thrashers/pggrow.yaml index 8b1cf46af50..d381026af3b 100644 --- a/suites/rados/thrash/thrashers/pggrow.yaml +++ b/suites/rados/thrash/thrashers/pggrow.yaml @@ -9,6 +9,7 @@ tasks: osd scrub min interval: 60 osd scrub max interval: 120 filestore odsync write: true + osd max backfills: 2 - thrashosds: timeout: 1200 chance_pgnum_grow: 2