From 9717bc34a241d18c6f5c5ad20fc940d72fd5ca74 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 30 Nov 2016 12:13:14 -0500 Subject: [PATCH] rados/thrash*: vary osd_max_backfills, always >1 This can lead to a copy-from vs backfill deadlock; see http://tracker.ceph.com/issues/18085 This effectively reverts 5e880228fd7f59063d22a51eb5488b369b0c8360 which aimed to catch bugs in recovery reservations. Signed-off-by: Sage Weil --- suites/rados/thrash-erasure-code-big/thrashers/default.yaml | 2 +- suites/rados/thrash-erasure-code-big/thrashers/fastread.yaml | 2 +- suites/rados/thrash-erasure-code-big/thrashers/morepggrow.yaml | 2 +- suites/rados/thrash-erasure-code-shec/thrashers/default.yaml | 2 +- suites/rados/thrash-erasure-code/thrashers/default.yaml | 2 +- suites/rados/thrash-erasure-code/thrashers/fastread.yaml | 2 +- suites/rados/thrash-erasure-code/thrashers/mapgap.yaml | 1 + suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml | 2 +- suites/rados/thrash-erasure-code/thrashers/pggrow.yaml | 1 + suites/rados/thrash/thrashers/default.yaml | 2 +- suites/rados/thrash/thrashers/mapgap.yaml | 1 + suites/rados/thrash/thrashers/morepggrow.yaml | 2 +- suites/rados/thrash/thrashers/pggrow.yaml | 1 + 13 files changed, 13 insertions(+), 9 deletions(-) diff --git a/suites/rados/thrash-erasure-code-big/thrashers/default.yaml b/suites/rados/thrash-erasure-code-big/thrashers/default.yaml index e5728c969fe6d..5acfcc3ddd969 100644 --- a/suites/rados/thrash-erasure-code-big/thrashers/default.yaml +++ b/suites/rados/thrash-erasure-code-big/thrashers/default.yaml @@ -8,9 +8,9 @@ tasks: conf: osd: osd debug reject backfill probability: .3 - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 6 - thrashosds: timeout: 1200 chance_pgnum_grow: 1 diff --git a/suites/rados/thrash-erasure-code-big/thrashers/fastread.yaml b/suites/rados/thrash-erasure-code-big/thrashers/fastread.yaml index 1a6e900a6f14b..5ded6ba7148d8 100644 --- a/suites/rados/thrash-erasure-code-big/thrashers/fastread.yaml +++ b/suites/rados/thrash-erasure-code-big/thrashers/fastread.yaml @@ -9,9 +9,9 @@ tasks: mon osd pool ec fast read: 1 osd: osd debug reject backfill probability: .3 - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 2 - thrashosds: timeout: 1200 chance_pgnum_grow: 1 diff --git a/suites/rados/thrash-erasure-code-big/thrashers/morepggrow.yaml b/suites/rados/thrash-erasure-code-big/thrashers/morepggrow.yaml index 99906ba2024cf..f09ec08cfa9c8 100644 --- a/suites/rados/thrash-erasure-code-big/thrashers/morepggrow.yaml +++ b/suites/rados/thrash-erasure-code-big/thrashers/morepggrow.yaml @@ -3,9 +3,9 @@ tasks: - ceph: conf: osd: - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 9 log-whitelist: - wrongly marked me down - objects unfound and apparently lost diff --git a/suites/rados/thrash-erasure-code-shec/thrashers/default.yaml b/suites/rados/thrash-erasure-code-shec/thrashers/default.yaml index e5728c969fe6d..2bece1f939be4 100644 --- a/suites/rados/thrash-erasure-code-shec/thrashers/default.yaml +++ b/suites/rados/thrash-erasure-code-shec/thrashers/default.yaml @@ -8,9 +8,9 @@ tasks: conf: osd: osd debug reject backfill probability: .3 - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 3 - thrashosds: timeout: 1200 chance_pgnum_grow: 1 diff --git a/suites/rados/thrash-erasure-code/thrashers/default.yaml b/suites/rados/thrash-erasure-code/thrashers/default.yaml index fade054b1b7cf..211d99a64fb42 100644 --- a/suites/rados/thrash-erasure-code/thrashers/default.yaml +++ b/suites/rados/thrash-erasure-code/thrashers/default.yaml @@ -7,9 +7,9 @@ tasks: conf: osd: osd debug reject backfill probability: .3 - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 2 - thrashosds: timeout: 1200 chance_pgnum_grow: 1 diff --git a/suites/rados/thrash-erasure-code/thrashers/fastread.yaml b/suites/rados/thrash-erasure-code/thrashers/fastread.yaml index 1a6e900a6f14b..8f7c455c693c4 100644 --- a/suites/rados/thrash-erasure-code/thrashers/fastread.yaml +++ b/suites/rados/thrash-erasure-code/thrashers/fastread.yaml @@ -9,9 +9,9 @@ tasks: mon osd pool ec fast read: 1 osd: osd debug reject backfill probability: .3 - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 3 - thrashosds: timeout: 1200 chance_pgnum_grow: 1 diff --git a/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml b/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml index c37147fda2291..bd448e27285a8 100644 --- a/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml +++ b/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml @@ -7,6 +7,7 @@ overrides: osd map cache size: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 5 tasks: - install: - ceph: diff --git a/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml b/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml index 9ba1b9e5867a5..3fe730673b6a6 100644 --- a/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml +++ b/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml @@ -3,9 +3,9 @@ tasks: - ceph: conf: osd: - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 9 log-whitelist: - wrongly marked me down - objects unfound and apparently lost diff --git a/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml b/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml index 744761d8cce7e..ecb239a061e46 100644 --- a/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml +++ b/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml @@ -8,6 +8,7 @@ tasks: osd: osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 4 - thrashosds: timeout: 1200 chance_pgnum_grow: 2 diff --git a/suites/rados/thrash/thrashers/default.yaml b/suites/rados/thrash/thrashers/default.yaml index fabfc4f8c402d..f5e432d3fca05 100644 --- a/suites/rados/thrash/thrashers/default.yaml +++ b/suites/rados/thrash/thrashers/default.yaml @@ -7,9 +7,9 @@ tasks: conf: osd: osd debug reject backfill probability: .3 - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 3 - thrashosds: timeout: 1200 chance_pgnum_grow: 1 diff --git a/suites/rados/thrash/thrashers/mapgap.yaml b/suites/rados/thrash/thrashers/mapgap.yaml index 7854f8356d0b3..5c59340c7ebe1 100644 --- a/suites/rados/thrash/thrashers/mapgap.yaml +++ b/suites/rados/thrash/thrashers/mapgap.yaml @@ -8,6 +8,7 @@ overrides: osd scrub min interval: 60 osd scrub max interval: 120 osd scrub during recovery: false + osd max backfills: 6 tasks: - install: - ceph: diff --git a/suites/rados/thrash/thrashers/morepggrow.yaml b/suites/rados/thrash/thrashers/morepggrow.yaml index 20c84b1f1bbce..a22f80c584558 100644 --- a/suites/rados/thrash/thrashers/morepggrow.yaml +++ b/suites/rados/thrash/thrashers/morepggrow.yaml @@ -3,13 +3,13 @@ tasks: - ceph: conf: osd: - osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 journal throttle high multiple: 2 journal throttle max multiple: 10 filestore queue throttle high multiple: 2 filestore queue throttle max multiple: 10 + osd max backfills: 9 log-whitelist: - wrongly marked me down - objects unfound and apparently lost diff --git a/suites/rados/thrash/thrashers/pggrow.yaml b/suites/rados/thrash/thrashers/pggrow.yaml index 8b1cf46af505c..d381026af3b6f 100644 --- a/suites/rados/thrash/thrashers/pggrow.yaml +++ b/suites/rados/thrash/thrashers/pggrow.yaml @@ -9,6 +9,7 @@ tasks: osd scrub min interval: 60 osd scrub max interval: 120 filestore odsync write: true + osd max backfills: 2 - thrashosds: timeout: 1200 chance_pgnum_grow: 2 -- 2.39.5