'doing powercycle of {s}'.format(o=osd, s=remote.name))
self._assert_ipmi(remote)
remote.console.power_off()
- elif self.config.get('bdev_inject_crash'):
- self.raw_cluster_cmd(
- '--', 'tell', 'osd.%d' % osd,
- 'injectargs',
- '--bdev-inject-crash %d' % self.config.get('bdev_inject_crash'),
- )
- try:
- self.ctx.daemons.get_daemon('osd', osd, self.cluster).wait()
- except:
- pass
+ elif self.config.get('bdev_inject_crash') and self.config.get('bdev_inject_crash_probability'):
+ if random.uniform(0, 1) < self.config.get('bdev_inject_crash_probability', .5):
+ self.raw_cluster_cmd(
+ '--', 'tell', 'osd.%d' % osd,
+ 'injectargs',
+ '--bdev-inject-crash %d' % self.config.get('bdev_inject_crash'),
+ )
+ try:
+ self.ctx.daemons.get_daemon('osd', osd, self.cluster).wait()
+ except:
+ pass
+ else:
+ raise RuntimeError('osd.%s did not fail' % osd)
else:
- raise RuntimeError('osd.%s did not fail' % osd)
+ self.ctx.daemons.get_daemon('osd', osd, self.cluster).stop()
else:
self.ctx.daemons.get_daemon('osd', osd, self.cluster).stop()
of just the osd process. Note that this assumes that a single
osd is the only important process on the node.
+ bdev_inject_crash: (0) seconds to delay while inducing a synthetic crash.
+ The delay lets the BlockDevice "accept" more aio operations but blocks
+ any flush, and then eventually crashes (losing some or all I/Os). If 0,
+ no bdev failure injection is enabled.
+
+ bdev_inject_crash_probability: (.5) probability of doing a bdev failure
+ injection crash instead of a normal OSD kill. Only consulted when
+ bdev_inject_crash is nonzero.
+
chance_test_backfill_full: (0) chance to simulate full disks stopping
backfill