From 379bbe0613e8dbe2f4d38f4f9ee23a10ed39a64d Mon Sep 17 00:00:00 2001 From: Loic Dachary Date: Sat, 19 Dec 2015 00:53:03 +0100 Subject: [PATCH] ceph-disk: protect deactivate with activate lock When ceph-disk prepares the disk, it triggers udev events and each of them runs ceph-disk activate. If systemctl stop ceph-osd@2 happens while there still are ceph-disk activate runs in flight, the systemctl stop may be cancelled by the systemctl enable issued by one of the pending ceph-disk activate. This only matters in a test environment where disks are destroyed shortly after they are activated. Signed-off-by: Loic Dachary (cherry picked from commit 6395bf856b4d4511f0758174ef915ebcafbe3777) Conflicts: src/ceph-disk: ceph-disk deactivate does not exist in ceph-disk on infernalis. But the same feature is implemented in ceph-test-disk.py for test purposes and has the same problem. The patch is adapted to ceph-test-disk.py. --- qa/workunits/ceph-disk/ceph-disk-test.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/qa/workunits/ceph-disk/ceph-disk-test.py b/qa/workunits/ceph-disk/ceph-disk-test.py index 522f727610e95..a2bbd018d30af 100644 --- a/qa/workunits/ceph-disk/ceph-disk-test.py +++ b/qa/workunits/ceph-disk/ceph-disk-test.py @@ -30,6 +30,7 @@ # sudo /usr/sbin/ceph-disk -v trigger /dev/vdb1 # activates if vdb1 is data # import argparse +import fcntl import json import logging import configobj @@ -44,6 +45,21 @@ import uuid LOG = logging.getLogger('CephDisk') +class filelock(object): + def __init__(self, fn): + self.fn = fn + self.fd = None + + def acquire(self): + assert not self.fd + self.fd = file(self.fn, 'w') + fcntl.lockf(self.fd, fcntl.LOCK_EX) + + def release(self): + assert self.fd + fcntl.lockf(self.fd, fcntl.LOCK_UN) + self.fd = None + class CephDisk: def __init__(self): @@ -127,6 +143,9 @@ class CephDisk: raise Exception("journal for uuid = " + uuid + " not found in " + str(disks)) def destroy_osd(self, uuid): + STATEDIR = '/var/lib/ceph' + 
activate_lock = filelock(STATEDIR + '/tmp/ceph-disk.activate.lock') + activate_lock.acquire() id = self.sh("ceph osd create " + uuid).strip() self.helper("control_osd stop " + id + " || true") self.wait_for_osd_down(uuid) @@ -155,6 +174,7 @@ class CephDisk: ceph auth del osd.{id} ceph osd crush rm osd.{id} """.format(id=id)) + activate_lock.release() @staticmethod def osd_up_predicate(osds, uuid): -- 2.39.5