]> git.apps.os.sepia.ceph.com Git - ceph.git/commit
crimson/os: fix a shutdown-related race condition in AlienStore. 44110/head
authorRadoslaw Zarzynski <rzarzyns@redhat.com>
Wed, 24 Nov 2021 15:41:22 +0000 (15:41 +0000)
committerRadoslaw Zarzynski <rzarzyns@redhat.com>
Thu, 25 Nov 2021 15:05:34 +0000 (15:05 +0000)
commit5a7fc07933c908d339b5f2146088c8ec7003aa41
tree050b20516470d8bf20c63bd6c1f09dd718a3bc0e
parent5e56258a349bedb00fd23a2662612d42b8465624
crimson/os: fix a shutdown-related race condition in AlienStore.

This is supposed to tackle crashes like the following one:

```
INFO  2021-11-17 16:33:12,048 [shard 0] alienstore - stat
...
DEBUG 2021-11-17 16:33:12,789 [shard 0] ms - [osd.2(hb_front) v2:0.0.0.0:6813/34383 >> osd.0 v2:127.0.0.1:6809/34293@56992] closed!
DEBUG 2021-11-17 16:33:12,791 [shard 0] ms - [osd.2(hb_front) v2:0.0.0.0:6813/34383@53359 >> osd.7 v2:0.0.0.0:6815/34448] closed!
INFO  2021-11-17 16:33:12,795 [shard 0] alienstore - umount
INFO  2021-11-17 16:33:12,804 [shard 0] osd - osd.2: committed_osd_maps(23, 62)
ceph-osd: /home/jenkins-build/build/workspace/ceph-dev-new-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/17.0.0-8896-gf35358f1/rpm/el8/BUILD/ceph-17.0.0-8896-gf35358f1/src/rocksdb/db/db_impl/db_impl.cc:1615: rocksdb::Status rocksdb::DBImpl::GetImpl(const rocksdb::ReadOptions&, const rocksdb::Slice&, rocksdb::DBImpl::GetImplOptions&): Assertion `get_impl_options.column_family' failed.
Aborting.
Backtrace:
INFO  2021-11-17 16:33:13,542 [shard 0] ms - [osd.2(cluster) v2:172.21.15.17:6804/34383 >> osd.3 v2:172.21.15.17:6806/34387@50001] execute_ready(): fault at READY with nothing to send, going to STANDBY -- std::system_error (error crimson::net:4, read eof)
DEBUG 2021-11-17 16:33:13,542 [shard 0] ms - [osd.2(cluster) v2:172.21.15.17:6804/34383 >> osd.3 v2:172.21.15.17:6806/34387@50001] TRIGGER STANDBY, was READY
 0# gsignal in /lib64/libc.so.6
 1# abort in /lib64/libc.so.6
 2# 0x00007F12FA13FC89 in /lib64/libc.so.6
 3# 0x00007F12FA14DA76 in /lib64/libc.so.6
 4# rocksdb::DBImpl::GetImpl(rocksdb::ReadOptions const&, rocksdb::Slice const&, rocksdb::DBImpl::GetImplOptions&) in ceph-osd
 5# rocksdb::DBImpl::Get(rocksdb::ReadOptions const&, rocksdb::ColumnFamilyHandle*, rocksdb::Slice const&, rocksdb::PinnableSlice*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) in ceph-osd
 6# rocksdb::DBImpl::Get(rocksdb::ReadOptions const&, rocksdb::ColumnFamilyHandle*, rocksdb::Slice const&, rocksdb::PinnableSlice*) in ceph-osd
 7# RocksDBStore::get(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, char const*, unsigned long, ceph::buffer::v15_2_0::list*) in ceph-osd
 8# BlueStore::Collection::get_onode(ghobject_t const&, bool, bool) in ceph-osd
 9# BlueStore::read(boost::intrusive_ptr<ObjectStore::CollectionImpl>&, ghobject_t const&, unsigned long, unsigned long, ceph::buffer::v15_2_0::list&, unsigned int) in ceph-osd
10# 0x00005584E516577F in ceph-osd
11# crimson::os::ThreadPool::loop(std::chrono::duration<long, std::ratio<1l, 1000l> >, unsigned long) in ceph-osd
12# 0x00005584E54E71E9 in ceph-osd
13# 0x00007F12FB861BA3 in /lib64/libstdc++.so.6
14# 0x00007F12FBB3C14A in /lib64/libpthread.so.0
15# clone in /lib64/libc.so.6
Content of /proc/self/maps:
7fff7000-8fff7000 rw-p 00000000 00:00 0
```

The problem happened in RocksDB:

```cpp
Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key,
                       GetImplOptions& get_impl_options) {
  assert(get_impl_options.value != nullptr ||
         get_impl_options.merge_operands != nullptr);

  assert(get_impl_options.column_family);
  // ...
```

```cpp
tatus DBImpl::Get(const ReadOptions& read_options,
                   ColumnFamilyHandle* column_family, const Slice& key,
                   PinnableSlice* value, std::string* timestamp) {
  GetImplOptions get_impl_options;
  get_impl_options.column_family = column_family;
  get_impl_options.value = value;
  get_impl_options.timestamp = timestamp;
  Status s = GetImpl(read_options, key, get_impl_options);
  return s;
}
```

```cpp
int RocksDBStore::get(
  const string& prefix,
  const char *key,
  size_t keylen,
  bufferlist *out)
{
  ceph_assert(out && (out->length() == 0));
  utime_t start = ceph_clock_now();
  int r = 0;
  rocksdb::PinnableSlice value;
  rocksdb::Status s;
  auto cf = get_cf_handle(prefix, key, keylen);
  if (cf) {
    s = db->Get(rocksdb::ReadOptions(),
                cf,
                rocksdb::Slice(key, keylen),
                &value);
  } else {
    string k;
    combine_strings(prefix, key, keylen, &k);
    s = db->Get(rocksdb::ReadOptions(),
                default_cf,
                rocksdb::Slice(k),
                &value);
  }
  // ...
```

It may be explained by a race condition between `AlienStore::stat()`
and `AlienStore::umount()`. Umounting a BlueStore means nullifying
`default_cf`:

```cpp
void RocksDBStore::close()
{
  // ...
  default_cf = nullptr;
  delete db;
  db = nullptr;
}
```

```
INFO  2021-11-17 16:33:12,048 [shard 0] alienstore - stat
...
INFO  2021-11-17 16:33:12,795 [shard 0] alienstore - umount
INFO  2021-11-17 16:33:12,804 [shard 0] osd - osd.2: committed_osd_maps(23, 62)
```

Although `AlienStore` synchronizes `umount()` and `do_transaction()`
with a `seastar::gate`, it lacks similar mechanism for read-like operations.

Signed-off-by: Radoslaw Zarzynski <rzarzyns@redhat.com>
src/crimson/os/alienstore/alien_store.cc
src/crimson/os/alienstore/alien_store.h