]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: fix alloc update after bdev expansion in NCB mode.
authorIgor Fedotov <ifedotov@croit.io>
Mon, 17 Feb 2025 20:51:18 +0000 (23:51 +0300)
committerIgor Fedotov <igor.fedotov@croit.io>
Tue, 15 Apr 2025 09:04:23 +0000 (12:04 +0300)
Fixes: https://tracker.ceph.com/issues/69997
Signed-off-by: Igor Fedotov <igor.fedotov@croit.io>
(cherry picked from commit bd7764bc699b4cc6b3edaf99369e40b59f335b21)

src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h

index 0348708471685616e85e29f51dbb1a962d32f543..a6a9fdc8c97974133302820c43a199c9cee4cb2a 100644 (file)
@@ -6765,7 +6765,9 @@ int BlueStore::_read_multi_bdev_label(
     return -ENOENT;
   }
   done:
-  dout(10) << __func__ << " got " << *out_label << dendl;
+  dout(10) << __func__ << " got " << *out_label
+           << (all_labels_valid ? " all labels valid " : " some labels missing ")
+           << dendl;
   return all_labels_valid ? 0 : 1;
 }
 
@@ -6780,6 +6782,7 @@ void BlueStore::_main_bdev_label_try_reserve()
   ceph_assert(bdev_label_multi == true);
   vector<uint64_t> candidate_positions;
   vector<uint64_t> accepted_positions;
+  dout(20) << __func__ << " input " << bdev_label_valid_locations << dendl;
   uint64_t lsize = std::max(BDEV_LABEL_BLOCK_SIZE, min_alloc_size);
   for (uint64_t location : bdev_label_valid_locations) {
     if (location != BDEV_FIRST_LABEL_POSITION) {
@@ -6813,6 +6816,7 @@ void BlueStore::_main_bdev_label_try_reserve()
          << " occupied by BlueStore object or BlueFS file, disabling" << dendl;
     std::erase(bdev_label_valid_locations, candidate_positions[i]);
   }
+  dout(20) << __func__ << " result " << bdev_label_valid_locations << dendl;
 }
 
 void BlueStore::_main_bdev_label_remove(Allocator* an_alloc)
@@ -6908,7 +6912,9 @@ int BlueStore::_check_main_bdev_label()
       }
     }
     if (valid_locations > bdev_label_valid_locations.size()) {
-      derr << __func__ << " not all labels read properly" << dendl;
+      derr << __func__ << " not all labels read properly, "
+           << valid_locations << "!=" << bdev_label_valid_locations.size()
+           << dendl;
       return -EIO;
     }
   }
@@ -7260,7 +7266,16 @@ int BlueStore::_init_alloc(std::map<uint64_t, uint64_t> *zone_adjustments)
        return -ENOTRECOVERABLE;
       }
     }
+    if (before_expansion_bdev_size > 0 &&
+        before_expansion_bdev_size < bdev_label.size) {
+      // we grow the allocation range, must reflect it in the allocation file
+      alloc->init_add_free(before_expansion_bdev_size,
+                           bdev_label.size - before_expansion_bdev_size);
+      need_to_destage_allocation_file = true;    
+    }
   }
+  before_expansion_bdev_size = 0;
+
   dout(1) << __func__
           << " loaded " << byte_u_t(bytes) << " in " << num << " extents"
           << std::hex
@@ -8923,7 +8938,7 @@ int BlueStore::expand_devices(ostream& out)
   // let's open in read-only mode first to be able to recover
   // from the out-of-space state at DB/shared volume(s)
   // Opening in R/W mode might cause extra space allocation
-  // which is effectively a show stopper for volume expansion then.
+  // which is effectively a show stopper for volume expansion.
   int r = _open_db_and_around(true);
   ceph_assert(r == 0);
   bluefs->dump_block_extents(out);
@@ -8975,24 +8990,18 @@ int BlueStore::expand_devices(ostream& out)
     }
     _close_db_and_around();
 
-    // FIXME minor: try to get rid off the following mount 
-    // when possible, it looks like we don't really need it
     //
     // Mount in read/write to sync expansion changes
+    // and make sure everything is all right.
     //
-    r = _mount();
+    before_expansion_bdev_size = size0; //preserve orignal size to permit following
+                                        // _db_open_and_around() do some post-init stuff
+                                        // on opened allocator
+
+    r = _open_db_and_around(false);
     ceph_assert(r == 0);
-    if (fm && fm->is_null_manager()) {
-      // FIXME: move this stuff to fm's init phase when possible.
-      //
-      // we grow the allocation range, must reflect it in the allocation file
-      alloc->init_add_free(size0, size - size0);
-      need_to_destage_allocation_file = true;
-    }
-    umount();
-  } else {
-    _close_db_and_around();
   }
+  _close_db_and_around();
   return r;
 }
 
index ed768cddb0268a7ac96b1989328583f3dd7c0e90..a574b080c43e64a46ea01410bd1a7084be3ef445 100644 (file)
@@ -2459,6 +2459,11 @@ private:
   bool bdev_label_multi = false;
   int64_t bdev_label_epoch = -1;
   bool bluestore_bdev_label_require_all = false;
+  uint64_t before_expansion_bdev_size = 0; // having non-zero indicates we need
+                                           // to expand allocator in NCB mode,
+                                           // perhaps could be removed when
+                                           // https://tracker.ceph.com/issues/70008
+                                           // is resolved.
 
   typedef std::map<uint64_t, volatile_statfs> osd_pools_map;