}
}
+//---------------------------------------------
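+// returns true when BlueStore runs with the NULL freelist-manager (NCB mode)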
+bool BlueStore::has_null_manager()
+{
+ return (fm && fm->is_null_manager());
+}
+
int BlueStore::_mount()
{
dout(5) << __func__ << "NCB:: path " << path << dendl;
+
_kv_only = false;
if (cct->_conf->bluestore_fsck_on_mount) {
dout(5) << __func__ << "::NCB::calling fsck()" << dendl;
#endif
dout(20) << __func__ << " stopping kv thread" << dendl;
_kv_stop();
- _shutdown_cache();
+ // skip cache cleanup step on fast shutdown
+ if (likely(!m_fast_shutdown)) {
+ _shutdown_cache();
+ }
dout(20) << __func__ << " closing" << dendl;
}
-
_close_db_and_around();
- if (cct->_conf->bluestore_fsck_on_umount) {
+ // disable fsck on fast-shutdown
+ if (cct->_conf->bluestore_fsck_on_umount && !m_fast_shutdown) {
int rc = fsck(cct->_conf->bluestore_fsck_on_umount_deep);
if (rc < 0)
return rc;
return 0;
}
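+// called by the OSD before umount() on fast-shutdown; lets BlueStore skip
+// the cache shutdown and fsck-on-umount steps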
+void BlueStore::prepare_for_fast_shutdown()
+{
+ m_fast_shutdown = true;
+}
+
int BlueStore::get_devices(set<string> *ls)
{
if (bdev) {
string key_prefix;
_key_encode_u64(pool_id, &key_prefix);
*out_per_pool_omap = per_pool_omap != OMAP_BULK;
- if (*out_per_pool_omap) {
+ // don't issue db calls after the db was closed
+ if (*out_per_pool_omap && db) {
auto prefix = per_pool_omap == OMAP_PER_POOL ?
PREFIX_PERPOOL_OMAP :
PREFIX_PERPG_OMAP;
return 0;
} else {
derr << "mismatch:: idx1=" << idx1 << " idx2=" << idx2 << dendl;
- std::cout << "===================================================================" << std::endl;
- for (uint64_t i = 0; i < idx1; i++) {
- std::cout << "arr1[" << i << "]<" << arr1[i].offset << "," << arr1[i].length << "> " << std::endl;
- }
-
- std::cout << "===================================================================" << std::endl;
- for (uint64_t i = 0; i < idx2; i++) {
- std::cout << "arr2[" << i << "]<" << arr2[i].offset << "," << arr2[i].length << "> " << std::endl;
- }
return -1;
}
}
utime_t start = ceph_clock_now();
auto shutdown_cache = make_scope_guard([&] {
- std::cout << "Allocation Recovery was completed in " << duration
- << " seconds; insert_count=" << stats.insert_count
- << "; extent_count=" << stats.extent_count << std::endl;
+ dout(1) << "Allocation Recovery was completed in " << duration
+ << " seconds; insert_count=" << stats.insert_count
+ << "; extent_count=" << stats.extent_count << dendl;
_shutdown_cache();
_close_db_and_around();
});
};
allocator->dump(count_entries);
ret = compare_allocators(allocator.get(), alloc, stats.insert_count, memory_target);
- if (ret != 0) {
+ if (ret == 0) {
dout(5) << "Allocator drive - file integrity check OK" << dendl;
} else {
derr << "FAILURE. Allocator from file and allocator from metadata differ::ret=" << ret << dendl;
}
}
- std::cout << stats << std::endl;
+ dout(1) << stats << dendl;
return ret;
}
int OSD::shutdown()
{
+ // note: vstart overwrites the osd_fast_shutdown value in the conf file; uncomment the next line to force it for local testing
+ //cct->_conf->osd_fast_shutdown = true;
+
+ dout(0) << "Fast Shutdown: - cct->_conf->osd_fast_shutdown = "
+ << cct->_conf->osd_fast_shutdown
+ << ", null-fm = " << store->has_null_manager() << dendl;
+
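+ // timestamp taken before any teardown so both the fast and slow paths can report their total duration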
+ utime_t start_time_func = ceph_clock_now();
+
if (cct->_conf->osd_fast_shutdown) {
derr << "*** Immediate shutdown (osd_fast_shutdown=true) ***" << dendl;
if (cct->_conf->osd_fast_shutdown_notify_mon)
service.prepare_to_stop();
- cct->_log->flush();
- _exit(0);
- }
- if (!service.prepare_to_stop())
+ // There is no state we need to keep when running in NULL-FM mode
+ if (!store->has_null_manager()) {
+ cct->_log->flush();
+ _exit(0);
+ }
+ } else if (!service.prepare_to_stop()) {
return 0; // already shutting down
+ }
+
osd_lock.lock();
if (is_stopping()) {
osd_lock.unlock();
return 0;
}
- dout(0) << "shutdown" << dendl;
+ if (!cct->_conf->osd_fast_shutdown) {
+ dout(0) << "shutdown" << dendl;
+ }
+
+ // don't accept new tasks for this OSD
set_state(STATE_STOPPING);
- // Debugging
- if (cct->_conf.get_val<bool>("osd_debug_shutdown")) {
+ // Debugging (disabled during fast-shutdown)
+ if (!cct->_conf->osd_fast_shutdown && cct->_conf.get_val<bool>("osd_debug_shutdown")) {
cct->_conf.set_val("debug_osd", "100");
cct->_conf.set_val("debug_journal", "100");
cct->_conf.set_val("debug_filestore", "100");
cct->_conf.apply_changes(nullptr);
}
+ if (cct->_conf->osd_fast_shutdown) {
+ // first, stop new tasks from being taken from op_shardedwq
+ // and clear all pending tasks
+ op_shardedwq.stop_for_fast_shutdown();
+
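+ // stop the tick timers so no new periodic work gets scheduled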
+ utime_t start_time_timer = ceph_clock_now();
+ tick_timer.shutdown();
+ {
+ std::lock_guard l(tick_timer_lock);
+ tick_timer_without_osd_lock.shutdown();
+ }
+
+ osd_lock.unlock();
+ utime_t start_time_osd_drain = ceph_clock_now();
+
+ // then, wait on osd_op_tp to drain (TBD: should probably add a timeout)
+ osd_op_tp.drain();
+ osd_op_tp.stop();
+
+ utime_t start_time_umount = ceph_clock_now();
+ store->prepare_for_fast_shutdown();
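+ // re-acquire osd_lock for the duration of umount()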
+ std::lock_guard lock(osd_lock);
+ // TBD: assert in allocator that nothing is being added
+ store->umount();
+
+ utime_t end_time = ceph_clock_now();
+ if (cct->_conf->osd_fast_shutdown_timeout) {
+ ceph_assert(end_time - start_time_func < cct->_conf->osd_fast_shutdown_timeout);
+ }
+ dout(0) <<"Fast Shutdown duration total :" << end_time - start_time_func << " seconds" << dendl;
+ dout(0) <<"Fast Shutdown duration osd_drain :" << start_time_umount - start_time_osd_drain << " seconds" << dendl;
+ dout(0) <<"Fast Shutdown duration umount :" << end_time - start_time_umount << " seconds" << dendl;
+ dout(0) <<"Fast Shutdown duration timer :" << start_time_osd_drain - start_time_timer << " seconds" << dendl;
+ cct->_log->flush();
+
+ // now it is safe to exit
+ _exit(0);
+ }
+
// stop MgrClient earlier as it's more like an internal consumer of OSD
mgrc.shutdown();
hb_front_server_messenger->shutdown();
hb_back_server_messenger->shutdown();
+ utime_t duration = ceph_clock_now() - start_time_func;
+ dout(0) <<"Slow Shutdown duration:" << duration << " seconds" << dendl;
+
tracing::osd::tracer.shutdown();
return r;
}
void OSD::ShardedOpWQ::_enqueue(OpSchedulerItem&& item) {
+ if (unlikely(m_fast_shutdown)) {
+ // stop enqueuing when we are in the middle of a fast shutdown
+ return;
+ }
+
uint32_t shard_index =
item.get_ordering_token().hash_to_shard(osd->shards.size());
void OSD::ShardedOpWQ::_enqueue_front(OpSchedulerItem&& item)
{
+ if (unlikely(m_fast_shutdown)) {
+ // stop enqueuing when we are in the middle of a fast shutdown
+ return;
+ }
+
auto shard_index = item.get_ordering_token().hash_to_shard(osd->shards.size());
auto& sdata = osd->shards[shard_index];
ceph_assert(sdata);
sdata->sdata_cond.notify_one();
}
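+// mark the queue as shutting down and discard everything already queued;
+// from this point _enqueue()/_enqueue_front() silently drop new items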
+void OSD::ShardedOpWQ::stop_for_fast_shutdown()
+{
+ m_fast_shutdown = true;
+
+ for (uint32_t shard_index = 0; shard_index < osd->num_shards; shard_index++) {
+ auto& sdata = osd->shards[shard_index];
+ ceph_assert(sdata);
+ std::lock_guard l(sdata->shard_lock);
+ // drain and discard anything still queued on this shard
+ while (!sdata->scheduler->empty()) {
+ sdata->scheduler->dequeue();
+ }
+ }
+}
+
namespace ceph::osd_cmds {
int heap(CephContext& cct, const cmdmap_t& cmdmap, Formatter& f,