OPTION(osd_op_queue_mclock_scrub_res, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_scrub_wgt, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_scrub_lim, OPT_DOUBLE)
+OPTION(osd_op_queue_mclock_pg_delete_res, OPT_DOUBLE)
+OPTION(osd_op_queue_mclock_pg_delete_wgt, OPT_DOUBLE)
+OPTION(osd_op_queue_mclock_pg_delete_lim, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_peering_event_res, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_peering_event_wgt, OPT_DOUBLE)
OPTION(osd_op_queue_mclock_peering_event_lim, OPT_DOUBLE)
// requested scrubs jump the queue of scheduled scrubs
OPTION(osd_requested_scrub_priority, OPT_U32)
+OPTION(osd_pg_delete_priority, OPT_U32)
+OPTION(osd_pg_delete_cost, OPT_U32) // set default cost equal to 1MB io
+
OPTION(osd_recovery_priority, OPT_U32)
// set default cost equal to 20MB io
OPTION(osd_recovery_cost, OPT_U32)
.add_see_also("osd_op_queue_mclock_scrub_wgt")
.add_see_also("osd_op_queue_mclock_scrub_lim"),
+ Option("osd_op_queue_mclock_pg_delete_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+ .set_default(0.0)
+ .set_description("mclock reservation of pg delete work")
+ .set_long_description("mclock reservation of pg delete work when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the reservation")
+ .add_see_also("osd_op_queue")
+ .add_see_also("osd_op_queue_mclock_client_op_res")
+ .add_see_also("osd_op_queue_mclock_client_op_wgt")
+ .add_see_also("osd_op_queue_mclock_client_op_lim")
+ .add_see_also("osd_op_queue_mclock_osd_rep_op_res")
+ .add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
+ .add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
+ .add_see_also("osd_op_queue_mclock_snap_res")
+ .add_see_also("osd_op_queue_mclock_snap_wgt")
+ .add_see_also("osd_op_queue_mclock_snap_lim")
+ .add_see_also("osd_op_queue_mclock_recov_res")
+ .add_see_also("osd_op_queue_mclock_recov_wgt")
+ .add_see_also("osd_op_queue_mclock_recov_lim")
+ .add_see_also("osd_op_queue_mclock_scrub_wgt")
+ .add_see_also("osd_op_queue_mclock_scrub_lim"),
+
+ Option("osd_op_queue_mclock_pg_delete_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+ .set_default(1.0)
+ .set_description("mclock weight of pg delete work")
+ .set_long_description("mclock weight of pg delete work when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the weight")
+ .add_see_also("osd_op_queue")
+ .add_see_also("osd_op_queue_mclock_client_op_res")
+ .add_see_also("osd_op_queue_mclock_client_op_wgt")
+ .add_see_also("osd_op_queue_mclock_client_op_lim")
+ .add_see_also("osd_op_queue_mclock_osd_rep_op_res")
+ .add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
+ .add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
+ .add_see_also("osd_op_queue_mclock_snap_res")
+ .add_see_also("osd_op_queue_mclock_snap_wgt")
+ .add_see_also("osd_op_queue_mclock_snap_lim")
+ .add_see_also("osd_op_queue_mclock_recov_res")
+ .add_see_also("osd_op_queue_mclock_recov_wgt")
+ .add_see_also("osd_op_queue_mclock_recov_lim")
+ .add_see_also("osd_op_queue_mclock_scrub_res")
+ .add_see_also("osd_op_queue_mclock_scrub_lim"),
+
+ Option("osd_op_queue_mclock_pg_delete_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+ .set_default(0.001)
+ .set_description("mclock weight of pg delete work limit requests")
+ .set_long_description("mclock weight of limit pg delete work when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the limit")
+ .add_see_also("osd_op_queue")
+ .add_see_also("osd_op_queue_mclock_client_op_res")
+ .add_see_also("osd_op_queue_mclock_client_op_wgt")
+ .add_see_also("osd_op_queue_mclock_client_op_lim")
+ .add_see_also("osd_op_queue_mclock_osd_rep_op_res")
+ .add_see_also("osd_op_queue_mclock_osd_rep_op_wgt")
+ .add_see_also("osd_op_queue_mclock_osd_rep_op_lim")
+ .add_see_also("osd_op_queue_mclock_snap_res")
+ .add_see_also("osd_op_queue_mclock_snap_wgt")
+ .add_see_also("osd_op_queue_mclock_snap_lim")
+ .add_see_also("osd_op_queue_mclock_recov_res")
+ .add_see_also("osd_op_queue_mclock_recov_wgt")
+ .add_see_also("osd_op_queue_mclock_recov_lim")
+ .add_see_also("osd_op_queue_mclock_scrub_res")
+ .add_see_also("osd_op_queue_mclock_scrub_wgt"),
+
Option("osd_op_queue_mclock_peering_event_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.0)
.set_description("mclock reservation of peering events")
.set_default(1<<20)
.set_description(""),
+ Option("osd_pg_delete_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+ .set_default(5)
+ .set_description(""),
+
+ Option("osd_pg_delete_cost", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+ .set_default(1<<20)
+ .set_description(""),
+
Option("osd_scrub_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(5)
.set_description(""),
epoch));
}
+// Queue a PGDelete work item for pgid at epoch e on the sharded op
+// queue.  Cost and priority come from the osd_pg_delete_cost /
+// osd_pg_delete_priority config options so delete work competes with
+// other background work in the op queue.
+void OSDService::queue_for_pg_delete(spg_t pgid, epoch_t e)
+{
+  dout(10) << __func__ << " on " << pgid << " e " << e << dendl;
+  enqueue_back(
+    OpQueueItem(
+      unique_ptr<OpQueueItem::OpQueueable>(
+        new PGDelete(pgid, e)),
+      cct->_conf->osd_pg_delete_cost,
+      cct->_conf->osd_pg_delete_priority,
+      ceph_clock_now(),
+      0,  // no owning client for background delete work
+      e));
+}
+
+// Called once a PG's data removal is complete (see PG::_delete_some):
+// drop the cached PG* from the op queue shard slot and forget the PG's
+// removal epoch.
+void OSDService::finish_pg_delete(PG *pg)
+{
+  osd->op_shardedwq.clear_pg_pointer(pg);
+  pg_remove_epoch(pg->get_pgid());
+}
+
void OSDService::_queue_for_recovery(
std::pair<epoch_t, PGRef> p,
uint64_t reserved_pushes)
for (ceph::unordered_map<spg_t,PG*>::iterator it = pg_map.begin();
it != pg_map.end();
++it) {
-
+ if (it->second->is_deleted()) {
+ continue;
+ }
list<obj_watch_item_t> pg_watchers;
PG *pg = it->second;
pg->get_watchers(&pg_watchers);
for (ceph::unordered_map<spg_t, PG*>::iterator p = pg_map.begin();
p != pg_map.end();
++p) {
+ if (p->second->is_deleted()) {
+ continue;
+ }
dout(20) << " kicking pg " << p->first << dendl;
p->second->lock();
if (p->second->get_num_ref() != 1) {
}
peering_wait_for_split.erase(to_wake);
}
- if (!service.get_osdmap()->have_pg_pool(pg->pg_id.pool()))
- _remove_pg(pg);
}
OSD::res_result OSD::_try_resurrect_pg(
PG *OSD::_lookup_lock_pg(spg_t pgid)
{
+  // Deleted PGs can linger in pg_map until someone notices; first try
+  // under the cheap read lock, and only retake the map as a writer when
+  // we find a deleted entry that needs to be unlinked.
-  RWLock::RLocker l(pg_map_lock);
-
-  auto pg_map_entry = pg_map.find(pgid);
-  if (pg_map_entry == pg_map.end())
+  while (true) {
+    {
+      RWLock::RLocker l(pg_map_lock);
+      auto p = pg_map.find(pgid);
+      if (p == pg_map.end()) {
+        return nullptr;
+      }
+      PG *pg = p->second;
+      pg->lock();
+      if (!pg->is_deleted()) {
+        return pg;
+      }
+      // deleted under us; drop the PG lock and upgrade to a write lock
+      pg->unlock();
+    }
+    // try again, this time with a write lock
+    {
+      RWLock::WLocker l(pg_map_lock);
+      auto p = pg_map.find(pgid);
+      if (p == pg_map.end()) {
+        return nullptr;
+      }
+      PG *pg = p->second;
+      pg->lock();
+      if (!pg->is_deleted()) {
+        return pg;
+      }
+      // still deleted: unlink the stale entry and drop pg_map's ref;
+      // the caller is told there is no such pg
+      pg_map.erase(p);
+      pg->put("PGMap");
+      pg->unlock();
+    }
return nullptr;
-  PG *pg = pg_map_entry->second;
-  pg->lock();
-  return pg;
+  }
}
PG *OSD::lookup_lock_pg(spg_t pgid)
for (ceph::unordered_map<spg_t, PG*>::iterator i = pg_map.begin();
i != pg_map.end();
++i) {
+ if (i->second->is_deleted()) {
+ continue;
+ }
PG *pg = i->second;
pg->with_heartbeat_peers([&](int peer) {
if (osdmap->is_up(peer)) {
RWLock::RLocker l(pg_map_lock);
for (ceph::unordered_map<spg_t, PG*>::const_iterator pg_map_e = pg_map.begin();
pg_map_e != pg_map.end(); ++pg_map_e) {
+ if (pg_map_e->second->is_deleted()) {
+ continue;
+ }
PG *pg = pg_map_e->second;
string s = stringify(pg->pg_id);
f->open_array_section(s.c_str());
if (m->scrub_pgs.empty()) {
for (ceph::unordered_map<spg_t, PG*>::iterator p = pg_map.begin();
p != pg_map.end();
- ++p)
+ ++p) {
+ if (p->second->is_deleted()) {
+ continue;
+ }
handle_pg_scrub(m, p->second);
+ }
} else {
for (vector<pg_t>::iterator p = m->scrub_pgs.begin();
p != m->scrub_pgs.end();
set<PGRef> *new_pgs)
{
assert(pg->is_locked());
- epoch_t next_epoch = pg->get_osdmap()->get_epoch() + 1;
OSDMapRef lastmap = pg->get_osdmap();
-
assert(lastmap->get_epoch() < osd_epoch);
- for (;
+ for (epoch_t next_epoch = pg->get_osdmap()->get_epoch() + 1;
next_epoch <= osd_epoch;
++next_epoch) {
OSDMapRef nextmap = service.try_get_map(next_epoch);
}
int num_pg_primary = 0, num_pg_replica = 0, num_pg_stray = 0;
- list<PGRef> to_remove;
// scan pg's
{
it != pg_map.end();
++it) {
PG *pg = it->second;
+ if (pg->is_deleted()) {
+ continue;
+ }
pg->lock();
if (pg->is_primary())
num_pg_primary++;
else
num_pg_stray++;
- if (!osdmap->have_pg_pool(pg->pg_id.pool())) {
- //pool is deleted!
- to_remove.push_back(PGRef(pg));
- } else {
- service.init_splits_between(it->first, service.get_osdmap(), osdmap);
- }
+ service.init_splits_between(it->first, service.get_osdmap(), osdmap);
pg->unlock();
}
}
}
- for (list<PGRef>::iterator i = to_remove.begin();
- i != to_remove.end();
- to_remove.erase(i++)) {
- RWLock::WLocker locker(pg_map_lock);
- (*i)->lock();
- _remove_pg(&**i);
- (*i)->unlock();
- }
-
service.expand_pg_num(service.get_osdmap(), osdmap);
service.pre_publish_map(osdmap);
it != pg_map.end();
++it) {
PG *pg = it->second;
+ if (pg->is_deleted()) {
+ continue;
+ }
pg->lock();
pg->queue_null(osdmap->get_epoch(), osdmap->get_epoch());
pg->unlock();
continue;
}
- RWLock::WLocker l(pg_map_lock);
- if (pg_map.count(pgid) == 0) {
- dout(10) << " don't have pg " << pgid << dendl;
- continue;
- }
- dout(5) << "queue_pg_for_deletion: " << pgid << dendl;
- PG *pg = _lookup_lock_pg_with_map_lock_held(pgid);
- pg_history_t history = pg->get_history();
- int up_primary, acting_primary;
- vector<int> up, acting;
- osdmap->pg_to_up_acting_osds(
- pgid.pgid, &up, &up_primary, &acting, &acting_primary);
- bool valid_history = project_pg_history(
- pg->pg_id, history, pg->get_osdmap()->get_epoch(),
- up, up_primary, acting, acting_primary);
- if (valid_history &&
- history.same_interval_since <= m->get_epoch()) {
- assert(pg->get_primary().osd == m->get_source().num());
- PGRef _pg(pg);
- _remove_pg(pg);
+ PG *pg = _lookup_lock_pg(pgid);
+ if (pg) {
+ pg->queue_peering_event(
+ PGPeeringEventRef(
+ new PGPeeringEvent(
+ m->get_epoch(), m->get_epoch(),
+ PG::DeleteStart())));
pg->unlock();
} else {
- dout(10) << *pg << " ignoring remove request, pg changed in epoch "
- << history.same_interval_since
- << " > " << m->get_epoch() << dendl;
- pg->unlock();
+ dout(10) << " don't have pg " << pgid << dendl;
}
}
}
service.pg_remove_epoch(pg->pg_id);
// dereference from op_wq
- op_shardedwq.clear_pg_pointer(pg->pg_id);
+ //op_shardedwq.clear_pg_pointer(pg->pg_id);
// remove from map
pg_map.erase(pg->pg_id);
PGPeeringEventRef evt,
ThreadPool::TPHandle& handle)
{
- if (pg->is_deleting()) {
- pg->unlock();
- return;
- }
auto curmap = service.get_osdmap();
PG::RecoveryCtx rctx = create_context();
set<PGRef> split_pgs;
if (!split_pgs.empty()) {
rctx.on_applied->add(new C_CompleteSplits(this, split_pgs));
split_pgs.clear();
- }
+ }
dispatch_context_transaction(rctx, pg, &handle);
+ bool deleted = pg->is_deleted();
pg->unlock();
+ if (deleted) {
+ RWLock::WLocker l(pg_map_lock);
+ auto p = pg_map.find(pg->get_pgid());
+ if (p != pg_map.end() &&
+ p->second == pg) {
+ dout(20) << __func__ << " removed pg " << pg << " from pg_map" << dendl;
+ pg_map.erase(p);
+ pg->put("PGMap");
+ } else {
+ dout(20) << __func__ << " failed to remove pg " << pg << " from pg_map" << dendl;
+ }
+ }
+
if (need_up_thru) {
queue_want_up_thru(same_interval_since);
}
service.send_pg_temp();
}
+// Process one dequeued PGDelete item: translate it into a DeleteSome
+// peering event so the Deleting state machine removes the next batch of
+// objects.  e is the epoch the item was queued at, used for staleness
+// checks in the peering-event path.
+void OSD::dequeue_delete(
+  PG *pg,
+  epoch_t e,
+  ThreadPool::TPHandle& handle)
+{
+  dequeue_peering_evt(
+    pg,
+    PGPeeringEventRef(
+      new PGPeeringEvent(
+        e, e,
+        PG::DeleteSome())),
+    handle);
+}
+
+
+
+
+
+
// --------------------------------
const char** OSD::get_tracked_conf_keys() const
}
}
-void OSD::ShardedOpWQ::clear_pg_pointer(spg_t pgid)
+// Clear the cached PG* in this pg's shard slot so no further queued
+// work resolves to the deleted PG.  Takes the PG* (not just the id) so
+// we can assert the slot has not been repopulated with a different PG
+// instance for the same pgid.
+void OSD::ShardedOpWQ::clear_pg_pointer(PG *pg)
{
+  spg_t pgid = pg->get_pgid();
uint32_t shard_index = pgid.hash_to_shard(shard_list.size());
auto sdata = shard_list[shard_index];
Mutex::Locker l(sdata->sdata_op_ordering_lock);
auto p = sdata->pg_slots.find(pgid);
if (p != sdata->pg_slots.end()) {
auto& slot = p->second;
-    dout(20) << __func__ << " " << pgid << " pg " << slot.pg << dendl;
-    assert(!slot.pg || slot.pg->is_deleting());
+    // the slot may already be empty, but must never point elsewhere
+    assert(!slot.pg || slot.pg == pg);
+    dout(20) << __func__ << " " << pgid << " pg " << pg << " cleared" << dendl;
slot.pg = nullptr;
}
}
osd->service.maybe_inject_dispatch_delay();
// [lookup +] lock pg (if we have it)
- if (!pg) {
- pg = osd->_lookup_lock_pg(token);
- } else {
- pg->lock();
+ while (true) {
+ if (!pg) {
+ pg = osd->_lookup_lock_pg(token);
+ } else {
+ pg->lock();
+ if (pg->is_deleted()) {
+ dout(20) << __func__ << " got deleted pg " << pg << ", retrying" << dendl;
+ pg->unlock();
+ pg = nullptr;
+ continue;
+ }
+ }
+ break;
}
osd->service.maybe_inject_dispatch_delay();
sdata->sdata_op_ordering_lock.Unlock();
return;
}
- if (pg && !slot.pg && !pg->is_deleting()) {
+ if (pg && !slot.pg) {
dout(20) << __func__ << " " << token << " set pg to " << pg << dendl;
slot.pg = pg;
}
AsyncReserver<spg_t> snap_reserver;
void queue_for_snap_trim(PG *pg);
void queue_for_scrub(PG *pg, bool with_high_priority);
+ void queue_for_pg_delete(spg_t pgid, epoch_t e);
+ void finish_pg_delete(PG *pg);
private:
// -- pg recovery and associated throttling --
friend class PGOpItem;
friend class PGPeeringItem;
friend class PGRecovery;
+ friend class PGDelete;
class ShardedOpWQ
: public ShardedThreadPool::ShardedWQ<OpQueueItem>
void prune_pg_waiters(OSDMapRef osdmap, int whoami);
/// clear cached PGRef on pg deletion
- void clear_pg_pointer(spg_t pgid);
+ void clear_pg_pointer(PG *pg);
/// clear pg_slots on shutdown
void clear_pg_slots();
PGPeeringEventRef ref,
ThreadPool::TPHandle& handle);
+ void dequeue_delete(
+ PG *pg,
+ epoch_t epoch,
+ ThreadPool::TPHandle& handle);
+
friend class PG;
friend class PrimaryLogPG;
pg->unlock();
}
+// OpQueueItem entry point for delete work: forward to the OSD's delete
+// dequeue path along with the epoch this item was queued at.
+void PGDelete::run(
+  OSD *osd,
+  PGRef& pg,
+  ThreadPool::TPHandle &handle)
+{
+  osd->dequeue_delete(pg.get(), epoch_queued, handle);
+}
peering_event,
bg_snaptrim,
bg_recovery,
- bg_scrub
+ bg_scrub,
+ bg_pg_delete
};
using Ref = std::unique_ptr<OpQueueable>;
friend ostream& operator<<(ostream& out, const OpQueueable& q) {
return q.print(out);
}
+
};
private:
virtual void run(
OSD *osd, PGRef& pg, ThreadPool::TPHandle &handle) override final;
};
+
+// Background op-queue item that drives incremental deletion of a PG's
+// data.  Carries the epoch at which the delete was (re)queued so stale
+// items can be recognized after an interval change.
+class PGDelete : public PGOpQueueable {
+  epoch_t epoch_queued;
+public:
+  PGDelete(
+    spg_t pg,
+    epoch_t epoch_queued)
+    : PGOpQueueable(pg),
+      epoch_queued(epoch_queued) {}
+  op_type_t get_op_type() const override final {
+    return op_type_t::bg_pg_delete;
+  }
+  ostream &print(ostream &rhs) const override final {
+    return rhs << "PGDelete(" << get_pgid()
+	       << " e" << epoch_queued
+	       << ")";
+  }
+  void run(
+    OSD *osd, PGRef& pg, ThreadPool::TPHandle &handle) override final;
+};
out << " r=" << pg.get_role();
out << " lpr=" << pg.get_last_peering_reset();
+ if (pg.deleting)
+ out << " DELETING";
+
if (!pg.past_intervals.empty()) {
out << " pi=[" << pg.past_intervals.get_bounds()
<< ")/" << pg.past_intervals.size();
}
}
+
+// Remove one batch of this PG's objects (bounded by the store's ideal
+// list size and osd_target_transaction_size).  If objects remain, the
+// commit callback requeues a PGDelete for the next batch; when the
+// listing comes back empty, clear the info/log, remove the collection,
+// and finalize the delete.
+void PG::_delete_some()
+{
+  dout(10) << __func__ << dendl;
+
+  // this ensures we get a valid result. it *also* serves to throttle
+  // us a bit (on filestore) because we won't delete more until the
+  // previous deletions are applied.
+  osr->flush();
+
+  vector<ghobject_t> olist;
+  ObjectStore::Transaction t;
+  int max = std::min(osd->store->get_ideal_list_max(),
+		     (int)cct->_conf->osd_target_transaction_size);
+  ghobject_t next;
+  osd->store->collection_list(
+    coll,
+    next,
+    ghobject_t::get_max(),
+    max,
+    &olist,
+    &next);
+  dout(20) << __func__ << " " << olist << dendl;
+
+  OSDriver::OSTransaction _t(osdriver.get_transaction(&t));
+  int64_t num = 0;
+  for (auto& oid : olist) {
+    // the pgmeta object is cleaned up with the collection removal below
+    if (oid.is_pgmeta()) {
+      continue;
+    }
+    int r = snap_mapper.remove_oid(oid.hobj, &_t);
+    if (r != 0 && r != -ENOENT) {
+      ceph_abort();
+    }
+    t.remove(coll, oid);
+    ++num;
+  }
+  epoch_t e = get_osdmap()->get_epoch();
+  if (num) {
+    dout(20) << __func__ << " deleting " << num << " objects" << dendl;
+    // on commit, requeue ourselves for the next batch unless the PG has
+    // been reset since we captured epoch e
+    struct C_DeleteMore : public Context {
+      PGRef pg;
+      epoch_t epoch;
+      C_DeleteMore(PG *p, epoch_t e) : pg(p), epoch(e) {}
+      void finish(int r) {
+	if (r >= 0) {
+	  pg->lock();
+	  if (!pg->pg_has_reset_since(epoch)) {
+	    pg->osd->queue_for_pg_delete(pg->get_pgid(), epoch);
+	  }
+	  pg->unlock();
+	}
+      }
+    };
+    osd->store->queue_transaction(
+      osr.get(),
+      std::move(t),
+      new C_DeleteMore(this, e));
+  } else {
+    dout(20) << __func__ << " finished" << dendl;
+    if (cct->_conf->osd_inject_failure_on_pg_removal) {
+      _exit(1);
+    }
+
+    ObjectStore::Transaction t;
+    PGLog::clear_info_log(info.pgid, &t);
+    t.remove_collection(coll);
+    int r = osd->store->queue_transaction(
+      osd->meta_osr.get(), std::move(t),
+      // keep pg ref around until txn completes to avoid any issues
+      // with Sequencer lifecycle (seen w/ filestore).
+      new ContainerContext<PGRef>(this));
+    assert(r == 0);
+
+    osd->finish_pg_delete(this);
+    // from here on pg_has_reset_since() and is_deleted() report true
+    deleted = true;
+  }
+}
+
+
+
/*------------ Recovery State Machine----------------*/
#undef dout_prefix
#define dout_prefix (*_dout << context< RecoveryMachine >().pg->gen_prefix() \
assert(!pg->is_peered());
assert(!pg->is_peering());
assert(!pg->is_primary());
- pg->start_flush(
- context< RecoveryMachine >().get_cur_transaction(),
- context< RecoveryMachine >().get_on_applied_context_list(),
- context< RecoveryMachine >().get_on_safe_context_list());
+
+ if (!pg->get_osdmap()->have_pg_pool(pg->get_pgid().pool())) {
+ ldout(pg->cct,10) << __func__ << " pool is deleted" << dendl;
+ post_event(DeleteStart());
+ } else {
+ pg->start_flush(
+ context< RecoveryMachine >().get_cur_transaction(),
+ context< RecoveryMachine >().get_on_applied_context_list(),
+ context< RecoveryMachine >().get_on_safe_context_list());
+ }
}
boost::statechart::result PG::RecoveryState::Stray::react(const MLogRec& logevt)
pg->osd->recoverystate_perf->tinc(rs_stray_latency, dur);
}
+
+/*--------Deleting----------*/
+// Entered via the DeleteStart transition.  Flags the PG as deleting,
+// records removal side effects in the current peering transaction, and
+// queues the first PGDelete work item to start batched object removal.
+PG::RecoveryState::Deleting::Deleting(my_context ctx)
+  : my_base(ctx),
+    NamedState(context< RecoveryMachine >().pg, "Started/Deleting")
+{
+  context< RecoveryMachine >().log_enter(state_name);
+  PG *pg = context< RecoveryMachine >().pg;
+  pg->deleting = true;
+  ObjectStore::Transaction* t = context<RecoveryMachine>().get_cur_transaction();
+  pg->on_removal(t);
+  pg->osd->logger->inc(l_osd_pg_removing);
+  pg->osd->queue_for_pg_delete(pg->get_pgid(), pg->get_osdmap()->get_epoch());
+}
+
+// Each DeleteSome event removes one more batch of objects; requeueing
+// is driven by the commit callback inside _delete_some().
+boost::statechart::result PG::RecoveryState::Deleting::react(const DeleteSome& evt)
+{
+  PG *pg = context< RecoveryMachine >().pg;
+  pg->_delete_some();
+  return discard_event();
+}
+
+void PG::RecoveryState::Deleting::exit()
+{
+  context< RecoveryMachine >().log_exit(state_name, enter_time);
+  PG *pg = context< RecoveryMachine >().pg;
+  // clear the transient in-progress flag; the permanent 'deleted' flag
+  // (set by _delete_some on completion) is not touched here
+  pg->deleting = false;
+  pg->osd->logger->dec(l_osd_pg_removing);
+}
+
/*--------GetInfo---------*/
PG::RecoveryState::GetInfo::GetInfo(my_context ctx)
: my_base(ctx),
bool is_deleting() const {
return deleting;
}
+  // True once _delete_some() has fully removed this PG's data.  Unlike
+  // is_deleting(), this never reverts to false.
+  bool is_deleted() const {
+    return deleted;
+  }
bool is_replica() const {
return role > 0;
}
}
bool pg_has_reset_since(epoch_t e) {
assert(is_locked());
- return deleting || e < get_last_peering_reset();
+ return deleted || e < get_last_peering_reset();
}
bool is_ec_pg() const {
virtual void on_removal(ObjectStore::Transaction *t) = 0;
+ void _delete_some();
void pg_remove_object(const ghobject_t& oid, ObjectStore::Transaction *t);
// reference counting
bool deleting; // true while in removing or OSD is shutting down
+ bool deleted = false;
ZTracer::Endpoint trace_endpoint;
TrivialEvent(IntervalFlush)
+ TrivialEvent(DeleteStart)
+ TrivialEvent(DeleteSome)
+
/* Encapsulates PG recovery process */
class RecoveryState {
void start_handle(RecoveryCtx *new_ctx);
// RepWaitBackfillReserved
// RepWaitRecoveryReserved
// Stray
+ // Deleting
+ // Crashed
struct Crashed : boost::statechart::state< Crashed, RecoveryMachine >, NamedState {
explicit Crashed(my_context ctx);
void exit();
};
+ struct Deleting;
struct RepNotRecovering;
struct ReplicaActive : boost::statechart::state< ReplicaActive, Started, RepNotRecovering >, NamedState {
explicit ReplicaActive(my_context ctx);
boost::statechart::custom_reaction< UnfoundRecovery >,
boost::statechart::custom_reaction< UnfoundBackfill >,
boost::statechart::custom_reaction< RemoteBackfillPreempted >,
- boost::statechart::custom_reaction< RemoteRecoveryPreempted >
+ boost::statechart::custom_reaction< RemoteRecoveryPreempted >,
+ boost::statechart::transition<DeleteStart, Deleting>
> reactions;
boost::statechart::result react(const QueryState& q);
boost::statechart::result react(const MInfoRec& infoevt);
void exit();
};
- struct Stray : boost::statechart::state< Stray, Started >, NamedState {
- map<int, pair<pg_query_t, epoch_t> > pending_queries;
-
+ struct Stray : boost::statechart::state< Stray, Started >,
+ NamedState {
explicit Stray(my_context ctx);
void exit();
boost::statechart::custom_reaction< MLogRec >,
boost::statechart::custom_reaction< MInfoRec >,
boost::statechart::custom_reaction< ActMap >,
- boost::statechart::custom_reaction< RecoveryDone >
+ boost::statechart::custom_reaction< RecoveryDone >,
+ boost::statechart::transition<DeleteStart, Deleting>
> reactions;
boost::statechart::result react(const MQuery& query);
boost::statechart::result react(const MLogRec& logevt);
}
};
+  // Substate of Started entered via the DeleteStart transition.
+  // Consumes DeleteSome events to incrementally remove the PG's data
+  // and discards ActMap events while the delete is in progress.
+  struct Deleting : boost::statechart::state<Deleting, Started>, NamedState {
+    typedef boost::mpl::list <
+      boost::statechart::custom_reaction< ActMap >,
+      boost::statechart::custom_reaction< DeleteSome >
+    > reactions;
+    explicit Deleting(my_context ctx);
+    boost::statechart::result react(const ActMap &evt) {
+      return discard_event();
+    }
+    boost::statechart::result react(const DeleteSome &evt);
+    void exit();
+  };
+
+
struct GetLog;
struct GetInfo : boost::statechart::state< GetInfo, Peering >, NamedState {
void exit();
};
-
RecoveryMachine machine;
PG *pg;
pg_log.reset_backfill();
dirty_info = true;
-
// clear log
PGLogEntryHandler rollbacker{this, t};
pg_log.roll_forward(&rollbacker);
- write_if_dirty(*t);
-
- if (!deleting)
+ if (!deleting) {
on_shutdown();
+ }
}
void PrimaryLogPG::clear_async_reads()
scrub(cct->_conf->osd_op_queue_mclock_scrub_res,
cct->_conf->osd_op_queue_mclock_scrub_wgt,
cct->_conf->osd_op_queue_mclock_scrub_lim),
+ pg_delete(cct->_conf->osd_op_queue_mclock_pg_delete_res,
+ cct->_conf->osd_op_queue_mclock_pg_delete_wgt,
+ cct->_conf->osd_op_queue_mclock_pg_delete_lim),
peering_event(cct->_conf->osd_op_queue_mclock_peering_event_res,
cct->_conf->osd_op_queue_mclock_peering_event_wgt,
cct->_conf->osd_op_queue_mclock_peering_event_lim)
using op_item_type_t = OpQueueItem::OpQueueable::op_type_t;
enum class osd_op_type_t {
- client_op, osd_rep_op, bg_snaptrim, bg_recovery, bg_scrub, peering_event
+ client_op, osd_rep_op, bg_snaptrim, bg_recovery, bg_scrub, bg_pg_delete,
+ peering_event
};
class OpClassClientInfoMgr {
crimson::dmclock::ClientInfo snaptrim;
crimson::dmclock::ClientInfo recov;
crimson::dmclock::ClientInfo scrub;
+ crimson::dmclock::ClientInfo pg_delete;
crimson::dmclock::ClientInfo peering_event;
static constexpr std::size_t rep_op_msg_bitset_size = 128;
return &recov;
case osd_op_type_t::bg_scrub:
return &scrub;
+ case osd_op_type_t::bg_pg_delete:
+ return &pg_delete;
case osd_op_type_t::peering_event:
return &peering_event;
default:
return osd_op_type_t::bg_recovery;
case op_item_type_t::bg_scrub:
return osd_op_type_t::bg_scrub;
+ case op_item_type_t::bg_pg_delete:
+ return osd_op_type_t::bg_pg_delete;
case op_item_type_t::peering_event:
return osd_op_type_t::peering_event;
default: