From 3b1b5a93afba6a535e7e109c029ac136f3243e4e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 10 Dec 2014 15:45:26 -0800 Subject: [PATCH] osd: eliminate temp collections The temp objects have distinct pool ids. Old temp objects are already blown away on OSD restart. This patch removes all the futzing with temp_coll and puts the temp objects in the same collection as everything else. Interesting, collection_move_rename is now always using the same source and dest collection. Hmm! Signed-off-by: Sage Weil --- src/osd/ECBackend.cc | 25 ++++++++--------- src/osd/ECBackend.h | 1 - src/osd/ECTransaction.cc | 11 +++----- src/osd/OSD.cc | 39 +++++++++----------------- src/osd/OSD.h | 2 -- src/osd/PG.h | 1 - src/osd/PGBackend.cc | 20 +++----------- src/osd/PGBackend.h | 34 ++--------------------- src/osd/ReplicatedBackend.cc | 53 +++++++++++------------------------- src/osd/ReplicatedBackend.h | 1 - src/osd/ReplicatedPG.cc | 7 +---- src/osd/ReplicatedPG.h | 8 ++---- src/osd/osd_types.h | 4 --- 13 files changed, 54 insertions(+), 152 deletions(-) diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 7f2db01746b3b..d5e371201741f 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -170,12 +170,11 @@ void ECBackend::RecoveryOp::dump(Formatter *f) const ECBackend::ECBackend( PGBackend::Listener *pg, coll_t coll, - coll_t temp_coll, ObjectStore *store, CephContext *cct, ErasureCodeInterfaceRef ec_impl, uint64_t stripe_width) - : PGBackend(pg, store, coll, temp_coll), + : PGBackend(pg, store, coll), cct(cct), ec_impl(ec_impl), sinfo(ec_impl->get_data_chunk_count(), stripe_width) { @@ -244,7 +243,6 @@ void ECBackend::handle_recovery_push( RecoveryMessages *m) { bool oneshot = op.before_progress.first && op.after_progress.data_complete; - coll_t tcoll = oneshot ? coll : get_temp_coll(m->t); ghobject_t tobj; if (oneshot) { tobj = ghobject_t(op.soid, ghobject_t::NO_GEN, @@ -262,8 +260,8 @@ void ECBackend::handle_recovery_push( } if (op.before_progress.first) { - m->t->remove(tcoll, tobj); - m->t->touch(tcoll, tobj); + m->t->remove(coll, tobj); + m->t->touch(coll, tobj); } if (!op.data_included.empty()) { @@ -272,7 +270,7 @@ void ECBackend::handle_recovery_push( assert(op.data.length() == (end - start)); m->t->write( - tcoll, + coll, tobj, start, op.data.length(), @@ -284,7 +282,7 @@ void ECBackend::handle_recovery_push( if (op.before_progress.first) { assert(op.attrset.count(string("_"))); m->t->setattrs( - tcoll, + coll, tobj, op.attrset); } @@ -296,7 +294,7 @@ void ECBackend::handle_recovery_push( m->t->remove(coll, ghobject_t( op.soid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard)); m->t->collection_move_rename( - tcoll, tobj, + coll, tobj, coll, ghobject_t( op.soid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard)); } @@ -817,7 +815,6 @@ void ECBackend::handle_sub_write( ObjectStore::Transaction *localt = new ObjectStore::Transaction; localt->set_use_tbl(op.t.get_use_tbl()); if (!op.temp_added.empty()) { - get_temp_coll(localt); add_temp_objs(op.temp_added); } if (op.t.empty()) { @@ -827,7 +824,7 @@ void ECBackend::handle_sub_write( dout(10) << __func__ << ": removing object " << *i << " since we won't get the transaction" << dendl; localt->remove( - temp_coll, + coll, ghobject_t( *i, ghobject_t::NO_GEN, @@ -885,7 +882,7 @@ void ECBackend::handle_sub_read( ++j) { bufferlist bl; int r = store->read( - i->first.is_temp() ? temp_coll : coll, + coll, ghobject_t( i->first, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), j->get<0>(), @@ -914,7 +911,7 @@ void ECBackend::handle_sub_read( if (reply->errors.count(*i)) continue; int r = store->getattrs( - i->is_temp() ? temp_coll : coll, + coll, ghobject_t( *i, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), reply->attrs_read[*i]); @@ -1470,7 +1467,7 @@ ECUtil::HashInfoRef ECBackend::get_hash_info( dout(10) << __func__ << ": not in cache " << hoid << dendl; struct stat st; int r = store->stat( - hoid.is_temp() ? temp_coll : coll, + coll, ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), &st); ECUtil::HashInfo hinfo(ec_impl->get_chunk_count()); @@ -1478,7 +1475,7 @@ ECUtil::HashInfoRef ECBackend::get_hash_info( dout(10) << __func__ << ": found on disk, size " << st.st_size << dendl; bufferlist bl; r = store->getattr( - hoid.is_temp() ? temp_coll : coll, + coll, ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), ECUtil::get_hinfo_key(), bl); diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h index 4290de8da2561..d1062686fc917 100644 --- a/src/osd/ECBackend.h +++ b/src/osd/ECBackend.h @@ -442,7 +442,6 @@ public: ECBackend( PGBackend::Listener *pg, coll_t coll, - coll_t temp_coll, ObjectStore *store, CephContext *cct, ErasureCodeInterfaceRef ec_impl, diff --git a/src/osd/ECTransaction.cc b/src/osd/ECTransaction.cc index e1cf3868dfb08..1db2352e31896 100644 --- a/src/osd/ECTransaction.cc +++ b/src/osd/ECTransaction.cc @@ -93,20 +93,17 @@ struct TransGenerator : public boost::static_visitor { temp_removed->erase(hoid); temp_added->insert(hoid); } - return get_coll(shard, hoid); + return get_coll(shard); } coll_t get_coll_rm(shard_id_t shard, const hobject_t &hoid) { if (hoid.is_temp()) { temp_added->erase(hoid); temp_removed->insert(hoid); } - return get_coll(shard, hoid); + return get_coll(shard); } - coll_t get_coll(shard_id_t shard, const hobject_t &hoid) { - if (hoid.is_temp()) - return coll_t::make_temp_coll(spg_t(pgid, shard)); - else - return coll_t(spg_t(pgid, shard)); + coll_t get_coll(shard_id_t shard) { + return coll_t(spg_t(pgid, shard)); } void operator()(const ECTransaction::TouchOp &op) { diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index f6be098442c09..0ffda2bd4ca8d 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -4252,21 +4252,15 @@ void OSD::RemoveWQ::_process( if (!item.second->start_or_resume_clearing()) return; - list& remaining_colls = item.second->colls_to_remove; - for (list::iterator i = remaining_colls.begin(); - i != remaining_colls.end();) { - bool cont = remove_dir( - pg->cct, store, &mapper, &driver, pg->osr.get(), *i, item.second, - &finished, handle); - if (!cont) - return; - if (finished) { - i = remaining_colls.erase(i); - } else { - if (item.second->pause_clearing()) - queue_front(item); - return; - } + bool cont = remove_dir( + pg->cct, store, &mapper, &driver, pg->osr.get(), coll, item.second, + &finished, handle); + if (!cont) + return; + if (!finished) { + if (item.second->pause_clearing()) + queue_front(item); + return; } if (!item.second->start_deleting()) @@ -4275,18 +4269,11 @@ void OSD::RemoveWQ::_process( ObjectStore::Transaction *t = new ObjectStore::Transaction; PGLog::clear_info_log(pg->info.pgid, t); - // remove the collections themselves - list colls_to_remove; - pg->get_colls(&colls_to_remove); - for (list::iterator i = colls_to_remove.begin(); - i != colls_to_remove.end(); - ++i) { - if (g_conf->osd_inject_failure_on_pg_removal) { - generic_derr << "osd_inject_failure_on_pg_removal" << dendl; - exit(1); - } - t->remove_collection(*i); + if (g_conf->osd_inject_failure_on_pg_removal) { + generic_derr << "osd_inject_failure_on_pg_removal" << dendl; + exit(1); } + t->remove_collection(coll); // We need the sequencer to stick around until the op is complete store->queue_transaction( diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 85452057ca380..cb0880cb0b876 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -226,11 +226,9 @@ class DeletingState { public: const spg_t pgid; const PGRef old_pg_state; - list colls_to_remove; DeletingState(const pair &in) : lock("DeletingState::lock"), status(QUEUED), stop_deleting(false), pgid(in.first), old_pg_state(in.second) { - old_pg_state->get_colls(&colls_to_remove); } /// transition status to CLEARING_WAITING diff --git a/src/osd/PG.h b/src/osd/PG.h index b6808eba4eefc..e5e437757a47c 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1212,7 +1212,6 @@ public: const std::map > &missing_digest) { } virtual void _scrub_clear_state() { } virtual void _scrub_finish() { } - virtual void get_colls(list *out) = 0; virtual void split_colls( spg_t child, int split_bits, diff --git a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index dd25f7209a201..319110cd0ddb8 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -90,22 +90,12 @@ void PGBackend::on_change_cleanup(ObjectStore::Transaction *t) dout(10) << __func__ << ": Removing oid " << *i << " from the temp collection" << dendl; t->remove( - get_temp_coll(t), + coll, ghobject_t(*i, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard)); } temp_contents.clear(); } -coll_t PGBackend::get_temp_coll(ObjectStore::Transaction *t) -{ - if (temp_created) - return temp_coll; - if (!store->collection_exists(temp_coll)) - t->create_collection(temp_coll); - temp_created = true; - return temp_coll; -} - int PGBackend::objects_list_partial( const hobject_t &begin, int min, @@ -188,7 +178,7 @@ int PGBackend::objects_get_attr( { bufferptr bp; int r = store->getattr( - hoid.is_temp() ? temp_coll : coll, + coll, ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), attr.c_str(), bp); @@ -204,7 +194,7 @@ int PGBackend::objects_get_attrs( map *out) { return store->getattrs( - hoid.is_temp() ? temp_coll : coll, + coll, ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), *out); } @@ -282,13 +272,12 @@ PGBackend *PGBackend::build_pg_backend( const OSDMapRef curmap, Listener *l, coll_t coll, - coll_t temp_coll, ObjectStore *store, CephContext *cct) { switch (pool.type) { case pg_pool_t::TYPE_REPLICATED: { - return new ReplicatedBackend(l, coll, temp_coll, store, cct); + return new ReplicatedBackend(l, coll, store, cct); } case pg_pool_t::TYPE_ERASURE: { ErasureCodeInterfaceRef ec_impl; @@ -304,7 +293,6 @@ PGBackend *PGBackend::build_pg_backend( return new ECBackend( l, coll, - temp_coll, store, cct, ec_impl, diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index 6cc2dd47b44ec..90162ca513daf 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -43,7 +43,6 @@ protected: ObjectStore *store; const coll_t coll; - const coll_t temp_coll; public: /** * Provides interfaces for PGBackend callbacks @@ -230,11 +229,10 @@ }; Listener *parent; Listener *get_parent() const { return parent; } - PGBackend(Listener *l, ObjectStore *store, coll_t coll, coll_t temp_coll) : + PGBackend(Listener *l, ObjectStore *store, coll_t coll) : store(store), coll(coll), - temp_coll(temp_coll), - parent(l), temp_created(false) {} + parent(l) {} bool is_primary() const { return get_parent()->pgb_is_primary(); } OSDMapRef get_osdmap() const { return get_parent()->pgb_get_osdmap(); } const pg_info_t &get_info() { return get_parent()->get_info(); } @@ -341,38 +339,11 @@ }; virtual IsReadablePredicate *get_is_readable_predicate() = 0; - void temp_colls(list *out) { - if (temp_created) - out->push_back(temp_coll); - } - void split_colls( - spg_t child, - int split_bits, - int seed, - ObjectStore::Transaction *t) { - coll_t target = coll_t::make_temp_coll(child); - if (!temp_created) - return; - t->create_collection(target); - t->split_collection( - temp_coll, - split_bits, - seed, - target); - } - virtual void dump_recovery_info(Formatter *f) const = 0; private: - bool temp_created; set temp_contents; public: - coll_t get_temp_coll(ObjectStore::Transaction *t); - coll_t get_temp_coll() const { - return temp_coll; - } - bool have_temp_coll() const { return temp_created; } - // Track contents of temp collection, clear on reset void add_temp_obj(const hobject_t &oid) { temp_contents.insert(oid); @@ -631,7 +602,6 @@ const OSDMapRef curmap, Listener *l, coll_t coll, - coll_t temp_coll, ObjectStore *store, CephContext *cct); }; diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc index f2ee3b09f98a2..8765ae19f32ee 100644 --- a/src/osd/ReplicatedBackend.cc +++ b/src/osd/ReplicatedBackend.cc @@ -62,11 +62,9 @@ static void log_subop_stats( ReplicatedBackend::ReplicatedBackend( PGBackend::Listener *pg, coll_t coll, - coll_t temp_coll, ObjectStore *store, CephContext *cct) : - PGBackend(pg, store, - coll, temp_coll), + PGBackend(pg, store, coll), cct(cct) {} void ReplicatedBackend::run_recovery_op( @@ -252,15 +250,6 @@ void ReplicatedBackend::on_change() void ReplicatedBackend::on_flushed() { - if (have_temp_coll() && - !store->collection_empty(get_temp_coll())) { - vector objects; - store->collection_list(get_temp_coll(), objects); - derr << __func__ << ": found objects in the temp collection: " - << objects << ", crashing now" - << dendl; - assert(0 == "found garbage in the temp collection"); - } } int ReplicatedBackend::objects_read_sync( @@ -316,7 +305,6 @@ void ReplicatedBackend::objects_read_async( class RPGTransaction : public PGBackend::PGTransaction { coll_t coll; - coll_t temp_coll; set temp_added; set temp_cleared; ObjectStore::Transaction *t; @@ -336,17 +324,13 @@ class RPGTransaction : public PGBackend::PGTransaction { return get_coll(hoid); } const coll_t &get_coll(const hobject_t &hoid) { - if (hoid.is_temp()) - return temp_coll; - else - return coll; + return coll; } public: - RPGTransaction(coll_t coll, coll_t temp_coll, bool use_tbl) - : coll(coll), temp_coll(temp_coll), t(new ObjectStore::Transaction), written(0) - { - t->set_use_tbl(use_tbl); - } + RPGTransaction(coll_t coll, bool use_tbl) + : coll(coll), t(new ObjectStore::Transaction), written(0) { + t->set_use_tbl(use_tbl); + } /// Yields ownership of contained transaction ObjectStore::Transaction *get_transaction() { @@ -520,7 +504,7 @@ public: PGBackend::PGTransaction *ReplicatedBackend::get_transaction() { - return new RPGTransaction(coll, get_temp_coll(), parent->transaction_use_tbl()); + return new RPGTransaction(coll, parent->transaction_use_tbl()); } class C_OSD_OnOpCommit : public Context { @@ -603,7 +587,6 @@ void ReplicatedBackend::submit_transaction( ObjectStore::Transaction *local_t = new ObjectStore::Transaction; local_t->set_use_tbl(op_t->get_use_tbl()); if (!(t->get_temp_added().empty())) { - get_temp_coll(local_t); add_temp_objs(t->get_temp_added()); } clear_temp_objs(t->get_temp_cleared()); @@ -1166,14 +1149,13 @@ void ReplicatedBackend::sub_op_modify_impl(OpRequestRef op) if (m->new_temp_oid != hobject_t()) { dout(20) << __func__ << " start tracking temp " << m->new_temp_oid << dendl; add_temp_obj(m->new_temp_oid); - get_temp_coll(&rm->localt); } if (m->discard_temp_oid != hobject_t()) { dout(20) << __func__ << " stop tracking temp " << m->discard_temp_oid << dendl; if (rm->opt.empty()) { dout(10) << __func__ << ": removing object " << m->discard_temp_oid << " since we won't get the transaction" << dendl; - rm->localt.remove(temp_coll, m->discard_temp_oid); + rm->localt.remove(coll, m->discard_temp_oid); } clear_temp_obj(m->discard_temp_oid); } @@ -1695,13 +1677,10 @@ void ReplicatedBackend::submit_push_data( map &omap_entries, ObjectStore::Transaction *t) { - coll_t target_coll; hobject_t target_oid; if (first && complete) { - target_coll = coll; target_oid = recovery_info.soid; } else { - target_coll = get_temp_coll(t); target_oid = get_parent()->get_temp_recovery_object(recovery_info.version, recovery_info.soid.snap); if (first) { @@ -1712,10 +1691,10 @@ void ReplicatedBackend::submit_push_data( } if (first) { - t->remove(target_coll, target_oid); - t->touch(target_coll, target_oid); - t->truncate(target_coll, target_oid, recovery_info.size); - t->omap_setheader(target_coll, target_oid, omap_header); + t->remove(coll, target_oid); + t->touch(coll, target_oid); + t->truncate(coll, target_oid, recovery_info.size); + t->omap_setheader(coll, target_oid, omap_header); } uint64_t off = 0; for (interval_set::const_iterator p = intervals_included.begin(); @@ -1723,15 +1702,15 @@ void ReplicatedBackend::submit_push_data( ++p) { bufferlist bit; bit.substr_of(data_included, off, p.get_len()); - t->write(target_coll, target_oid, + t->write(coll, target_oid, p.get_start(), p.get_len(), bit); off += p.get_len(); } if (!omap_entries.empty()) - t->omap_setkeys(target_coll, target_oid, omap_entries); + t->omap_setkeys(coll, target_oid, omap_entries); if (!attrs.empty()) - t->setattrs(target_coll, target_oid, attrs); + t->setattrs(coll, target_oid, attrs); if (complete) { if (!first) { @@ -1739,7 +1718,7 @@ void ReplicatedBackend::submit_push_data( << target_oid << " from the temp collection" << dendl; clear_temp_obj(target_oid); t->remove(coll, recovery_info.soid); - t->collection_move_rename(target_coll, target_oid, coll, recovery_info.soid); + t->collection_move_rename(coll, target_oid, coll, recovery_info.soid); } submit_push_complete(recovery_info, t); diff --git a/src/osd/ReplicatedBackend.h b/src/osd/ReplicatedBackend.h index 5ad22bfc0b22d..51edf6f108559 100644 --- a/src/osd/ReplicatedBackend.h +++ b/src/osd/ReplicatedBackend.h @@ -33,7 +33,6 @@ public: ReplicatedBackend( PGBackend::Listener *pg, coll_t coll, - coll_t temp_coll, ObjectStore *store, CephContext *cct); diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 3c3a9bad56ba2..b0658980c5aea 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -1214,7 +1214,7 @@ ReplicatedPG::ReplicatedPG(OSDService *o, OSDMapRef curmap, PG(o, curmap, _pool, p), pgbackend( PGBackend::build_pg_backend( - _pool.info, curmap, this, coll_t(p), coll_t::make_temp_coll(p), o->store, cct)), + _pool.info, curmap, this, coll_t(p), o->store, cct)), object_contexts(o->cct, g_conf->osd_pg_object_context_cache_count), snapset_contexts_lock("ReplicatedPG::snapset_contexts"), new_backfill(false), @@ -5636,11 +5636,6 @@ void ReplicatedPG::do_osd_op_effects(OpContext *ctx, const ConnectionRef& conn) } } -coll_t ReplicatedPG::get_temp_coll(ObjectStore::Transaction *t) -{ - return pgbackend->get_temp_coll(t); -} - hobject_t ReplicatedPG::generate_temp_object() { ostringstream ss; diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 94ef080dbb8f3..42cacf59e6b55 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -1424,15 +1424,14 @@ public: private: hobject_t earliest_backfill() const; bool check_src_targ(const hobject_t& soid, const hobject_t& toid) const; + uint64_t temp_seq; ///< last id for naming temp objects - coll_t get_temp_coll(ObjectStore::Transaction *t); hobject_t generate_temp_object(); ///< generate a new temp object name /// generate a new temp object name (for recovery) hobject_t get_temp_recovery_object(eversion_t version, snapid_t snap); public: - void get_colls(list *out) { - out->push_back(coll); - return pgbackend->temp_colls(out); + coll_t get_coll() { + return coll; } void split_colls( spg_t child, @@ -1448,7 +1447,6 @@ public: seed, target); PG::_init(*t, child, pool); - pgbackend->split_colls(child, split_bits, seed, t); } private: struct NotTrimming; diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 7492602df0c0a..19d4ff909f6e9 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -488,10 +488,6 @@ public: : str(pg_and_snap_to_str(pgid, snap)) { } - static coll_t make_temp_coll(spg_t pgid) { - return coll_t(pg_to_tmp_str(pgid)); - } - const std::string& to_str() const { return str; } -- 2.39.5