Otherwise, we need to synchronize access to the shared PGPool objects.
The wasted memory is probably preferable to synchronization overhead.
Signed-off-by: Samuel Just <sam.just@inktank.com>
// ======================================================
// PG's
-PGPool* OSD::_get_pool(int id)
+PGPool OSD::_get_pool(int id, OSDMapRef createmap)
{
- map<int, PGPool*>::iterator pm = pool_map.find(id);
- PGPool *p = (pm == pool_map.end()) ? NULL : pm->second;
-
- if (!p) {
- if (!osdmap->have_pg_pool(id)) {
- dout(5) << __func__ << ": the OSDmap does not contain a PG pool with id = "
- << id << dendl;
- return NULL;
- }
+ if (!createmap->have_pg_pool(id)) {
+ dout(5) << __func__ << ": the OSDmap does not contain a PG pool with id = "
+ << id << dendl;
+ assert(0);
+ }
- p = new PGPool(id, osdmap->get_pool_name(id),
- osdmap->get_pg_pool(id)->auid);
- pool_map[id] = p;
- p->get();
+ PGPool p = PGPool(id, createmap->get_pool_name(id),
+ createmap->get_pg_pool(id)->auid);
- const pg_pool_t *pi = osdmap->get_pg_pool(id);
- p->info = *pi;
- p->snapc = pi->get_snap_context();
+ const pg_pool_t *pi = createmap->get_pg_pool(id);
+ p.info = *pi;
+ p.snapc = pi->get_snap_context();
- pi->build_removed_snaps(p->cached_removed_snaps);
- }
- dout(10) << "_get_pool " << p->id << " " << p->num_pg << " -> " << (p->num_pg+1) << dendl;
- p->num_pg++;
+ pi->build_removed_snaps(p.cached_removed_snaps);
+ dout(10) << "_get_pool " << p.id << dendl;
return p;
}
-void OSD::_put_pool(PGPool *p)
-{
- dout(10) << "_put_pool " << p->id << " " << p->num_pg
- << " -> " << (p->num_pg-1) << dendl;
- assert(p->num_pg > 0);
- p->num_pg--;
- if (!p->num_pg) {
- pool_map.erase(p->id);
- p->put();
- }
-}
-
PG *OSD::_open_lock_pg(
OSDMapRef createmap,
pg_t pgid, bool no_lockdep_check, bool hold_map_lock)
assert(osd_lock.is_locked());
dout(10) << "_open_lock_pg " << pgid << dendl;
- PGPool *pool = _get_pool(pgid.pool());
- assert(pool);
+ PGPool pool = _get_pool(pgid.pool(), createmap);
// create
PG *pg;
map<int64_t, int> pool_resize; // poolid -> old size
- // update pools
- for (map<int, PGPool*>::iterator p = pool_map.begin();
- p != pool_map.end();
- p++) {
- const pg_pool_t *pi = osdmap->get_pg_pool(p->first);
- if (pi == NULL) {
- dout(10) << " pool " << p->first << " appears to have been deleted" << dendl;
- continue;
- }
- PGPool *pool = p->second;
- bool changed = false;
-
- // make sure auid stays up to date
- pool->auid = pi->auid;
-
- // split?
- if (pool->info.get_pg_num() != pi->get_pg_num()) {
- dout(1) << " pool " << p->first << " pg_num " << pool->info.get_pg_num()
- << " -> " << pi->get_pg_num() << dendl;
- pool_resize[p->first] = pool->info.get_pg_num();
- changed = true;
- }
-
- if (pi->get_snap_epoch() == osdmap->get_epoch()) {
- pi->build_removed_snaps(pool->newly_removed_snaps);
- pool->newly_removed_snaps.subtract(pool->cached_removed_snaps);
- pool->cached_removed_snaps.union_of(pool->newly_removed_snaps);
- dout(10) << " pool " << p->first << " removed_snaps " << pool->cached_removed_snaps
- << ", newly so are " << pool->newly_removed_snaps << ")"
- << dendl;
- pool->snapc = pi->get_snap_context();
- changed = true;
- } else {
- dout(10) << " pool " << p->first << " removed snaps " << pool->cached_removed_snaps
- << ", unchanged (snap_epoch = " << pi->get_snap_epoch() << ")" << dendl;
- pool->newly_removed_snaps.clear();
- }
- if (changed)
- pool->info = *pi;
- }
-
-
// scan pg creations
hash_map<pg_t, create_pg_info>::iterator n = creating_pgs.begin();
while (n != creating_pgs.end()) {
pg->put(); // since we've taken it out of map
service.unreg_last_pg_scrub(pg->info.pgid, pg->info.history.last_scrub_stamp);
- _put_pool(pg->pool);
}
if (key.length() == 0)
key = op->get_oid().name;
- bool cap = caps.is_capable(pg->pool->name, pg->pool->auid, key,
+ bool cap = caps.is_capable(pg->pool.name, pg->pool.auid, key,
op->may_read(), op->may_write(), op->require_exec_caps());
- dout(20) << "op_has_sufficient_caps pool=" << pg->pool->id << " (" << pg->pool->name
- << ") owner=" << pg->pool->auid
+ dout(20) << "op_has_sufficient_caps pool=" << pg->pool.id << " (" << pg->pool.name
+ << ") owner=" << pg->pool.auid
<< " may_read=" << op->may_read()
<< " may_write=" << op->may_write()
<< " may_exec=" << op->may_exec()
protected:
// -- placement groups --
- map<int, PGPool*> pool_map;
hash_map<pg_t, PG*> pg_map;
map<pg_t, list<OpRequestRef> > waiting_for_pg;
PGRecoveryStats pg_recovery_stats;
- PGPool *_get_pool(int id);
- void _put_pool(PGPool *p);
+ PGPool _get_pool(int id, OSDMapRef createmap);
bool _have_pg(pg_t pgid);
PG *_lookup_lock_pg(pg_t pgid);
return *_dout << pg->gen_prefix();
}
+void PGPool::update(OSDMapRef map)
+{
+ const pg_pool_t *pi = map->get_pg_pool(id);
+ assert(pi);
+ info = *pi;
+ auid = pi->auid;
+ if (pi->get_snap_epoch() == map->get_epoch()) {
+ pi->build_removed_snaps(newly_removed_snaps);
+ newly_removed_snaps.subtract(cached_removed_snaps);
+ cached_removed_snaps.union_of(newly_removed_snaps);
+ snapc = pi->get_snap_context();
+ } else {
+ newly_removed_snaps.clear();
+ }
+}
+
PG::PG(OSDService *o, OSDMapRef curmap,
- PGPool *_pool, pg_t p, const hobject_t& loid, const hobject_t& ioid) :
+ PGPool _pool, pg_t p, const hobject_t& loid, const hobject_t& ioid) :
osd(o), osdmap_ref(curmap), pool(_pool),
_lock("PG::_lock"),
ref(0), deleting(false), dirty_info(false), dirty_log(false),
scrub_reserved(false), scrub_reserve_failed(false),
scrub_waiting_on(0),
active_rep_scrub(0),
- recovery_state(this)
-{
- pool->get();
-}
+ recovery_state(this) {}
void PG::lock(bool no_lockdep)
{
// -- crash recovery?
if (is_primary() &&
- pool->info.crash_replay_interval > 0 &&
+ pool.info.crash_replay_interval > 0 &&
may_need_replay(get_osdmap())) {
replay_until = ceph_clock_now(g_ceph_context);
- replay_until += pool->info.crash_replay_interval;
- dout(10) << "activate starting replay interval for " << pool->info.crash_replay_interval
+ replay_until += pool.info.crash_replay_interval;
+ dout(10) << "activate starting replay interval for " << pool.info.crash_replay_interval
<< " until " << replay_until << dendl;
state_set(PG_STATE_REPLAY);
// initialize snap_trimq
if (is_primary()) {
- snap_trimq = pool->cached_removed_snaps;
+ snap_trimq = pool.cached_removed_snaps;
snap_trimq.subtract(info.purged_snaps);
dout(10) << "activate - snap_trimq " << snap_trimq << dendl;
if (!snap_trimq.empty() && is_clean())
assert(lastmap == osdmap_ref);
dout(10) << "handle_advance_map " << newup << "/" << newacting << dendl;
osdmap_ref = osdmap;
+ pool.update(osdmap);
AdvMap evt(osdmap, lastmap, newup, newacting);
recovery_state.handle_event(evt, rctx);
}
{
PG *pg = context< RecoveryMachine >().pg;
dout(10) << "Active advmap" << dendl;
- if (!pg->pool->newly_removed_snaps.empty()) {
- pg->snap_trimq.union_of(pg->pool->newly_removed_snaps);
+ if (!pg->pool.newly_removed_snaps.empty()) {
+ pg->snap_trimq.union_of(pg->pool.newly_removed_snaps);
dout(10) << *pg << " snap_trimq now " << pg->snap_trimq << dendl;
pg->dirty_info = true;
}
struct PGPool {
int id;
- atomic_t nref;
- int num_pg;
string name;
uint64_t auid;
interval_set<snapid_t> newly_removed_snaps; // newly removed in the last epoch
PGPool(int i, const char *_name, uint64_t au) :
- id(i), num_pg(0), auid(au) {
+ id(i), auid(au) {
if (_name)
name = _name;
}
- void get() { nref.inc(); }
- void put() {
- if (nref.dec() == 0)
- delete this;
- }
+ void update(OSDMapRef map);
};
-
/** PG - Replica Placement Group
*
*/
protected:
OSDService *osd;
OSDMapRef osdmap_ref;
- PGPool *pool;
+ PGPool pool;
OSDMapRef get_osdmap() const {
assert(is_locked());
public:
PG(OSDService *o, OSDMapRef curmap,
- PGPool *_pool, pg_t p, const hobject_t& loid, const hobject_t& ioid);
- virtual ~PG() {
- pool->put();
- }
-
+ PGPool pool, pg_t p, const hobject_t& loid, const hobject_t& ioid);
+ virtual ~PG() {}
+
private:
// Prevent copying
PG(const PG& rhs);
}
ReplicatedPG::ReplicatedPG(OSDService *o, OSDMapRef curmap,
- PGPool *_pool, pg_t p, const hobject_t& oid,
+ PGPool _pool, pg_t p, const hobject_t& oid,
const hobject_t& ioid) :
PG(o, curmap, _pool, p, oid, ioid), temp_created(false),
temp_coll(coll_t::make_temp_coll(p)), snap_trimmer_machine(this)
if (m->may_write()) {
// snap
- if (pool->info.is_pool_snaps_mode()) {
+ if (pool.info.is_pool_snaps_mode()) {
// use pool's snapc
- ctx->snapc = pool->snapc;
+ ctx->snapc = pool.snapc;
} else {
// client specified snapc
ctx->snapc.seq = m->get_snap_seq();
// trim clone's snaps
vector<snapid_t> newsnaps;
for (unsigned i=0; i<snaps.size(); i++)
- if (!pool->info.is_removed_snap(snaps[i]))
+ if (!pool.info.is_removed_snap(snaps[i]))
newsnaps.push_back(snaps[i]);
if (newsnaps.empty()) {
public:
ReplicatedPG(OSDService *o, OSDMapRef curmap,
- PGPool *_pool, pg_t p, const hobject_t& oid,
+ PGPool _pool, pg_t p, const hobject_t& oid,
const hobject_t& ioid);
~ReplicatedPG() {}