/*
* osdmap encoding versions
*/
-#define CEPH_OSDMAP_INC_VERSION 3
-#define CEPH_OSDMAP_VERSION 3
+#define CEPH_OSDMAP_INC_VERSION 4
+#define CEPH_OSDMAP_VERSION 4
/*
* fs id
// Start a fresh pending incremental for the epoch following the committed
// osdmap. It carries the monmap's fsid and is seeded with the committed map's
// highest pool number.
void OSDMonitor::create_pending()
{
// Incremental's epoch constructor takes a single epoch_t argument; the pool
// counter is seeded explicitly below rather than through the constructor.
pending_inc = OSDMap::Incremental(osdmap.epoch+1);
pending_inc.fsid = mon->monmap->fsid;
+ pending_inc.highest_pool_num_new = osdmap.highest_pool_num;
dout(10) << "create_pending e " << pending_inc.epoch << dendl;
}
// Queue creation of a new pool called `name` in the pending incremental.
// Returns -EEXIST when the committed osdmap already has a pool of that name,
// otherwise 0 after the pool settings and its name are recorded in pending_inc.
int OSDMonitor::prepare_new_pool(string& name)
{
- int pool = 1;
- int err = 0;
- for (map<int,nstring>::iterator i = osdmap.pool_name.begin();
- i != osdmap.pool_name.end();
- i++) {
- if (i->second == name) {
- err = -EEXIST;
- goto out;
- }
- if (i->first >= pool)
- pool = i->first + 1;
// NOTE(review): only the committed map is consulted here; a name already
// queued in pending_inc.new_pool_names is not detected. Confirm callers cope.
+ if (osdmap.name_pool.count(name)) {
+ return -EEXIST;
}
// -1 means the counter has not been seeded for this pending epoch yet, so
// start from the committed map's value.
+ if (-1 == pending_inc.highest_pool_num_new)
+ pending_inc.highest_pool_num_new = osdmap.highest_pool_num;
// The new pool id is the next value of the running counter.
+ int pool = ++pending_inc.highest_pool_num_new;
// Settings recorded for the new pool, stamped with the pending epoch.
pending_inc.new_pools[pool].v.type = CEPH_PG_TYPE_REP;
pending_inc.new_pools[pool].v.size = 2;
pending_inc.new_pools[pool].v.crush_ruleset = 0;
pending_inc.new_pools[pool].v.lpgp_num = 0;
pending_inc.new_pools[pool].v.last_change = pending_inc.epoch;
pending_inc.new_pool_names[pool] = name;
-out:
- return err;
+ return 0;
}
bool OSDMonitor::prepare_command(MMonCommand *m)
p != pool_map.end();
p++) {
const pg_pool_t* pi = osdmap->get_pg_pool(p->first);
+ if (NULL == pi) {
+ dout(10) << " pool " << p->first
+ << " appears to have been deleted" << dendl;
+ continue;
+ }
if (pi->get_snap_epoch() == cur+1) {
PGPool *pool = p->second;
pi->build_removed_snaps(pool->newly_removed_snaps);
it++) {
PG *pg = it->second;
pg->lock();
+ if (!osdmap->have_pg_pool(pg->info.pgid.pool())) {
+ //pool is deleted!
+ queue_pg_for_deletion(pg);
+ pg->unlock();
+ continue;
+ }
if (pg->is_active()) {
// update started counter
if (!pg->info.snap_trimq.empty())
<< m->pg_list.size() << " pgs" << dendl;
if (!require_same_or_newer_map(m, m->get_epoch())) return;
-
+
for (vector<pg_t>::iterator it = m->pg_list.begin();
it != m->pg_list.end();
it++) {
pg_t pgid = *it;
- PG *pg;
-
+
if (pg_map.count(pgid) == 0) {
dout(10) << " don't have pg " << pgid << dendl;
continue;
}
-
- pg = _lookup_lock_pg(pgid);
+ dout(5) << "queue_pg_for_deletion: " << pgid << dendl;
+ PG *pg = _lookup_lock_pg(pgid);
if (pg->info.history.same_acting_since <= m->get_epoch()) {
if (pg->deleting) {
dout(10) << *pg << " already removing." << dendl;
} else {
- dout(10) << *pg << " removing." << dendl;
- assert(pg->get_role() == -1);
assert(pg->get_primary() == m->get_source().num());
- pg->deleting = true;
- remove_wq.queue(pg);
+ queue_pg_for_deletion(pg);
}
} else {
dout(10) << *pg << " ignoring remove request, pg changed in epoch "
- << pg->info.history.same_acting_since << " > " << m->get_epoch() << dendl;
+ << pg->info.history.same_acting_since
+ << " > " << m->get_epoch() << dendl;
}
pg->unlock();
}
-
delete m;
}
+
+// Flag pg as being deleted and hand it to the removal work queue.
+// The visible callers hold the pg lock around this call. Calling it again
+// for a PG that is already flagged is a no-op, so a PG is queued only once.
+void OSD::queue_pg_for_deletion(PG *pg)
+{
+ // The pool-deleted caller tests have_pg_pool() rather than pg->deleting,
+ // so it may reach this point again before the removal has run.
+ if (pg->deleting) {
+ dout(10) << *pg << " already removing." << dendl;
+ return;
+ }
+ dout(10) << *pg << " removing." << dendl;
+ // NOTE(review): the pool-deleted caller does not itself establish
+ // role == -1; confirm this assertion holds on that path.
+ assert(pg->get_role() == -1);
+ pg->deleting = true;
+ remove_wq.queue(pg);
+}
+
void OSD::_remove_pg(PG *pg)
{
pg_t pgid = pg->info.pgid;
void handle_pg_trim(class MOSDPGTrim *m);
void handle_pg_remove(class MOSDPGRemove *m);
+ void queue_pg_for_deletion(PG *pg);
void _remove_pg(PG *pg);
// helper for handle_pg_log and handle_pg_info
rulesets[CEPH_DATA_RULE] = "data";
rulesets[CEPH_METADATA_RULE] = "metadata";
rulesets[CEPH_CASDATA_RULE] = "casdata";
+ // If you add new rulesets, you MUST also change the default "highest_pool_num"
+ // initialization (in the OSDMap constructor) to match; otherwise very bad
+ // things WILL happen, like losing pools!
int pool = 0;
for (map<int,const char*>::iterator p = rulesets.begin(); p != rulesets.end(); p++) {
ceph_fsid_t fsid;
epoch_t epoch; // new epoch; we are a diff from epoch-1 to epoch
utime_t modified;
+ int highest_pool_num_new; //incremented by the OSDMonitor on each pool create
int32_t new_flags;
/*
::encode(fsid, bl);
::encode(epoch, bl);
::encode(modified, bl);
+ ::encode(highest_pool_num_new, bl);
::encode(new_flags, bl);
::encode(fullmap, bl);
::encode(crush, bl);
::decode(fsid, p);
::decode(epoch, p);
::decode(modified, p);
+ ::decode(highest_pool_num_new, p);
::decode(new_flags, p);
::decode(fullmap, p);
::decode(crush, p);
::decode(old_blacklist, p);
}
// Build an empty incremental for epoch e (default 0) with a zeroed fsid.
// highest_pool_num_new and new_max_osd start at -1, which the code applying an
// incremental treats as "not set"; new_flags also starts at -1 (presumably with
// the same meaning -- confirm).
- Incremental(epoch_t e=0) : epoch(e), new_flags(-1), new_max_osd(-1) {
+ Incremental(epoch_t e=0) :
+ epoch(e), highest_pool_num_new(-1), new_flags(-1), new_max_osd(-1) {
memset(&fsid, 0, sizeof(fsid));
}
Incremental(bufferlist &bl) {
ceph_fsid_t fsid;
epoch_t epoch; // what epoch of the osd cluster descriptor is this
utime_t created, modified; // epoch start time
+ int highest_pool_num; //the largest pool num in this epoch
uint32_t flags;
public:
OSDMap() : epoch(0),
+ highest_pool_num(2),
flags(0),
max_osd(0) {
memset(&fsid, 0, sizeof(fsid));
if (inc.new_max_osd >= 0)
set_max_osd(inc.new_max_osd);
+ if (inc.highest_pool_num_new != -1)
+ highest_pool_num = inc.highest_pool_num_new;
for (set<int32_t>::iterator p = inc.old_pools.begin();
p != inc.old_pools.end();
p++) {
max_pools = pools.rbegin()->first + 1;
::encode(max_pools, bl);
::encode(pools, bl);
+ ::encode(highest_pool_num, bl);
::encode(flags, bl);
int32_t max_pools;
::decode(max_pools, p);
::decode(pools, p);
+ ::decode(highest_pool_num, p);
::decode(flags, p);