struct creating_pgs_t {
epoch_t last_scan_epoch = 0;
+
+ /// pgs we are currently creating
std::map<pg_t, std::pair<epoch_t, utime_t> > pgs;
- std::set<int64_t> created_pools;
- unsigned create_pool(int64_t poolid, uint32_t pg_num,
- epoch_t created, utime_t modified) {
- if (created_pools.count(poolid)) {
- return 0;
+ struct create_info {
+ epoch_t created;
+ utime_t modified;
+ uint64_t start = 0;
+ uint64_t end = 0;
+ bool done() const {
+ return start >= end;
}
- const unsigned total = pgs.size();
- for (ps_t ps = 0; ps < pg_num; ps++) {
- const pg_t pgid{ps, static_cast<uint64_t>(poolid)};
- if (pgs.count(pgid)) {
- continue;
- }
- pgs.emplace(pgid, make_pair(created, modified));
+ void encode(bufferlist& bl) const {
+ ::encode(created, bl);
+ ::encode(modified, bl);
+ ::encode(start, bl);
+ ::encode(end, bl);
}
- return pgs.size() - total;
- }
+ void decode(bufferlist::iterator& p) {
+ ::decode(created, p);
+ ::decode(modified, p);
+ ::decode(start, p);
+ ::decode(end, p);
+ }
+ };
+
+ /// queue of pgs we still need to create (poolid -> <created, set of ps>)
+ map<int64_t,create_info> queue;
+ /// pools that exist in the osdmap for which at least one pg has been created
+ std::set<int64_t> created_pools;
+
+ void create_pool(int64_t poolid, uint32_t pg_num,
+ epoch_t created, utime_t modified) {
+ if (created_pools.count(poolid) == 0) {
+ auto& c = queue[poolid];
+ c.created = created;
+ c.modified = modified;
+ c.end = pg_num;
+ created_pools.insert(poolid);
+ }
+ }
unsigned remove_pool(int64_t removed_pool) {
const unsigned total = pgs.size();
auto first = pgs.lower_bound(pg_t{0, (uint64_t)removed_pool});
return total - pgs.size();
}
void encode(bufferlist& bl) const {
- ENCODE_START(1, 1, bl);
+ ENCODE_START(2, 1, bl);
::encode(last_scan_epoch, bl);
::encode(pgs, bl);
::encode(created_pools, bl);
+ ::encode(queue, bl);
ENCODE_FINISH(bl);
}
void decode(bufferlist::iterator& bl) {
- DECODE_START(1, bl);
+ DECODE_START(2, bl);
::decode(last_scan_epoch, bl);
::decode(pgs, bl);
::decode(created_pools, bl);
+ if (struct_v >= 2)
+ ::decode(queue, bl);
DECODE_FINISH(bl);
}
void dump(ceph::Formatter *f) const {
- f->open_object_section("creating_pgs");
f->dump_unsigned("last_scan_epoch", last_scan_epoch);
+ f->open_array_section("creating_pgs");
for (auto& pg : pgs) {
f->open_object_section("pg");
f->dump_stream("pgid") << pg.first;
f->close_section();
}
f->close_section();
+ f->open_array_section("queue");
+ for (auto& p : queue) {
+ f->open_object_section("pool");
+ f->dump_unsigned("pool", p.first);
+ f->dump_unsigned("created", p.second.created);
+ f->dump_stream("modified") << p.second.modified;
+ f->dump_unsigned("ps_start", p.second.start);
+ f->dump_unsigned("ps_end", p.second.end);
+ f->close_section();
+ }
+ f->close_section();
f->open_array_section("created_pools");
for (auto pool : created_pools) {
f->dump_unsigned("pool", pool);
o.push_back(c);
}
};
+WRITE_CLASS_ENCODER(creating_pgs_t::create_info);
WRITE_CLASS_ENCODER(creating_pgs_t);
creating_pgs_t
OSDMonitor::update_pending_pgs(const OSDMap::Incremental& inc)
{
+ dout(10) << __func__ << dendl;
creating_pgs_t pending_creatings;
{
std::lock_guard<std::mutex> l(creating_pgs_lock);
pending_creatings = creating_pgs;
}
- if (pending_creatings.last_scan_epoch > inc.epoch) {
- return pending_creatings;
- }
- if (osdmap.get_epoch() &&
- !osdmap.test_flag(CEPH_OSDMAP_REQUIRE_LUMINOUS)) {
- const unsigned total = pending_creatings.pgs.size();
- mon->pgservice->maybe_add_creating_pgs(creating_pgs.last_scan_epoch,
- &pending_creatings);
- dout(7) << __func__ << total - pending_creatings.pgs.size()
- << " pgs added from pgmap" << dendl;
- }
- unsigned added = 0;
- added += scan_for_creating_pgs(osdmap.get_pools(),
- inc.old_pools,
- inc.modified,
- &pending_creatings);
- added += scan_for_creating_pgs(inc.new_pools,
- inc.old_pools,
- inc.modified,
- &pending_creatings);
- dout(10) << __func__ << added << " pg added for new pools" << dendl;
- for (auto deleted_pool : inc.old_pools) {
- auto removed = pending_creatings.remove_pool(deleted_pool);
- dout(10) << __func__ << removed
- << " pg removed because containing pool deleted: "
- << deleted_pool << dendl;
- }
- // pgmon updates its creating_pgs in check_osd_map() which is called by
- // on_active() and check_osd_map() could be delayed if lease expires, so its
- // creating_pgs could be stale in comparison with the one of osdmon. let's
- // trim them here. otherwise, they will be added back after being erased.
- unsigned removed = 0;
- for (auto& pg : pending_created_pgs) {
- pending_creatings.created_pools.insert(pg.pool());
- removed += pending_creatings.pgs.erase(pg);
- }
- pending_created_pgs.clear();
- dout(10) << __func__ << removed << " pgs removed because they're created"
- << dendl;
- pending_creatings.last_scan_epoch = osdmap.get_epoch();
+ // check for new or old pools
+ if (pending_creatings.last_scan_epoch < inc.epoch) {
+ if (osdmap.get_epoch() &&
+ osdmap.require_osd_release < CEPH_RELEASE_LUMINOUS) {
+ const unsigned total = pending_creatings.pgs.size();
+ mon->pgservice->maybe_add_creating_pgs(creating_pgs.last_scan_epoch,
+ &pending_creatings);
+ dout(7) << __func__ << total - pending_creatings.pgs.size()
+ << " pgs added from pgmap" << dendl;
+ }
+ scan_for_creating_pgs(osdmap.get_pools(),
+ inc.old_pools,
+ inc.modified,
+ &pending_creatings);
+ scan_for_creating_pgs(inc.new_pools,
+ inc.old_pools,
+ inc.modified,
+ &pending_creatings);
+ for (auto deleted_pool : inc.old_pools) {
+ auto removed = pending_creatings.remove_pool(deleted_pool);
+ dout(10) << __func__ << removed
+ << " pg removed because containing pool deleted: "
+ << deleted_pool << dendl;
+ last_epoch_clean.remove_pool(deleted_pool);
+ }
+ // pgmon updates its creating_pgs in check_osd_map() which is called by
+ // on_active() and check_osd_map() could be delayed if lease expires, so its
+ // creating_pgs could be stale in comparison with the one of osdmon. let's
+ // trim them here. otherwise, they will be added back after being erased.
+ unsigned removed = 0;
+ for (auto& pg : pending_created_pgs) {
+ pending_creatings.created_pools.insert(pg.pool());
+ removed += pending_creatings.pgs.erase(pg);
+ }
+ pending_created_pgs.clear();
+ dout(10) << __func__ << removed << " pgs removed because they're created"
+ << dendl;
+ pending_creatings.last_scan_epoch = osdmap.get_epoch();
+ }
+
+ // process queue
+ unsigned max = MAX(1, g_conf->mon_osd_max_creating_pgs);
+ while (pending_creatings.pgs.size() < max &&
+ !pending_creatings.queue.empty()) {
+ auto p = pending_creatings.queue.begin();
+ int64_t poolid = p->first;
+ dout(10) << __func__ << " pool " << poolid
+ << " created " << p->second.created
+ << " modified " << p->second.modified
+ << " [" << p->second.start << "-" << p->second.end << ")"
+ << dendl;
+ int n = MIN(max - pending_creatings.pgs.size(),
+ p->second.end - p->second.start);
+ ps_t first = p->second.start;
+ ps_t end = first + n;
+ for (ps_t ps = first; ps < end; ++ps) {
+ const pg_t pgid{ps, static_cast<uint64_t>(poolid)};
+ pending_creatings.pgs.emplace(pgid, make_pair(p->second.created,
+ p->second.modified));
+ dout(10) << __func__ << " adding " << pgid << dendl;
+ }
+ p->second.start = end;
+ if (p->second.done()) {
+ dout(10) << __func__ << " done with queue for " << poolid << dendl;
+ pending_creatings.queue.erase(p);
+ } else {
+ dout(10) << __func__ << " pool " << poolid
+ << " now [" << p->second.start << "-" << p->second.end << ")"
+ << dendl;
+ }
+ }
+ dout(10) << __func__ << " queue remaining: " << pending_creatings.queue.size()
+ << " pools" << dendl;
+
return pending_creatings;
}
dout(20) << __func__ << " pgmap shows " << p->first << " is created"
<< dendl;
p = creating_pgs->pgs.erase(p);
- creating_pgs->created_pools.insert(q->first.pool());
} else {
++p;
}
}
}
-unsigned OSDMonitor::scan_for_creating_pgs(
+void OSDMonitor::scan_for_creating_pgs(
const mempool::osdmap::map<int64_t,pg_pool_t>& pools,
const mempool::osdmap::set<int64_t>& removed_pools,
utime_t modified,
creating_pgs_t* creating_pgs) const
{
- unsigned total = 0;
for (auto& p : pools) {
int64_t poolid = p.first;
const pg_pool_t& pool = p.second;
<< " " << pool << dendl;
continue;
}
- unsigned added = creating_pgs->create_pool(poolid, pool.get_pg_num(),
- created, modified);
- dout(10) << __func__ << added << " pgs added for pool "<< poolid
+ dout(10) << __func__ << " queueing pool create for " << poolid
<< " " << pool << dendl;
- total += added;
+ creating_pgs->create_pool(poolid, pool.get_pg_num(), created, modified);
}
- return total;
}
void OSDMonitor::update_creating_pgs()
{
+ dout(10) << __func__ << " " << creating_pgs.pgs.size() << " pgs creating, "
+ << creating_pgs.queue.size() << " pools in queue" << dendl;
decltype(creating_pgs_by_osd_epoch) new_pgs_by_osd_epoch;
std::lock_guard<std::mutex> l(creating_pgs_lock);
for (auto& pg : creating_pgs.pgs) {
mapped = mapping.get_epoch();
}
break;
- }
+ } else {
+ // newly creating
+ mapped = mapping.get_epoch();
+ }
}
}
dout(10) << __func__ << " will instruct osd." << acting_primary