mkfs
-- pg splitting!
-- ratchet up pgp_num as pg creation succeeds
+- increase pg_num when pg_num > pgp_num
code cleanup
- userspace encoding/decoding needs major cleanup
int osds[10];
int nr_osds;
int i;
-
+ int pps; /* placement ps */
+
dout(30, "send_request %p\n", req);
ruleno = crush_find_rule(osdc->osdmap->crush, req->r_pgid.pg.pool,
req->r_pgid.pg.type, req->r_pgid.pg.size);
BUG_ON(ruleno < 0); /* fixme, need some proper error handling here */
dout(30, "using crush rule %d\n", ruleno);
- nr_osds = crush_do_rule(osdc->osdmap->crush, ruleno,
- req->r_pgid.pg.ps, osds, req->r_pgid.pg.size,
- req->r_pgid.pg.preferred);
+ if (req->r_pgid.pg.preferred >= 0)
+ pps = ceph_stable_mod(req->r_pgid.pg.ps,
+ osdc->osdmap->lpgp_num,
+ osdc->osdmap->lpgp_num_mask);
+ else
+ pps = ceph_stable_mod(req->r_pgid.pg.ps,
+ osdc->osdmap->pgp_num,
+ osdc->osdmap->pgp_num_mask);
+ nr_osds = crush_do_rule(osdc->osdmap->crush, ruleno, pps, osds,
+ req->r_pgid.pg.size, req->r_pgid.pg.preferred);
for (i=0; i<nr_osds; i++) {
if (ceph_osd_is_up(osdc->osdmap, osds[i]))
break;
}
else if (m->cmd[1] == "setpgnum" && m->cmd.size() > 2) {
int n = atoi(m->cmd[2].c_str());
- if (n > osdmap.get_pg_num() &&
- osdmap.get_pg_num() == osdmap.get_pgp_num()) {
+ if (n < osdmap.get_pg_num()) {
+ ss << "specified pg_num " << n << " < current " << osdmap.get_pg_num();
+ } else if (osdmap.get_pg_num() != osdmap.get_pgp_num()) {
+ ss << "current pg_num " << osdmap.get_pg_num() << " > " << osdmap.get_pgp_num()
+ << ", increase pgp_num first";
+ } else if (!mon->pgmon->pg_map.creating_pgs.empty()) {
+ ss << "currently creating pgs, wait";
+ } else {
ss << "set new pg_num = " << n;
pending_inc.new_pg_num = n;
getline(ss, rs);
paxos->wait_for_commit(new Monitor::C_Command(mon, m, 0, rs));
return true;
+ }
+ getline(ss, rs);
+ mon->reply_command(m, -EINVAL, rs);
+ }
+ else if (m->cmd[1] == "setpgpnum" && m->cmd.size() > 2) {
+ int n = atoi(m->cmd[2].c_str());
+ if (n <= osdmap.get_pgp_num()) {
+ ss << "specified pgp_num " << n << " <= current " << osdmap.get_pgp_num();
+ } else if (n > osdmap.get_pg_num()) {
+ ss << "specified pgp_num " << n << " > pg_num " << osdmap.get_pg_num();
+ } else if (!mon->pgmon->pg_map.creating_pgs.empty()) {
+ ss << "still creating pgs, wait";
} else {
- ss << "specified pg_num " << n << " < current " << osdmap.get_pg_num();
- ss << " or pg_num " << osdmap.get_pg_num() << " > pgp_num " << osdmap.get_pgp_num();
+ ss << "set new pgp_num = " << n;
+ pending_inc.new_pgp_num = n;
getline(ss, rs);
- mon->reply_command(m, -EINVAL, rs);
+ paxos->wait_for_commit(new Monitor::C_Command(mon, m, 0, rs));
+ return true;
}
+ getline(ss, rs);
+ mon->reply_command(m, -EINVAL, rs);
}
else if (m->cmd[1] == "down" && m->cmd.size() > 2) {
errno = 0;
long osd = strtol(m->cmd[2].c_str(), 0, 10);
- if (osdmap.is_up(osd)) {
+ if (osdmap.is_down(osd)) {
+ ss << "osd" << osd << " is already down";
+ } else {
pending_inc.new_down[osd] = false;
ss << "marked down osd" << osd;
getline(ss, rs);
paxos->wait_for_commit(new Monitor::C_Command(mon, m, 0, rs));
return true;
- } else {
- ss << "osd" << osd << " is already down";
- getline(ss, rs);
- mon->reply_command(m, -EINVAL, rs);
}
+ getline(ss, rs);
+ mon->reply_command(m, -EINVAL, rs);
}
else if (m->cmd[1] == "out" && m->cmd.size() > 2) {
errno = 0;
long osd = strtol(m->cmd[2].c_str(), 0, 10);
- if (osdmap.is_in(osd)) {
+ if (osdmap.is_out(osd)) {
+ ss << "osd" << osd << " is already out";
+ } else {
pending_inc.new_offload[osd] = CEPH_OSD_OUT;
ss << "marked out osd" << osd;
getline(ss, rs);
paxos->wait_for_commit(new Monitor::C_Command(mon, m, 0, rs));
return true;
- } else {
- ss << "osd" << osd << " is already out";
- getline(ss, rs);
- mon->reply_command(m, -EINVAL, rs);
- }
+ }
+ getline(ss, rs);
+ mon->reply_command(m, -EINVAL, rs);
}
else if (m->cmd[1] == "in" && m->cmd.size() > 2) {
errno = 0;
long osd = strtol(m->cmd[2].c_str(), 0, 10);
- if (osdmap.is_out(osd)) {
+ if (osdmap.is_in(osd)) {
+ ss << "osd" << osd << " is already in";
+ } else {
pending_inc.new_offload[osd] = CEPH_OSD_IN;
ss << "marked in osd" << osd;
getline(ss, rs);
paxos->wait_for_commit(new Monitor::C_Command(mon, m, 0, rs));
return true;
- } else {
- ss << "osd" << osd << " is already in";
- getline(ss, rs);
- mon->reply_command(m, -EINVAL, rs);
- }
+ }
+ getline(ss, rs);
+ mon->reply_command(m, -EINVAL, rs);
}
}
return false;
parent.u.pg.ps &= ~(1<<(msb-1));
dout(10) << " is " << pgid << " parent " << parent << " ?" << dendl;
if (parent.u.pg.ps < mon->osdmon->osdmap.get_pgp_num()) {
+ //if (pg_map.pg_stat.count(parent) &&
+ //pg_map.pg_stat[parent].state != PG_STATE_CREATING) {
dout(10) << " parent is " << parent << dendl;
break;
}
class PGMonitor : public PaxosService {
public:
+ PGMap pg_map;
private:
- PGMap pg_map;
PGMap::Incremental pending_inc;
void create_initial();
if (role != 0) {
dout(10) << " no longer primary for " << pgid << ", stopping creation" << dendl;
creating_pgs.erase(p);
+ } else {
+ /*
+ * adding new ppl to our pg has no effect, since we're still primary,
+ * and obviously haven't given the new nodes any data.
+ */
+ p->second.acting.swap(acting); // keep the latest
}
- p->second.acting.swap(acting); // keep the latest
- /*
- * adding new ppl to our pg has no effect, since we're still primary,
- * and obviously haven't given the new nodes any data.
- */
}
// scan existing pg's
created++;
}
store->apply_transaction(t);
+
+ // remove from queue
+ pg_split_ready.erase(p);
}
do_queries(query_map);
// pg -> (osd list)
- int pg_to_osds(pg_t actual_pg, vector<int>& osds) {
+ int pg_to_osds(pg_t pg, vector<int>& osds) {
// map to osds[]
- pg_t pg = actual_pg; // effective pgid for placement calc
+ ps_t pps; // placement ps
if (pg.preferred() >= 0)
- pg.u.pg.ps = ceph_stable_mod(pg.u.pg.ps, pgp_num, pgp_num_mask);
+ pps = ceph_stable_mod(pg.ps(), lpgp_num, lpgp_num_mask);
else
- pg.u.pg.ps = ceph_stable_mod(pg.u.pg.ps, lpgp_num, lpgp_num_mask);
+ pps = ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask);
switch (g_conf.osd_pg_layout) {
case CEPH_PG_LAYOUT_CRUSH:
// what crush rule?
int ruleno = crush.find_rule(pg.pool(), pg.type(), pg.size());
if (ruleno >= 0)
- crush.do_rule(ruleno, pg.ps(), osds, pg.size(), pg.preferred());
+ crush.do_rule(ruleno, pps, osds, pg.size(), pg.preferred());
}
break;
case CEPH_PG_LAYOUT_LINEAR:
for (int i=0; i<pg.size(); i++)
- osds.push_back( (i + pg.ps()*pg.size()) % g_conf.num_osd );
+ osds.push_back( (i + pps*pg.size()) % g_conf.num_osd );
break;
case CEPH_PG_LAYOUT_HYBRID:
{
- int h = crush_hash32(pg.ps());
+ int h = crush_hash32(pps);
for (int i=0; i<pg.size(); i++)
osds.push_back( (h+i) % g_conf.num_osd );
}
int t = 1;
int osd = 0;
while (t++) {
- osd = crush_hash32_3(i, pg.ps(), t) % g_conf.num_osd;
+ osd = crush_hash32_3(i, pps, t) % g_conf.num_osd;
int j = 0;
for (; j<i; j++)
if (osds[j] == osd) break;