From: Sage Weil Date: Sun, 23 Oct 2011 23:26:35 +0000 (-0700) Subject: Merge remote-tracking branch 'gh/master' into n X-Git-Tag: v0.38~57^2~2^2~9 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=8bb8e85d041c0781e43dfa67b2db5cf8cf8d83c5;p=ceph.git Merge remote-tracking branch 'gh/master' into n Conflicts: src/osd/OSDMap.h --- 8bb8e85d041c0781e43dfa67b2db5cf8cf8d83c5 diff --cc src/osd/OSDMap.cc index 80f3e023ff311,398cb0fd884f4..6ce35f3c366ad --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@@ -18,633 -18,16 +18,633 @@@ #include "common/Formatter.h" +// ---------------------------------- +// osd_info_t + void osd_info_t::dump(Formatter *f) const { - f->dump_int("last_clean_first", last_clean_first); - f->dump_int("last_clean_last", last_clean_last); + f->dump_int("last_clean_begin", last_clean_begin); + f->dump_int("last_clean_end", last_clean_end); f->dump_int("up_from", up_from); f->dump_int("up_thru", up_thru); f->dump_int("down_at", down_at); f->dump_int("lost_at", lost_at); } +void osd_info_t::encode(bufferlist& bl) const +{ + __u8 struct_v = 1; + ::encode(struct_v, bl); - ::encode(last_clean_first, bl); - ::encode(last_clean_last, bl); ++ ::encode(last_clean_begin, bl); ++ ::encode(last_clean_end, bl); + ::encode(up_from, bl); + ::encode(up_thru, bl); + ::encode(down_at, bl); + ::encode(lost_at, bl); +} + +void osd_info_t::decode(bufferlist::iterator& bl) +{ + __u8 struct_v; + ::decode(struct_v, bl); - ::decode(last_clean_first, bl); - ::decode(last_clean_last, bl); ++ ::decode(last_clean_begin, bl); ++ ::decode(last_clean_end, bl); + ::decode(up_from, bl); + ::decode(up_thru, bl); + ::decode(down_at, bl); + ::decode(lost_at, bl); +} + +ostream& operator<<(ostream& out, const osd_info_t& info) +{ + out << "up_from " << info.up_from + << " up_thru " << info.up_thru + << " down_at " << info.down_at - << " last_clean_interval " << info.last_clean_first << "-" << info.last_clean_last; ++ << " last_clean_interval [" << info.last_clean_begin << "," << info.last_clean_end << ")"; + if (info.lost_at) + out << " lost_at " << info.lost_at; + return out; +} + + +// ---------------------------------- +// OSDMap::Incremental + +void OSDMap::Incremental::encode_client_old(bufferlist& bl) const +{ + __u16 v = 5; + ::encode(v, bl); + ::encode(fsid, bl); + ::encode(epoch, bl); + ::encode(modified, bl); + int32_t new_t = new_pool_max; + ::encode(new_t, bl); + ::encode(new_flags, bl); + ::encode(fullmap, bl); + ::encode(crush, bl); + + ::encode(new_max_osd, bl); + // for ::encode(new_pools, bl); + __u32 n = new_pools.size(); + ::encode(n, bl); + for (map::const_iterator p = new_pools.begin(); + p != new_pools.end(); + ++p) { + n = p->first; + ::encode(n, bl); + ::encode(p->second, bl, 0); + } + // for ::encode(new_pool_names, bl); + n = new_pool_names.size(); + ::encode(n, bl); + for (map::const_iterator p = new_pool_names.begin(); p != new_pool_names.end(); ++p) { + n = p->first; + ::encode(n, bl); + ::encode(p->second, bl); + } + // for ::encode(old_pools, bl); + n = old_pools.size(); + ::encode(n, bl); + for (set::iterator p = old_pools.begin(); p != old_pools.end(); ++p) { + n = *p; + ::encode(n, bl); + } + ::encode(new_up_client, bl); + ::encode(new_state, bl); + ::encode(new_weight, bl); + // for ::encode(new_pg_temp, bl); + n = new_pg_temp.size(); + ::encode(n, bl); + for (map >::const_iterator p = new_pg_temp.begin(); + p != new_pg_temp.end(); + ++p) { + old_pg_t opg = p->first.get_old_pg(); + ::encode(opg, bl); + ::encode(p->second, bl); + } +} + +void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const +{ + if ((features & CEPH_FEATURE_PGID64) == 0) { + encode_client_old(bl); + return; + } + + // base + __u16 v = 6; + ::encode(v, bl); + ::encode(fsid, bl); + ::encode(epoch, bl); + ::encode(modified, bl); + ::encode(new_pool_max, bl); + ::encode(new_flags, bl); + ::encode(fullmap, bl); + ::encode(crush, bl); + + ::encode(new_max_osd, bl); + ::encode(new_pools, bl, features); + ::encode(new_pool_names, bl); + ::encode(old_pools, bl); + ::encode(new_up_client, bl); + ::encode(new_state, bl); + ::encode(new_weight, bl); + ::encode(new_pg_temp, bl); + + // extended + __u16 ev = CEPH_OSDMAP_INC_VERSION_EXT; + ::encode(ev, bl); + ::encode(new_hb_up, bl); + ::encode(new_up_thru, bl); + ::encode(new_last_clean_interval, bl); + ::encode(new_lost, bl); + ::encode(new_blacklist, bl); + ::encode(old_blacklist, bl); + ::encode(new_up_internal, bl); + ::encode(cluster_snapshot, bl); +} + +void OSDMap::Incremental::decode(bufferlist::iterator &p) +{ + __u32 n, t; + // base + __u16 v; + ::decode(v, p); + ::decode(fsid, p); + ::decode(epoch, p); + ::decode(modified, p); + if (v == 4 || v == 5) { + ::decode(n, p); + new_pool_max = n; + } else if (v >= 6) + ::decode(new_pool_max, p); + ::decode(new_flags, p); + ::decode(fullmap, p); + ::decode(crush, p); + + ::decode(new_max_osd, p); + if (v < 6) { + new_pools.clear(); + ::decode(n, p); + while (n--) { + ::decode(t, p); + ::decode(new_pools[t], p); + } + } else { + ::decode(new_pools, p); + } + if (v == 5) { + new_pool_names.clear(); + ::decode(n, p); + while (n--) { + ::decode(t, p); + ::decode(new_pool_names[t], p); + } + } else if (v >= 6) { + ::decode(new_pool_names, p); + } + if (v < 6) { + old_pools.clear(); + ::decode(n, p); + while (n--) { + ::decode(t, p); + old_pools.insert(t); + } + } else { + ::decode(old_pools, p); + } + ::decode(new_up_client, p); + ::decode(new_state, p); + ::decode(new_weight, p); + + if (v < 6) { + new_pg_temp.clear(); + ::decode(n, p); + while (n--) { + old_pg_t opg; + ::decode_raw(opg, p); + ::decode(new_pg_temp[pg_t(opg)], p); + } + } else { + ::decode(new_pg_temp, p); + } + + // extended + __u16 ev = 0; + if (v >= 5) + ::decode(ev, p); + ::decode(new_hb_up, p); + if (v < 5) + ::decode(new_pool_names, p); + ::decode(new_up_thru, p); + ::decode(new_last_clean_interval, p); + ::decode(new_lost, p); + ::decode(new_blacklist, p); + ::decode(old_blacklist, p); + if (ev >= 6) + ::decode(new_up_internal, p); + if (ev >= 7) + ::decode(cluster_snapshot, p); +} + + +// ---------------------------------- +// OSDMap + +bool OSDMap::is_blacklisted(const entity_addr_t& a) +{ + if (blacklist.empty()) + return false; + + // this specific instance? + if (blacklist.count(a)) + return true; + + // is entire ip blacklisted? + entity_addr_t b = a; + b.set_port(0); + b.set_nonce(0); + return blacklist.count(b); +} + +void OSDMap::set_max_osd(int m) +{ + int o = max_osd; + max_osd = m; + osd_state.resize(m); + osd_weight.resize(m); + for (; o& weights, Incremental& inc) const +{ + float max = 0; + for (map::const_iterator p = weights.begin(); + p != weights.end(); ++p) { + if (p->second > max) + max = p->second; + } + + for (map::const_iterator p = weights.begin(); + p != weights.end(); ++p) { + inc.new_weight[p->first] = (unsigned)((p->second / max) * CEPH_OSD_IN); + } +} + +int OSDMap::apply_incremental(Incremental &inc) +{ + if (inc.epoch == 1) + fsid = inc.fsid; + else + if (ceph_fsid_compare(&inc.fsid, &fsid) != 0) { + return -EINVAL; + } + assert(inc.epoch == epoch+1); + epoch++; + modified = inc.modified; + + // full map? + if (inc.fullmap.length()) { + decode(inc.fullmap); + return 0; + } + + // nope, incremental. + if (inc.new_flags >= 0) + flags = inc.new_flags; + + if (inc.new_max_osd >= 0) + set_max_osd(inc.new_max_osd); + + if (inc.new_pool_max != -1) + pool_max = inc.new_pool_max; + + for (set::iterator p = inc.old_pools.begin(); + p != inc.old_pools.end(); + p++) { + pools.erase(*p); + name_pool.erase(pool_name[*p]); + pool_name.erase(*p); + } + for (map::iterator p = inc.new_pools.begin(); + p != inc.new_pools.end(); + p++) { + pools[p->first] = p->second; + pools[p->first].last_change = epoch; + } + for (map::iterator p = inc.new_pool_names.begin(); + p != inc.new_pool_names.end(); + p++) { + pool_name[p->first] = p->second; + name_pool[p->second] = p->first; + } + + for (map::iterator i = inc.new_weight.begin(); + i != inc.new_weight.end(); + i++) + set_weight(i->first, i->second); + + // up/down + for (map::iterator i = inc.new_state.begin(); + i != inc.new_state.end(); + i++) { + int s = i->second ? i->second : CEPH_OSD_UP; + if ((osd_state[i->first] & CEPH_OSD_UP) && + (s & CEPH_OSD_UP)) { + osd_info[i->first].down_at = epoch; + } + osd_state[i->first] ^= s; + } + for (map::iterator i = inc.new_up_client.begin(); + i != inc.new_up_client.end(); + i++) { + osd_state[i->first] |= CEPH_OSD_EXISTS | CEPH_OSD_UP; + osd_addr[i->first] = i->second; + if (inc.new_hb_up.empty()) + osd_hb_addr[i->first] = i->second; //this is a backward-compatibility hack + else + osd_hb_addr[i->first] = inc.new_hb_up[i->first]; + osd_info[i->first].up_from = epoch; + } + for (map::iterator i = inc.new_up_internal.begin(); + i != inc.new_up_internal.end(); + i++) + osd_cluster_addr[i->first] = i->second; + // info + for (map::iterator i = inc.new_up_thru.begin(); + i != inc.new_up_thru.end(); + i++) + osd_info[i->first].up_thru = i->second; + for (map >::iterator i = inc.new_last_clean_interval.begin(); + i != inc.new_last_clean_interval.end(); + i++) { - osd_info[i->first].last_clean_first = i->second.first; - osd_info[i->first].last_clean_last = i->second.second; ++ osd_info[i->first].last_clean_begin = i->second.first; ++ osd_info[i->first].last_clean_end = i->second.second; + } + for (map::iterator p = inc.new_lost.begin(); p != inc.new_lost.end(); p++) + osd_info[p->first].lost_at = p->second; + + // pg rebuild + for (map >::iterator p = inc.new_pg_temp.begin(); p != inc.new_pg_temp.end(); p++) { + if (p->second.empty()) + pg_temp.erase(p->first); + else + pg_temp[p->first] = p->second; + } + + // blacklist + for (map::iterator p = inc.new_blacklist.begin(); + p != inc.new_blacklist.end(); + p++) + blacklist[p->first] = p->second; + for (vector::iterator p = inc.old_blacklist.begin(); + p != inc.old_blacklist.end(); + p++) + blacklist.erase(*p); + + // cluster snapshot? + if (inc.cluster_snapshot.length()) { + cluster_snapshot = inc.cluster_snapshot; + cluster_snapshot_epoch = inc.epoch; + } else { + cluster_snapshot.clear(); + cluster_snapshot_epoch = 0; + } + + // do new crush map last (after up/down stuff) + if (inc.crush.length()) { + bufferlist::iterator blp = inc.crush.begin(); + crush.decode(blp); + } + + calc_num_osds(); + return 0; +} + +// serialize, unserialize +void OSDMap::encode_client_old(bufferlist& bl) const +{ + __u16 v = 5; + ::encode(v, bl); + + // base + ::encode(fsid, bl); + ::encode(epoch, bl); + ::encode(created, bl); + ::encode(modified, bl); + + // for ::encode(pools, bl); + __u32 n = pools.size(); + ::encode(n, bl); + for (map::const_iterator p = pools.begin(); + p != pools.end(); + ++p) { + n = p->first; + ::encode(n, bl); + ::encode(p->second, bl, 0); + } + // for ::encode(pool_name, bl); + n = pool_name.size(); + ::encode(n, bl); + for (map::const_iterator p = pool_name.begin(); + p != pool_name.end(); + ++p) { + n = p->first; + ::encode(n, bl); + ::encode(p->second, bl); + } + // for ::encode(pool_max, bl); + n = pool_max; + ::encode(n, bl); + + ::encode(flags, bl); + + ::encode(max_osd, bl); + ::encode(osd_state, bl); + ::encode(osd_weight, bl); + ::encode(osd_addr, bl); + + // for ::encode(pg_temp, bl); + n = pg_temp.size(); + ::encode(n, bl); + for (map >::const_iterator p = pg_temp.begin(); + p != pg_temp.end(); + ++p) { + old_pg_t opg = p->first.get_old_pg(); + ::encode(opg, bl); + ::encode(p->second, bl); + } + + // crush + bufferlist cbl; + crush.encode(cbl); + ::encode(cbl, bl); +} + +void OSDMap::encode(bufferlist& bl, uint64_t features) const +{ + if ((features & CEPH_FEATURE_PGID64) == 0) { + encode_client_old(bl); + return; + } + + __u16 v = 6; + ::encode(v, bl); + + // base + ::encode(fsid, bl); + ::encode(epoch, bl); + ::encode(created, bl); + ::encode(modified, bl); + + ::encode(pools, bl, features); + ::encode(pool_name, bl); + ::encode(pool_max, bl); + + ::encode(flags, bl); + + ::encode(max_osd, bl); + ::encode(osd_state, bl); + ::encode(osd_weight, bl); + ::encode(osd_addr, bl); + + ::encode(pg_temp, bl); + + // crush + bufferlist cbl; + crush.encode(cbl); + ::encode(cbl, bl); + + // extended + __u16 ev = CEPH_OSDMAP_VERSION_EXT; + ::encode(ev, bl); + ::encode(osd_hb_addr, bl); + ::encode(osd_info, bl); + ::encode(blacklist, bl); + ::encode(osd_cluster_addr, bl); + ::encode(cluster_snapshot_epoch, bl); + ::encode(cluster_snapshot, bl); +} + +void OSDMap::decode(bufferlist& bl) +{ + __u32 n, t; + bufferlist::iterator p = bl.begin(); + __u16 v; + ::decode(v, p); + + // base + ::decode(fsid, p); + ::decode(epoch, p); + ::decode(created, p); + ::decode(modified, p); + + int32_t max_pools = 0; + if (v < 4) { + ::decode(max_pools, p); + } + if (v < 6) { + pools.clear(); + ::decode(n, p); + while (n--) { + ::decode(t, p); + ::decode(pools[t], p); + } + } else { + ::decode(pools, p); + } + if (v == 5) { + pool_name.clear(); + ::decode(n, p); + while (n--) { + ::decode(t, p); + ::decode(pool_name[t], p); + } + } else if (v >= 6) { + ::decode(pool_name, p); + } + if (v == 4 || v == 5) { + ::decode(n, p); + pool_max = n; + } else if (v >= 6) { + ::decode(pool_max, p); + } else { + pool_max = max_pools; + } + + ::decode(flags, p); + + ::decode(max_osd, p); + ::decode(osd_state, p); + ::decode(osd_weight, p); + ::decode(osd_addr, p); + if (v <= 5) { + pg_temp.clear(); + ::decode(n, p); + while (n--) { + old_pg_t opg; + ::decode_raw(opg, p); + ::decode(pg_temp[pg_t(opg)], p); + } + } else { + ::decode(pg_temp, p); + } + + // crush + bufferlist cbl; + ::decode(cbl, p); + bufferlist::iterator cblp = cbl.begin(); + crush.decode(cblp); + + // extended + __u16 ev = 0; + if (v >= 5) + ::decode(ev, p); + ::decode(osd_hb_addr, p); + ::decode(osd_info, p); + if (v < 5) + ::decode(pool_name, p); + + ::decode(blacklist, p); + if (ev >= 6) + ::decode(osd_cluster_addr, p); + else + osd_cluster_addr.resize(osd_addr.size()); + + if (ev >= 7) { + ::decode(cluster_snapshot_epoch, p); + ::decode(cluster_snapshot, p); + } + + // index pool names + name_pool.clear(); + for (map::iterator i = pool_name.begin(); i != pool_name.end(); i++) + name_pool[i->second] = i->first; + + calc_num_osds(); +} + + + void OSDMap::dump_json(ostream& out) const { JSONFormatter jsf(true);