void usage()
{
- cerr << "usage: cmds [flags] [--mds rank] [--shadow rank]\n";
+ cerr << "usage: cmds name [flags] [--mds rank] [--shadow rank]\n";
cerr << " -m monitorip:port\n";
cerr << " connect to monitor at given address\n";
cerr << " --debug_mds n\n";
common_init(args, "mds");
// mds specific args
- const char *monhost = 0;
int whoami = -1;
int shadow = -1;
+ const char *name = 0;
for (unsigned i=0; i<args.size(); i++) {
if (strcmp(args[i], "--mds") == 0)
whoami = atoi(args[++i]);
else if (strcmp(args[i], "--shadow") == 0)
whoami = shadow = atoi(args[++i]);
- else if (monhost == 0)
- monhost = args[i];
+ else if (!name)
+ name = args[i];
else {
cerr << "unrecognized arg " << args[i] << std::endl;
usage();
return -1;
}
}
+ if (!name) {
+ // a daemon name is mandatory: bail out like the "unrecognized arg"
+ // path does, rather than passing a null pointer on to the MDS
+ // constructor, which copies it into a string member
+ usage();
+ return -1;
+ }
if (g_conf.clock_tare) g_clock.tare();
rank.start();
// start mds
- MDS *mds = new MDS(whoami, m, &monmap);
+ MDS *mds = new MDS(name, whoami, m, &monmap);
mds->standby_replay_for = shadow;
mds->init();
host = cosd5
btrfs devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-3:0:0:0 /dev/disk/by-path/pci-0000:05:01.0-scsi-4:0:0:0 /dev/disk/by-path/pci-0000:05:01.0-scsi-5:0:0:0 /dev/disk/by-path/pci-0000:05:01.0-scsi-6:0:0:0"
-[mds0]
+[mds.foo]
+ host = cosd0
+[mds.bar]
host = cosd0
\ No newline at end of file
#define CEPH_MDS_PROTOCOL 5 /* cluster internal */
#define CEPH_MON_PROTOCOL 4 /* cluster internal */
#define CEPH_OSDC_PROTOCOL 5 /* public/client */
-#define CEPH_MDSC_PROTOCOL 7 /* public/client */
-#define CEPH_MONC_PROTOCOL 6 /* public/client */
+#define CEPH_MDSC_PROTOCOL 8 /* public/client */
+#define CEPH_MONC_PROTOCOL 7 /* public/client */
/*
for name in $what; do
type=`echo $name | cut -c 1-3` # e.g. 'mon', if $item is 'mon1'
- num=`echo $name | cut -c 4-`
+ id=`echo $name | cut -c 4- | sed 's/\\.//'`
+ num=$id
sections="$name $type global"
check_host || continue
fi
if [[ $name =~ "mds" ]]; then
+ get_conf mds "" "mds" $sections
+ get_conf shadow "" "shadow" $sections
module_opt="$mon_addr_arg"
- module_bin="$BINDIR/cmds"
+ module_bin="$BINDIR/cmds $id"
+ # append the optional flags only when a value is set; the assignment
+ # target must be written without a leading '$', otherwise bash expands
+ # the word and tries to run it as a command instead of assigning
+ [[ $mds != "" ]] && module_bin="$module_bin --mds $mds"
+ [[ $shadow != "" ]] && module_bin="$module_bin --shadow $shadow"
fi
if [[ $name =~ "osd" ]]; then
// cons/des
-MDS::MDS(int whoami_, Messenger *m, MonMap *mm) :
+MDS::MDS(const char *n, int whoami_, Messenger *m, MonMap *mm) :
mds_lock("MDS::mds_lock"),
timer(mds_lock),
+ name(n),
whoami(whoami_), incarnation(0),
standby_replay_for(-1),
messenger(m),
beacon_seq_stamp[beacon_last_seq] = g_clock.now();
- messenger->send_message(new MMDSBeacon(monmap->fsid, mdsmap->get_epoch(),
+ messenger->send_message(new MMDSBeacon(monmap->fsid, name, mdsmap->get_epoch(),
want_state, beacon_last_seq, want_rank),
monmap->get_inst(mon));
Mutex mds_lock;
SafeTimer timer;
+ string name;
int whoami;
int incarnation;
private:
virtual bool dispatch_impl(Message *m);
public:
- MDS(int whoami, Messenger *m, MonMap *mm);
+ MDS(const char *n, int whoami, Messenger *m, MonMap *mm);
~MDS();
// who am i etc
for (map<entity_addr_t,mds_info_t>::iterator p = mds_info.begin();
p != mds_info.end();
p++)
- foo.insert(pair<pair<unsigned,unsigned>,entity_addr_t>(pair<unsigned,unsigned>(p->second.mds, p->second.inc-1), p->first));
+ foo.insert(pair<pair<unsigned,unsigned>,entity_addr_t>(pair<unsigned,unsigned>(p->second.rank, p->second.inc-1), p->first));
for (multimap< pair<unsigned,unsigned>, entity_addr_t >::iterator p = foo.begin();
p != foo.end();
mds_info_t& info = mds_info[p->second];
out << info.addr
- << " mds" << info.mds
+ << " '" << info.name << "'"
+ << " mds" << info.rank
<< "." << info.inc
<< " " << get_state_name(info.state)
<< " seq " << info.state_seq;
}
struct mds_info_t {
- int32_t mds;
+ string name;
+ int32_t rank;
int32_t inc;
int32_t state;
version_t state_seq;
entity_addr_t addr;
utime_t laggy_since;
- mds_info_t() : mds(-1), inc(0), state(STATE_STANDBY), state_seq(0) { }
+ mds_info_t() : rank(-1), inc(0), state(STATE_STANDBY), state_seq(0) { }
bool laggy() const { return !(laggy_since == utime_t()); }
void clear_laggy() { laggy_since = utime_t(); }
- entity_inst_t get_inst() const { return entity_inst_t(entity_name_t::MDS(mds), addr); }
+ entity_inst_t get_inst() const { return entity_inst_t(entity_name_t::MDS(rank), addr); }
void encode(bufferlist& bl) const {
- ::encode(mds, bl);
+ ::encode(name, bl);
+ ::encode(rank, bl);
::encode(inc, bl);
::encode(state, bl);
::encode(state_seq, bl);
::encode(laggy_since, bl);
}
void decode(bufferlist::iterator& bl) {
- ::decode(mds, bl);
+ ::decode(name, bl);
+ ::decode(rank, bl);
::decode(inc, bl);
::decode(state, bl);
::decode(state_seq, bl);
p != mds_info.end();
p++)
if (p->second.state >= STATE_REPLAY && p->second.state <= STATE_STOPPING)
- s.insert(p->second.mds);
+ s.insert(p->second.rank);
}
void get_mds_set(set<int>& s, int state) {
for (map<entity_addr_t,mds_info_t>::const_iterator p = mds_info.begin();
p != mds_info.end();
p++)
if (p->second.state == state)
- s.insert(p->second.mds);
+ s.insert(p->second.rank);
}
int get_random_up_mds() {
for (map<entity_addr_t,mds_info_t>::const_iterator p = mds_info.begin();
p != mds_info.end();
p++) {
- if (p->second.mds == mds &&
+ if (p->second.rank == mds &&
p->second.state == MDSMap::STATE_STANDBY &&
!p->second.laggy()) {
a = p->second.addr;
for (map<entity_addr_t,mds_info_t>::const_iterator p = mds_info.begin();
p != mds_info.end();
p++) {
- if (p->second.mds == -1 &&
+ if (p->second.rank == -1 &&
p->second.state == MDSMap::STATE_STANDBY &&
!p->second.laggy()) {
a = p->second.addr;
int get_rank(const entity_addr_t& addr) {
if (mds_info.count(addr))
- return mds_info[addr].mds;
+ return mds_info[addr].rank;
return -1;
}
class MMDSBeacon : public Message {
ceph_fsid_t fsid;
+ string name;
epoch_t last_epoch_seen; // include last mdsmap epoch mds has seen to avoid race with monitor decree
__u32 state;
version_t seq;
public:
MMDSBeacon() : Message(MSG_MDS_BEACON) {}
- MMDSBeacon(ceph_fsid_t &f, epoch_t les, int st, version_t se, int wr) :
+ // n is only copied into the name member, so accept it by const reference
+ MMDSBeacon(ceph_fsid_t &f, const string& n, epoch_t les, int st, version_t se, int wr) :
Message(MSG_MDS_BEACON),
- fsid(f), last_epoch_seen(les), state(st), seq(se), want_rank(wr) { }
+ fsid(f), name(n), last_epoch_seen(les), state(st), seq(se), want_rank(wr) { }
ceph_fsid_t& get_fsid() { return fsid; }
+ string& get_name() { return name; }
epoch_t get_last_epoch_seen() { return last_epoch_seen; }
int get_state() { return state; }
version_t get_seq() { return seq; }
int get_want_rank() { return want_rank; }
void print(ostream& out) {
- out << "mdsbeacon(" << MDSMap::get_state_name(state)
+ out << "mdsbeacon(" << name << " " << MDSMap::get_state_name(state)
<< " seq " << seq << ")";
}
::encode(last_epoch_seen, payload);
::encode(state, payload);
::encode(seq, payload);
+ ::encode(name, payload);
::encode(want_rank, payload);
}
void decode_payload() {
::decode(last_epoch_seen, p);
::decode(state, p);
::decode(seq, p);
+ ::decode(name, p);
::decode(want_rank, p);
}
};
# create monitors, osds
for name in $what; do
type=`echo $name | cut -c 1-3` # e.g. 'mon', if $name is 'mon1'
- num=`echo $name | cut -c 4-`
+ id=`echo $name | cut -c 4- | sed 's/\\.//'`
+ num=$id
sections="$name $type global"
check_host || continue
if (info.state == MDSMap::STATE_STANDBY &&
state == MDSMap::STATE_STANDBY_REPLAY &&
(pending_mdsmap.is_degraded() ||
- pending_mdsmap.get_state(info.mds) < MDSMap::STATE_ACTIVE)) {
- dout(10) << "mds_beacon can't standby-replay mds" << info.mds << " at this time (cluster degraded, or mds not active)" << dendl;
+ pending_mdsmap.get_state(info.rank) < MDSMap::STATE_ACTIVE)) {
+ dout(10) << "mds_beacon can't standby-replay mds" << info.rank << " at this time (cluster degraded, or mds not active)" << dendl;
goto ignore;
}
// note time and reply
dout(15) << "mds_beacon " << *m << " noting time and replying" << dendl;
last_beacon[addr] = g_clock.now();
- mon->messenger->send_message(new MMDSBeacon(mon->monmap->fsid,
+ mon->messenger->send_message(new MMDSBeacon(mon->monmap->fsid, m->get_name(),
mdsmap.get_epoch(), state, seq, 0),
m->get_orig_source_inst());
// add
MDSMap::mds_info_t& info = pending_mdsmap.mds_info[addr];
- info.mds = standby_for;
+ info.name = m->get_name();
+ info.rank = standby_for;
info.addr = addr;
info.state = MDSMap::STATE_STANDBY;
info.clear_laggy();
}
- dout(10) << "prepare_beacon mds" << info.mds
+ dout(10) << "prepare_beacon mds" << info.rank
<< " " << MDSMap::get_state_name(info.state)
<< " -> " << MDSMap::get_state_name(state)
<< dendl;
if (state == MDSMap::STATE_STOPPED) {
- pending_mdsmap.up.erase(info.mds);
+ pending_mdsmap.up.erase(info.rank);
pending_mdsmap.mds_info.erase(addr);
- pending_mdsmap.stopped.insert(info.mds);
- pending_mdsmap.in.erase(info.mds);
+ pending_mdsmap.stopped.insert(info.rank);
+ pending_mdsmap.in.erase(info.rank);
} else {
info.state = state;
info.state_seq = seq;
dout(1) << "adding standby " << addr << " as mds" << mds << dendl;
MDSMap::mds_info_t& info = pending_mdsmap.mds_info[addr];
- info.mds = mds;
+ info.rank = mds;
if (pending_mdsmap.stopped.count(mds))
info.state = MDSMap::STATE_STARTING;
else
MDSMap::mds_info_t& info = pending_mdsmap.mds_info[addr];
- dout(10) << "no beacon from " << addr << " mds" << info.mds << "." << info.inc
+ dout(10) << "no beacon from " << addr << " mds" << info.rank << "." << info.inc
<< " " << MDSMap::get_state_name(info.state)
<< " since " << since << dendl;
// are we in?
// and is there a non-laggy standby that can take over for us?
entity_addr_t sa;
- if (info.mds >= 0 &&
+ if (info.rank >= 0 &&
info.state > 0 && //|| info.state == MDSMap::STATE_STANDBY_REPLAY) &&
- pending_mdsmap.find_standby_for(info.mds, sa)) {
- dout(10) << " replacing " << addr << " mds" << info.mds << "." << info.inc
+ pending_mdsmap.find_standby_for(info.rank, sa)) {
+ dout(10) << " replacing " << addr << " mds" << info.rank << "." << info.inc
<< " " << MDSMap::get_state_name(info.state)
<< " with " << sa << dendl;
MDSMap::mds_info_t& si = pending_mdsmap.mds_info[sa];
default:
assert(0);
}
- si.mds = info.mds;
+ si.rank = info.rank;
if (si.state > 0) {
- si.inc = ++pending_mdsmap.inc[info.mds];
- pending_mdsmap.up[info.mds] = sa;
+ si.inc = ++pending_mdsmap.inc[info.rank];
+ pending_mdsmap.up[info.rank] = sa;
pending_mdsmap.last_failure = pending_mdsmap.epoch;
}
pending_mdsmap.mds_info.erase(addr);
do_propose = true;
} else if (info.state == MDSMap::STATE_STANDBY_REPLAY) {
- dout(10) << " failing " << addr << " mds" << info.mds << "." << info.inc
+ dout(10) << " failing " << addr << " mds" << info.rank << "." << info.inc
<< " " << MDSMap::get_state_name(info.state)
<< dendl;
pending_mdsmap.mds_info.erase(addr);
do_propose = true;
} else if (!info.laggy()) {
// just mark laggy
- dout(10) << " marking " << addr << " mds" << info.mds << "." << info.inc
+ dout(10) << " marking " << addr << " mds" << info.rank << "." << info.inc
<< " " << MDSMap::get_state_name(info.state)
<< " laggy" << dendl;
info.laggy_since = now;
dout(0) << " taking over failed mds" << f << " with " << sa << dendl;
MDSMap::mds_info_t& si = pending_mdsmap.mds_info[sa];
si.state = MDSMap::STATE_REPLAY;
- si.mds = f;
+ si.rank = f;
si.inc = ++pending_mdsmap.inc[f];
pending_mdsmap.in.insert(f);
pending_mdsmap.up[f] = sa;
p != pending_mdsmap.mds_info.end();
p++) {
if (p->second.state == MDSMap::STATE_STANDBY_REPLAY)
- shadowed.insert(p->second.mds);
+ shadowed.insert(p->second.rank);
if (p->second.state == MDSMap::STATE_STANDBY &&
!p->second.laggy())
- avail[p->second.mds].insert(p->first);
+ avail[p->second.rank].insert(p->first);
}
// find an mds that needs a standby
dout(10) << "mds" << *p << " will be shadowed by " << s << dendl;
MDSMap::mds_info_t& info = pending_mdsmap.mds_info[s];
- info.mds = *p;
+ info.rank = *p;
info.state = MDSMap::STATE_STANDBY_REPLAY;
do_propose = true;
}
info.state = MDSMap::STATE_STOPPING;
break;
case MDSMap::STATE_STARTING:
- pending_mdsmap.stopped.insert(info.mds);
+ pending_mdsmap.stopped.insert(info.rank);
case MDSMap::STATE_CREATING:
- pending_mdsmap.up.erase(info.mds);
+ pending_mdsmap.up.erase(info.rank);
pending_mdsmap.mds_info.erase(info.addr);
- pending_mdsmap.in.erase(info.mds);
+ pending_mdsmap.in.erase(info.rank);
break;
case MDSMap::STATE_REPLAY:
case MDSMap::STATE_RESOLVE:
case MDSMap::STATE_RECONNECT:
case MDSMap::STATE_REJOIN:
// BUG: hrm, if this is the case, the STOPPING guys won't be able to stop, will they?
- pending_mdsmap.failed.insert(info.mds);
- pending_mdsmap.up.erase(info.mds);
+ pending_mdsmap.failed.insert(info.rank);
+ pending_mdsmap.up.erase(info.rank);
pending_mdsmap.mds_info.erase(info.addr);
- pending_mdsmap.in.erase(info.mds);
+ pending_mdsmap.in.erase(info.rank);
break;
}
}
[global]
conf = ceph.conf
restart on core dump = true
+ pid file = /var/run/ceph/$name.pid
; monitor
[mon]
- pid file = /var/run/ceph/mon$mon.pid
[mon0]
host = alpha
; mds
[mds]
- pid file = /var/run/ceph/mds$mds.pid
-[mds0]
+[mds.alpha]
host = alpha
; osd
[osd]
- pid file = /var/run/ceph/osd$osd.pid
sudo = true
[osd0]