From: sage Date: Wed, 15 Jun 2005 19:58:26 +0000 (+0000) Subject: *** empty log message *** X-Git-Tag: v0.1~2060 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1ea8caeabb90b03a37441fb6e4b5ee0da7f4763e;p=ceph.git *** empty log message *** git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@321 29311d96-e01e-0410-9327-a35deaab8ce9 --- diff --git a/ceph/Makefile b/ceph/Makefile index 7d09041c071ba..e0c73071b4cc5 100644 --- a/ceph/Makefile +++ b/ceph/Makefile @@ -86,6 +86,10 @@ fakesyn: fakesyn.cc mds/allmds.o client/Client.o client/SyntheticClient.o osd/OS fakefuse: fakefuse.cc mds/allmds.o client/Client.o osd/OSD.o client/fuse.o msg/FakeMessenger.cc msg/CheesySerializer.o ${COMMON_OBJS} ${CC} -pg ${CFLAGS} ${LIBS} -lfuse $^ -o $@ +testmpi: test/testmpi.cc msg/MPIMessenger.cc ${COMMON_OBJS} + ${MPICC} ${CFLAGS} ${LIBS} $^ -o $@ + + clean: rm -f *.o */*.o ${TARGETS} ${TEST_TARGETS} diff --git a/ceph/TODO b/ceph/TODO index 761bb731d886f..3dc47c5bf3779 100644 --- a/ceph/TODO +++ b/ceph/TODO @@ -26,10 +26,6 @@ md tests: - log length versus cache size, workload -notes and todos. -- SyntheticClient - -- virtual mega-filesystem (metadata only) finish HARD LINKS diff --git a/ceph/config.cc b/ceph/config.cc index c54c588eb9b1f..c8e69441d4820 100644 --- a/ceph/config.cc +++ b/ceph/config.cc @@ -49,7 +49,7 @@ md_config_t g_conf = { mds_bal_replicate_threshold: 500, mds_bal_unreplicate_threshold: 200, - mds_bal_interval: 10000, + mds_bal_interval: 60, // seconds mds_commit_on_shutdown: true, diff --git a/ceph/mds/CInode.cc b/ceph/mds/CInode.cc index 6d4ac739fbaed..58820fa8608aa 100644 --- a/ceph/mds/CInode.cc +++ b/ceph/mds/CInode.cc @@ -13,7 +13,7 @@ #define dout(x) if (x <= g_conf.debug) cout << "cinode: " -map cinode_pins; // counts +int cinode_pins[CINODE_NUM_PINS]; // counts ostream& operator<<(ostream& out, CInode& in) diff --git a/ceph/mds/CInode.h b/ceph/mds/CInode.h index ff873723a8e82..fb0769f966335 100644 --- a/ceph/mds/CInode.h +++ b/ceph/mds/CInode.h @@ -157,7 +157,7 @@ class CInode; ostream& operator<<(ostream& out, CInode& in); -extern map cinode_pins; // counts +extern int cinode_pins[CINODE_NUM_PINS]; // counts // cached inode wrapper diff --git a/ceph/mds/MDCache.cc b/ceph/mds/MDCache.cc index 1a226c0b7e0b3..8868e7e1a8509 100644 --- a/ceph/mds/MDCache.cc +++ b/ceph/mds/MDCache.cc @@ -1499,14 +1499,9 @@ void MDCache::request_cleanup(Message *req) if (g_conf.log_pins) { // pin - for (map::iterator it = cinode_pins.begin(); - it != cinode_pins.end(); - it++) { - //string s = "I"; - //s += cinode_pin_names[it->first]; - mds->logger2->set(//s, - cinode_pin_names[it->first], - it->second); + for (int i=0; ilogger2->set(cinode_pin_names[i], + cinode_pins[i]); } /* for (map::iterator it = cdir_pins.begin(); diff --git a/ceph/mds/MDS.cc b/ceph/mds/MDS.cc index 10e9fbf343af7..07d6bbd5f0a5c 100644 --- a/ceph/mds/MDS.cc +++ b/ceph/mds/MDS.cc @@ -112,7 +112,7 @@ MDS::MDS(MDCluster *mdc, int whoami, Messenger *m) { mds_paused = false; stat_ops = 0; - last_heartbeat = 0; + last_balancer_heartbeat = g_clock.gettimepair(); // log string name; @@ -441,9 +441,10 @@ void MDS::my_dispatch(Message *m) } // balance? + timepair_t now = g_clock.gettimepair(); if (true && whoami == 0 && - stat_ops >= last_heartbeat + g_conf.mds_bal_interval) { - last_heartbeat = stat_ops; + now.first - last_balancer_heartbeat.first >= g_conf.mds_bal_interval) { + last_balancer_heartbeat = now; balancer->send_heartbeat(); } diff --git a/ceph/mds/MDS.h b/ceph/mds/MDS.h index d98776b643a1a..a927624691be6 100644 --- a/ceph/mds/MDS.h +++ b/ceph/mds/MDS.h @@ -136,7 +136,7 @@ class MDS : public Dispatcher { protected: __uint64_t stat_ops; - __uint64_t last_heartbeat; + timepair_t last_balancer_heartbeat; public: MDS(MDCluster *mdc, int whoami, Messenger *m); diff --git a/ceph/messages/MOSDOp.h b/ceph/messages/MOSDOp.h index 5a351eff9e400..c2e5551f67925 100644 --- a/ceph/messages/MOSDOp.h +++ b/ceph/messages/MOSDOp.h @@ -27,7 +27,7 @@ typedef struct { msg_addr_t asker; object_t oid; - repgroup_t rg; + repgroup_t rg, rg_role; __uint64_t ocv; int op; @@ -48,6 +48,7 @@ class MOSDOp : public Message { object_t get_oid() { return st.oid; } repgroup_t get_rg() { return st.rg; } + int get_rg_role() { return st.rg_role; } // who am i asking for? __uint64_t get_ocv() { return st.ocv; } int get_op() { return st.op; } @@ -76,11 +77,14 @@ class MOSDOp : public Message { this->st.oid = oid; this->st.rg = rg; + this->st.rg_role = 0; this->st.ocv = ocv; this->st.op = op; } MOSDOp() {} + void set_rg_role(int r) { st.rg_role = r; } + void set_length(size_t l) { st.length = l; } void set_offset(size_t o) { st.offset = o; } diff --git a/ceph/msg/MPIMessenger.cc b/ceph/msg/MPIMessenger.cc index e755c56f6aa46..d49255a1886f1 100644 --- a/ceph/msg/MPIMessenger.cc +++ b/ceph/msg/MPIMessenger.cc @@ -35,7 +35,7 @@ int mpi_rank; bool mpi_done = false; // set this flag to stop the event loop -#define FUNNEL_MPI // if we want to funnel mpi through a single thread +#define FUNNEL_MPI // if we want to funnel mpi through a single thread #define TAG_UNSOLICITED 0 #define DBLVL 18 diff --git a/ceph/osd/OSD.cc b/ceph/osd/OSD.cc index 82b9fcc7d3ca0..9c03f83a07084 100644 --- a/ceph/osd/OSD.cc +++ b/ceph/osd/OSD.cc @@ -217,7 +217,15 @@ void OSD::handle_op(MOSDOp *op) if (op->get_ocv() < osdcluster->get_version()) { // op's is old dout(7) << "op cluster " << op->get_ocv() << " > " << osdcluster->get_version() << endl; + } + + + // am i the right rg_role? + repgroup_t rg = op->get_rg(); + if (op->get_rg_role() == 0) { + // PRIMARY + // verify that we are primary, or acting primary int acting_primary = osdcluster->get_rg_acting_primary( op->get_rg() ); if (acting_primary != whoami) { @@ -226,9 +234,20 @@ void OSD::handle_op(MOSDOp *op) logger->inc("fwd"); return; } + } else { + // REPLICA + int my_role = osdcluster->get_rg_role(rg, whoami); + + dout(7) << "rg " << rg << " my_role " << my_role << " wants " << op->get_rg_role() << endl; + + if (my_role != op->get_rg_role()) { + assert(0); + } } + + // do the op switch (op->get_op()) { diff --git a/ceph/osd/OSD.h b/ceph/osd/OSD.h index 25739a3ce1566..35fbc52e1b042 100644 --- a/ceph/osd/OSD.h +++ b/ceph/osd/OSD.h @@ -24,7 +24,7 @@ class Message; class ReplicaGroup { public: repgroup_t rg; - int role; // 1 = primary, 2 = secondary, etc. 0=undef. + int role; // 0 = primary, 1 = secondary, etc. 0=undef. int state; map dirty_map; // dirty objects @@ -34,6 +34,8 @@ class ReplicaGroup { void enumerate_objects(list& ls); }; + + class OSD : public Dispatcher { protected: Messenger *messenger; diff --git a/ceph/osd/OSDMap.h b/ceph/osd/OSDMap.h index 8ffac78ef4406..a951f6a2fccf6 100644 --- a/ceph/osd/OSDMap.h +++ b/ceph/osd/OSDMap.h @@ -183,6 +183,19 @@ class OSDCluster { return -1; // we fail! } + /* what replica # is a given osd? 0 primary, -1 for none. */ + int get_rg_role(repgroup_t rg, int osd) { + int group[NUM_RUSH_REPLICAS]; + repgroup_to_osds(rg, group, NUM_RUSH_REPLICAS); + int role = 0; + for (int i=0; i= 0) && (replicasLeft > 0); cluster--) { if (serversInPrevious[cluster] < replicasLeft) { mustAssign = replicasLeft - serversInPrevious[cluster]; @@ -96,13 +99,13 @@ Rush::GetServersByKey (int key, int nReplicas, int servers[]) rng.Seed (myhash (key)^cluster, cluster^0xb90738); numberAssigned = mustAssign + rng.HyperGeometricWeighted (toDraw, myWeight, - totalWeightBefore[cluster] + myWeight, - clusterWeight[cluster]); + totalWeightBefore[cluster] + myWeight, + clusterWeight[cluster]); if (numberAssigned > 0) { rng.Seed (myhash (key)^cluster ^ 11, cluster^0xfea937); rng.DrawKofN (srv, numberAssigned, clusterSize[cluster]); for (i = 0; i < numberAssigned; i++) { - srv[i] += serversInPrevious[cluster]; + srv[i] += serversInPrevious[cluster]; } replicasLeft -= numberAssigned; srv += numberAssigned; diff --git a/ceph/test/testmpi.cc b/ceph/test/testmpi.cc new file mode 100644 index 0000000000000..07d1f09e9d40e --- /dev/null +++ b/ceph/test/testmpi.cc @@ -0,0 +1,44 @@ +#include +#include +#include +using namespace std; + +#include "include/config.h" +#include "messages/MPing.h" + +#include "msg/MPIMessenger.h" + +class Pinger : public Dispatcher { +public: + Messenger *messenger; + Pinger(Messenger *m) : messenger(m) { + m->set_dispatcher(this); + } + void dispatch(Message *m) { + cout << "got incoming " << m << endl; + delete m; + } +}; + +int main(int argc, char **argv) { + + + int myrank = mpimessenger_init(argc, argv); + int world = mpimessenger_world(); + + Pinger *p = new Pinger( new MPIMessenger(myrank) ); + + mpimessenger_start(); + + while (1) { + + // ping random nodes + int d = rand() % world; + p->messenger->send_message(new MPing(), d); + + } + + + mpimessenger_wait(); + mpimessenger_shutdown(); // shutdown MPI +}