cmds_SOURCES = cmds.cc msg/SimpleMessenger.cc
cmds_LDADD = libmds.a libosdc.a libcrush.a libcommon.a
+dumpjournal_SOURCES = dumpjournal.cc msg/SimpleMessenger.cc
+dumpjournal_LDADD = libosdc.a libcrush.a libcommon.a
+
# osd
cosd_SOURCES = cosd.cc msg/SimpleMessenger.cc
cosd_LDADD = libosd.a libos.a libebofs.a libcrush.a libcommon.a
cmonctl \
mkmonfs monmaptool osdmaptool crushtool \
fakesyn \
- streamtest dupstore psim \
+ streamtest dupstore psim dumpjournal \
test.ebofs mkfs.ebofs \
$(FUSEBIN) $(NEWSYN)
noinst_LIBRARIES = \
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <sys/stat.h>
+#include <iostream>
+#include <string>
+using namespace std;
+
+#include "config.h"
+
+#include "mon/MonMap.h"
+#include "mon/MonClient.h"
+#include "msg/SimpleMessenger.h"
+#include "osd/OSDMap.h"
+#include "messages/MOSDGetMap.h"
+#include "osdc/Objecter.h"
+#include "osdc/Journaler.h"
+#include "mds/mdstypes.h"
+
+#include "common/Timer.h"
+
+#ifndef DARWIN
+#include <envz.h>
+#endif // DARWIN
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+
+OSDMap osdmap;
+Mutex lock;
+Cond cond;
+
+Messenger *messenger = 0;
+Objecter *objecter = 0;
+Journaler *journaler = 0;
+
+// Message dispatcher for this tool: forwards the OSD messages that the
+// global `objecter` consumes, selected by message type.
+class Dumper : public Dispatcher {
+  // Route a single incoming message to the matching Objecter handler.
+  // NOTE(review): message types other than the two below are left untouched
+  // here -- confirm whether the dispatcher is expected to release them.
+  void dispatch(Message *m) {
+    const int type = m->get_type();
+    if (type == CEPH_MSG_OSD_OPREPLY) {
+      // reply to an op we submitted through the objecter
+      objecter->handle_osd_op_reply((MOSDOpReply *)m);
+    } else if (type == CEPH_MSG_OSD_MAP) {
+      // new osd map
+      objecter->handle_osd_map((MOSDMap*)m);
+    }
+  }
+} dispatcher;
+
+
+// Bail out of the program with exit status 1.  No usage text is printed.
+void usage() {
+  exit(1);
+}
+
+/*
+ * dumpjournal: read the journal of mds 0 out of the object store and write
+ * the raw bytes to "mds.journal.dump" (relative path).
+ *
+ * Steps: parse config options, fetch the monmap, bring up the messenger,
+ * recover the journal's read/write positions via the Journaler, read
+ * [read_pos, write_pos) with a Filer, dump the buffer to the file, then
+ * shut the messenger down and wait for it.
+ *
+ * Returns -1 if the monmap cannot be fetched, 0 otherwise.  envp is unused.
+ */
+int main(int argc, const char **argv, const char *envp[])
+{
+  vector<const char*> args;
+  argv_to_vec(argc, argv, args);
+  parse_config_options(args);
+
+  vec_to_argv(args, argc, argv);
+
+  int mds = 0;   // which mds' journal to dump; fixed at 0 here
+
+  // get monmap
+  MonMap monmap;
+  MonClient mc;
+  if (mc.get_monmap(&monmap) < 0)
+    return -1;
+
+  // start up network
+  rank.bind();
+  g_conf.daemonize = false; // not us!
+  rank.start();
+  messenger = rank.register_entity(entity_name_t::ADMIN());
+  messenger->set_dispatcher(&dispatcher);
+
+  // describe the journal "file": zeroed inode with the log ino and log layout
+  inode_t log_inode;
+  memset(&log_inode, 0, sizeof(log_inode));
+  log_inode.ino = MDS_INO_LOG_OFFSET + mds;
+  log_inode.layout = g_default_mds_log_layout;
+
+  objecter = new Objecter(messenger, &monmap, &osdmap, lock);
+  journaler = new Journaler(log_inode, objecter, 0, &lock);
+
+  objecter->set_client_incarnation(0);
+
+  // Completion flag shared by both waits below.  Initialise it explicitly
+  // rather than depending on C_SafeCond to clear it: an uninitialised read
+  // in the wait loop would be undefined behaviour.
+  bool done = false;
+  journaler->recover(new C_SafeCond(&lock, &cond, &done));
+  lock.Lock();
+  while (!done)
+    cond.Wait(lock);
+  lock.Unlock();
+
+  __u64 start = journaler->get_read_pos();
+  __u64 end = journaler->get_write_pos();
+  __u64 len = end-start;
+  cout << "journal is " << start << "~" << len << std::endl;
+
+  Filer filer(objecter);
+  bufferlist bl;
+  // Re-arm the flag before handing it to the next completion; the previous
+  // completion has already fired, so nothing else touches it at this point.
+  done = false;
+  filer.read(log_inode, start, len, &bl, 0, new C_SafeCond(&lock, &cond, &done));
+  lock.Lock();
+  while (!done)
+    cond.Wait(lock);
+  lock.Unlock();
+
+  cout << "read " << bl.length() << " bytes" << std::endl;
+  // NOTE(review): the outcome of write_file() is not checked -- confirm
+  // whether it reports failures and whether they should affect the exit code.
+  bl.write_file("mds.journal.dump");
+  messenger->shutdown();
+
+  // wait for messenger to finish
+  rank.wait();
+
+  return 0;
+}
+
* object layout - how a given object should be stored.
*/
struct ceph_object_layout {
- __le64 ol_pgid;
+ __le64 ol_pgid; /* raw pg, with _full_ ps precision. */
__le32 ol_stripe_unit;
} __attribute__ ((packed));
}
pgid.pg64 = 0; /* start with it zeroed out */
- pgid.pg.ps = ceph_stable_mod(bno + crush_hash32_2(ino, ino>>32),
- num, num_mask);
+ pgid.pg.ps = bno + crush_hash32_2(ino, ino>>32);
pgid.pg.preferred = preferred;
pgid.pg.type = fl->fl_pg_type;
pgid.pg.size = fl->fl_pg_size;
<< " " << get_opname(get_op())
<< " " << head.oid;
if (get_length()) out << " " << get_offset() << "~" << get_length();
+ out << " " << pg_t(head.layout.ol_pgid);
if (is_retry_attempt()) out << " RETRY";
out << ")";
}
}
}
+ // wake up _all_ pg waiters; raw pg -> actual pg mapping may have shifted
+ for (hash_map<pg_t, list<Message*> >::iterator p = waiting_for_pg.begin();
+ p != waiting_for_pg.end();
+ p++)
+ take_waiters(p->second);
+ waiting_for_pg.clear();
+
+
// finishers?
finished_lock.Lock();
if (finished.empty()) {
op_queue_cond.Wait(osd_lock);
}
+ // calc actual pgid
+ pg_t pgid = osdmap->raw_pg_to_pg(op->get_pg());
+
// get and lock *pg.
- const pg_t pgid = op->get_pg();
PG *pg = _have_pg(pgid) ? _lookup_lock_pg(pgid):0;
logger->set("buf", buffer_total_alloc.test());
}
ceph_object_layout make_object_layout(object_t oid, int pg_type, int pg_size, int pg_pool, int preferred=-1, int object_stripe_unit = 0) {
- int num = preferred >= 0 ? lpg_num:pg_num;
- int num_mask = preferred >= 0 ? lpg_num_mask:pg_num_mask;
-
// calculate ps (placement seed)
- ps_t ps;
+ ps_t ps; // NOTE: keep full precision, here!
switch (g_conf.osd_object_layout) {
case CEPH_OBJECT_LAYOUT_LINEAR:
- ps = ceph_stable_mod(oid.bno + oid.ino, num, num_mask);
+ ps = oid.bno + oid.ino;
break;
case CEPH_OBJECT_LAYOUT_HASHINO:
//ps = stable_mod(oid.bno + H(oid.bno+oid.ino)^H(oid.ino>>32), num, num_mask);
- ps = ceph_stable_mod(oid.bno + crush_hash32_2(oid.ino, oid.ino>>32), num, num_mask);
+ ps = oid.bno + crush_hash32_2(oid.ino, oid.ino>>32);
break;
case CEPH_OBJECT_LAYOUT_HASH:
//ps = stable_mod(H( (oid.bno & oid.ino) ^ ((oid.bno^oid.ino) >> 32) ), num, num_mask);
//ps = stable_mod(H(oid.bno) + H(oid.ino)^H(oid.ino>>32), num, num_mask);
//ps = stable_mod(oid.bno + H(oid.bno+oid.ino)^H(oid.bno+oid.ino>>32), num, num_mask);
- ps = ceph_stable_mod(oid.bno + crush_hash32_2(oid.ino, oid.ino>>32), num, num_mask);
+ ps = oid.bno + crush_hash32_2(oid.ino, oid.ino>>32);
break;
default:
}
+ /*
+  * Turn a raw pg (ps carried at full precision) into the pg actually used
+  * for storage: ps is reduced with ceph_stable_mod against lpg_num when the
+  * pg has a preferred value >= 0, and against pg_num otherwise.
+  */
+ pg_t raw_pg_to_pg(pg_t pg) {
+   const bool has_preferred = pg.preferred() >= 0;
+   pg.u.pg.ps = has_preferred
+     ? ceph_stable_mod(pg.ps(), lpg_num, lpg_num_mask)
+     : ceph_stable_mod(pg.ps(), pg_num, pg_num_mask);
+   return pg;
+ }
+
+ /*
+  * Reduce a raw pg's full-precision ps to a placement ps: ceph_stable_mod
+  * against lpgp_num when the pg has a preferred value >= 0, against pgp_num
+  * otherwise.
+  */
+ ps_t raw_pg_to_pps(pg_t pg) {
+   const bool has_preferred = pg.preferred() >= 0;
+   if (!has_preferred)
+     return ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask);
+   return ceph_stable_mod(pg.ps(), lpgp_num, lpgp_num_mask);
+ }
+
// pg -> (osd list)
int pg_to_osds(pg_t pg, vector<int>& osds) {
// map to osds[]
- ps_t pps; // placement ps
- if (pg.preferred() >= 0)
- pps = ceph_stable_mod(pg.ps(), lpgp_num, lpgp_num_mask);
- else
- pps = ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask);
+ ps_t pps = raw_pg_to_pps(pg); // placement ps
switch (g_conf.osd_pg_layout) {
case CEPH_PG_LAYOUT_CRUSH: