mds/AnchorTable.cc \
mds/AnchorClient.cc \
mds/LogEvent.cc \
+ mds/MDSTable.cc \
mds/IdAllocator.cc \
mds/SessionMap.cc \
mds/MDLog.cc
snapshot notes --
todo
-- basic types (snapid_t, etc.)
-- snap lineage in MOSDOp
+/- basic types (snapid_t, etc.)
+/- snap lineage in MOSDOp
- rados bits to do clone+write
+/ - cloning
- figure out how to fix up rados logging
- snap collections
- garbage collection
*
*/
-
-
-#define DBLEVEL 20
-
#include "IdAllocator.h"
#include "MDS.h"
-#include "MDLog.h"
-
-#include "osdc/Filer.h"
#include "include/types.h"
#define dout(x) if (x <= g_conf.debug_mds) *_dout << dbeginl << g_clock.now() << " mds" << mds->get_nodeid() << ".idalloc: "
-
void IdAllocator::init_inode()
{
- memset(&inode, 0, sizeof(inode));
- inode.ino = MDS_INO_IDS_OFFSET + mds->get_nodeid();
- inode.layout = g_default_file_layout;
+ ino = MDS_INO_IDS_OFFSET + mds->get_nodeid();
+ layout = g_default_file_layout;
}
+void IdAllocator::reset_state()
+{
+  // Forget every id currently tracked, then install a single generic range
+  // derived from this MDS's node id.  FIXME THIS IS CRAP
+  free.clear();
+
+  // A 25-bit shift for non-LP64 builds was sketched here at one point but is
+  // not compiled in; the 40-bit shift is used unconditionally.
+  const uint64_t range_start = (uint64_t)(mds->get_nodeid()+1) << 40;
+  const uint64_t range_end = ((uint64_t)(mds->get_nodeid()+2) << 40) - 1;
+
+  // NOTE(review): the second argument is computed as an end value; confirm
+  // that interval_set::insert() takes (start, end) and not (start, length).
+  free.insert(range_start, range_end);
+}
inodeno_t IdAllocator::alloc_id()
{
version++;
- // log it
- /*
- if (!replay)
- mds->mdlog->submit_entry(new EAlloc(IDTYPE_INO, id, EALLOC_EV_ALLOC, version));
- */
-
return id;
}
free.insert(id);
version++;
-
- /*
- if (!replay)
- mds->mdlog->submit_entry(new EAlloc(IDTYPE_INO, id, EALLOC_EV_FREE, version));
- */
}
-
-
-class C_ID_Save : public Context {
- IdAllocator *ida;
- version_t version;
-public:
- C_ID_Save(IdAllocator *i, version_t v) : ida(i), version(v) {}
- void finish(int r) {
- ida->save_2(version);
- }
-};
-
-void IdAllocator::save(Context *onfinish, version_t v)
-{
- if (v > 0 && v <= committing_version) {
- dout(10) << "save v " << version << " - already saving "
- << committing_version << " >= needed " << v << dendl;
- waitfor_save[v].push_back(onfinish);
- return;
- }
-
- dout(10) << "save v " << version << dendl;
- assert(is_active());
-
- bufferlist bl;
- ::encode(version, bl);
- ::encode(free.m, bl);
-
- committing_version = version;
-
- if (onfinish)
- waitfor_save[version].push_back(onfinish);
-
- // write (async)
- mds->filer->write(inode.ino, &inode.layout,
- 0, bl.length(), bl,
- 0,
- 0, new C_ID_Save(this, version));
-}
-
-void IdAllocator::save_2(version_t v)
-{
- dout(10) << "save_2 v " << v << dendl;
-
- committed_version = v;
-
- list<Context*> ls;
- while (!waitfor_save.empty()) {
- if (waitfor_save.begin()->first > v) break;
- ls.splice(ls.end(), waitfor_save.begin()->second);
- waitfor_save.erase(waitfor_save.begin());
- }
- finish_contexts(ls,0);
-}
-
-
-void IdAllocator::reset()
-{
- init_inode();
-
- // use generic range. FIXME THIS IS CRAP
- free.clear();
-//#ifdef __LP64__
- uint64_t start = (uint64_t)(mds->get_nodeid()+1) << 40;
- uint64_t end = ((uint64_t)(mds->get_nodeid()+2) << 40) - 1;
-//#else
-//# warning this looks like a 32-bit system, using small inode numbers.
-// uint64_t start = (uint64_t)(mds->get_nodeid()+1) << 25;
-// uint64_t end = ((uint64_t)(mds->get_nodeid()+2) << 25) - 1;
-//#endif
- free.insert(start, end);
-
- state = STATE_ACTIVE;
-}
-
-
-
-// -----------------------
-
-class C_ID_Load : public Context {
-public:
- IdAllocator *ida;
- Context *onfinish;
- bufferlist bl;
- C_ID_Load(IdAllocator *i, Context *o) : ida(i), onfinish(o) {}
- void finish(int r) {
- ida->load_2(r, bl, onfinish);
- }
-};
-
-void IdAllocator::load(Context *onfinish)
-{
- dout(10) << "load" << dendl;
-
- init_inode();
-
- assert(is_undef());
- state = STATE_OPENING;
-
- C_ID_Load *c = new C_ID_Load(this, onfinish);
- mds->filer->read(inode.ino, &inode.layout,
- 0, ceph_file_layout_su(inode.layout),
- &c->bl, 0,
- c);
-}
-
-void IdAllocator::load_2(int r, bufferlist& bl, Context *onfinish)
-{
- assert(is_opening());
- state = STATE_ACTIVE;
-
- if (r > 0) {
- dout(10) << "load_2 got " << bl.length() << " bytes" << dendl;
- bufferlist::iterator p = bl.begin();
- ::decode(version, p);
- ::decode(free.m, p);
- committed_version = version;
- }
- else {
- dout(10) << "load_2 found no alloc file" << dendl;
- assert(0); // this shouldn't happen if mkfs finished.
- reset();
- }
-
- if (onfinish) {
- onfinish->finish(0);
- delete onfinish;
- }
-}
#ifndef __IDALLOCATOR_H
#define __IDALLOCATOR_H
-#include "mdstypes.h"
+#include "MDSTable.h"
#include "include/interval_set.h"
-#include "include/buffer.h"
-#include "include/Context.h"
class MDS;
-class IdAllocator {
- MDS *mds;
- inode_t inode;
-
- static const int STATE_UNDEF = 0;
- static const int STATE_OPENING = 1;
- static const int STATE_ACTIVE = 2;
- //static const int STATE_COMMITTING = 3;
- int state;
-
- version_t version, committing_version, committed_version;
-
+class IdAllocator : public MDSTable {
interval_set<inodeno_t> free; // unused ids
-
- map<version_t, list<Context*> > waitfor_save;
public:
- IdAllocator(MDS *m) :
- mds(m),
- state(STATE_UNDEF),
- version(0), committing_version(0), committed_version(0)
- {
- }
-
- void init_inode();
+ IdAllocator(MDS *m) : MDSTable(m, "idalloc") { }
// alloc or reclaim ids
inodeno_t alloc_id();
void reclaim_id(inodeno_t ino);
-
- version_t get_version() { return version; }
- version_t get_committed_version() { return committed_version; }
- version_t get_committing_version() { return committing_version; }
-
- // load/save from disk (hack)
- bool is_undef() { return state == STATE_UNDEF; }
- bool is_active() { return state == STATE_ACTIVE; }
- bool is_opening() { return state == STATE_OPENING; }
-
- void reset();
- void save(Context *onfinish=0, version_t need=0);
- void save_2(version_t v);
-
- void shutdown() {
- if (is_active()) save(0);
+
+ void init_inode();
+ void reset_state();
+ void encode_state(bufferlist& bl) {
+ ::encode(free.m, bl);
+ }
+ void decode_state(bufferlist::iterator& bl) {
+ ::decode(free.m, bl);
}
-
- void load(Context *onfinish);
- void load_2(int, bufferlist&, Context *onfinish);
-
};
#endif
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "MDSTable.h"
+
+#include "MDS.h"
+#include "MDLog.h"
+
+#include "osdc/Filer.h"
+
+#include "include/types.h"
+
+#include "config.h"
+
+#define dout(x) if (x <= g_conf.debug_mds) *_dout << dbeginl << g_clock.now() << " mds" << mds->get_nodeid() << "." << table_name << ": "
+
+// Completion for the asynchronous write issued by MDSTable::save(): it
+// remembers which table version was written and reports it to save_2().
+class C_MT_Save : public Context {
+  MDSTable *table;
+  version_t saved_version;
+public:
+  C_MT_Save(MDSTable *t, version_t v)
+    : table(t), saved_version(v) {}
+
+  // The result code r is not inspected; save_2() is called unconditionally.
+  void finish(int r) {
+    table->save_2(saved_version);
+  }
+};
+
+/*
+ * Serialize the table (version first, then the subclass state) and write it
+ * to the table's backing object.
+ *
+ *  onfinish  optional completion; may be null.
+ *  v         version the caller needs committed.  If v is non-zero and a
+ *            save covering it has already been issued, no new write is
+ *            started and onfinish is queued under v instead.  With v == 0
+ *            a write is always started.
+ *
+ * The table must be active when a write is started.
+ */
+void MDSTable::save(Context *onfinish, version_t v)
+{
+  if (v > 0 && v <= committing_version) {
+    dout(10) << "save v " << version << " - already saving "
+             << committing_version << " >= needed " << v << dendl;
+    // Only queue a real waiter, matching the guard on the normal path
+    // below: a null entry would later be spliced into the list that
+    // save_2() hands to finish_contexts().
+    if (onfinish)
+      waitfor_save[v].push_back(onfinish);
+    return;
+  }
+
+  dout(10) << "save v " << version << dendl;
+  assert(is_active());
+
+  // Same order that load_2() decodes: version, then encode_state() output.
+  bufferlist bl;
+  ::encode(version, bl);
+  encode_state(bl);
+
+  committing_version = version;
+
+  if (onfinish)
+    waitfor_save[version].push_back(onfinish);
+
+  // write (async); C_MT_Save calls save_2(version) when it completes
+  mds->filer->write(ino, &layout,
+                    0, bl.length(), bl,
+                    0,
+                    0, new C_MT_Save(this, version));
+}
+
+// Called when the write for version v has completed: record v as committed
+// and finish every waiter queued for a version <= v.
+void MDSTable::save_2(version_t v)
+{
+  dout(10) << "save_2 v " << v << dendl;
+
+  committed_version = v;
+
+  // waitfor_save is keyed by version, so all waiters satisfied by this
+  // commit sit at the front of the map; peel them off in order.
+  list<Context*> ready;
+  map<version_t, list<Context*> >::iterator head = waitfor_save.begin();
+  while (head != waitfor_save.end() && head->first <= v) {
+    ready.splice(ready.end(), head->second);
+    waitfor_save.erase(head);
+    head = waitfor_save.begin();
+  }
+  finish_contexts(ready, 0);
+}
+
+/*
+ * Put the table into a fresh, usable state without reading anything:
+ * calls the subclass's init_inode() and reset_state(), then marks the
+ * table active.
+ */
+void MDSTable::reset()
+{
+ init_inode();
+ reset_state();
+ state = STATE_ACTIVE;
+}
+
+
+
+// -----------------------
+/*
+ * Completion for the read issued by MDSTable::load().  load() passes &bl to
+ * the read; finish() forwards the result code, bl and the caller's onfinish
+ * to load_2().
+ */
+class C_MT_Load : public Context {
+public:
+ MDSTable *ida;      // table being loaded
+ Context *onfinish;  // caller's completion; may be null (load_2 checks it)
+ bufferlist bl;      // buffer handed to the read by load()
+ C_MT_Load(MDSTable *i, Context *o) : ida(i), onfinish(o) {}
+ void finish(int r) {
+ ida->load_2(r, bl, onfinish);
+ }
+};
+/*
+ * Start reading the table from its backing object.  The table must be in
+ * the undefined state; it moves to opening here and load_2() finishes the
+ * job when the read completes.
+ *
+ *  onfinish  passed through to load_2(), which calls and deletes it if it
+ *            is non-null.
+ */
+void MDSTable::load(Context *onfinish)
+{
+ dout(10) << "load" << dendl;
+
+ init_inode();  // subclass hook; IdAllocator's sets ino and layout used by the read below
+
+ assert(is_undef());
+ state = STATE_OPENING;
+/*
+ * NOTE(review): the read covers ceph_file_layout_su(layout) bytes from
+ * offset 0 -- presumably one stripe unit.  Confirm that an encoded table
+ * can never be larger than that, or it would be read back truncated.
+ */
+ C_MT_Load *c = new C_MT_Load(this, onfinish);
+ mds->filer->read(ino, &layout,
+ 0, ceph_file_layout_su(layout),
+ &c->bl, 0,
+ c);
+}
+
+// Second half of load(): runs when the read completes.
+//
+//  r         read result; anything <= 0 is treated as "no table found".
+//  bl        data that was read: the version followed by the subclass state.
+//  onfinish  optional completion; finished with 0 and deleted here.
+void MDSTable::load_2(int r, bufferlist& bl, Context *onfinish)
+{
+  assert(is_opening());
+  state = STATE_ACTIVE;
+
+  if (r <= 0) {
+    dout(10) << "load_2 found no table" << dendl;
+    assert(0);  // this shouldn't happen if mkfs finished.
+    reset();    // only reached when assert() is compiled out
+  } else {
+    dout(10) << "load_2 got " << bl.length() << " bytes" << dendl;
+    bufferlist::iterator p = bl.begin();
+    ::decode(version, p);
+    committed_version = version;  // what we just read is, by definition, on disk
+    decode_state(p);
+  }
+
+  if (!onfinish)
+    return;
+  onfinish->finish(0);
+  delete onfinish;
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef __MDSTABLE_H
+#define __MDSTABLE_H
+
+#include "mdstypes.h"
+#include "include/buffer.h"
+#include "include/Context.h"
+
+class MDS;
+/*
+ * Base class for an MDS table that is saved to and loaded from a backing
+ * object through the MDS's filer.  MDSTable handles versioning, the
+ * load/save state machine and save waiters; a subclass supplies the inode
+ * setup and the encoding of its own state via the pure virtuals at the
+ * bottom.
+ */
+class MDSTable {
+ protected:
+ MDS *mds;
+ // table_name appears in debug output; ino and layout are passed to the filer read/write.
+ const char *table_name;
+ inodeno_t ino;
+ ceph_file_layout layout;  // NOTE(review): not set by the constructor; init_inode() is expected to fill it in
+ // UNDEF is the initial state; load() moves to OPENING; reset() and load_2() move to ACTIVE.
+ static const int STATE_UNDEF = 0;
+ static const int STATE_OPENING = 1;
+ static const int STATE_ACTIVE = 2;
+ //static const int STATE_COMMITTING = 3;
+ int state;
+ // version: value encoded by save(); committing_version: version of the last save() issued; committed_version: last version whose save completed (or that was loaded).
+ version_t version, committing_version, committed_version;
+ // completions waiting for a save, keyed by version; drained by save_2()
+ map<version_t, list<Context*> > waitfor_save;
+
+public:
+ MDSTable(MDS *m, const char *n) :
+ mds(m), table_name(n),
+ ino(0),
+ state(STATE_UNDEF),
+ version(0), committing_version(0), committed_version(0)
+ {
+ }
+ virtual ~MDSTable() {}
+
+ version_t get_version() { return version; }
+ version_t get_committed_version() { return committed_version; }
+ version_t get_committing_version() { return committing_version; }
+
+ // load/save from disk (hack)
+ bool is_undef() { return state == STATE_UNDEF; }
+ bool is_active() { return state == STATE_ACTIVE; }
+ bool is_opening() { return state == STATE_OPENING; }
+ // reset(): fresh state without loading.  save(): write the table; save_2() runs on write completion.
+ void reset();
+ void save(Context *onfinish=0, version_t need=0);
+ void save_2(version_t v);
+ // Saves the table on shutdown, but only if it is active.
+ void shutdown() {
+ if (is_active()) save(0);
+ }
+ // load(): start the read; load_2() runs on read completion and decodes the result.
+ void load(Context *onfinish);
+ void load_2(int, bufferlist&, Context *onfinish);
+
+ // child must override these
+ virtual void init_inode() = 0;   // called by reset() and load() before any I/O
+ virtual void reset_state() = 0;  // called by reset() to install initial state
+ virtual void decode_state(bufferlist::iterator& p) = 0;  // called by load_2() after the version is decoded
+ virtual void encode_state(bufferlist& bl) = 0;           // called by save() after the version is encoded
+};
+
+#endif