From: Sage Weil Date: Wed, 2 Jul 2008 17:35:17 +0000 (-0700) Subject: mds: factor MDSTable out of IdAllocator X-Git-Tag: v0.4~498 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a891e8bff00413cfdcf5c8003b7ca85d312d9f07;p=ceph.git mds: factor MDSTable out of IdAllocator --- diff --git a/src/Makefile.am b/src/Makefile.am index 70747ad1da6e..d39c44fa6523 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -203,6 +203,7 @@ libmds_a_SOURCES = \ mds/AnchorTable.cc \ mds/AnchorClient.cc \ mds/LogEvent.cc \ + mds/MDSTable.cc \ mds/IdAllocator.cc \ mds/SessionMap.cc \ mds/MDLog.cc diff --git a/src/TODO b/src/TODO index 4a1eef538dcd..ea9570e03142 100644 --- a/src/TODO +++ b/src/TODO @@ -218,9 +218,10 @@ remaining hard problems snapshot notes -- todo -- basic types (snapid_t, etc.) -- snap lineage in MOSDOp +/- basic types (snapid_t, etc.) +/- snap lineage in MOSDOp - rados bits to do clone+write +/ - cloning - figure out how to fix up rados logging - snap collections - garbage collection diff --git a/src/mds/IdAllocator.cc b/src/mds/IdAllocator.cc index 81f3ab988ebb..2dfddbbe4c65 100644 --- a/src/mds/IdAllocator.cc +++ b/src/mds/IdAllocator.cc @@ -12,15 +12,8 @@ * */ - - -#define DBLEVEL 20 - #include "IdAllocator.h" #include "MDS.h" -#include "MDLog.h" - -#include "osdc/Filer.h" #include "include/types.h" @@ -28,14 +21,26 @@ #define dout(x) if (x <= g_conf.debug_mds) *_dout << dbeginl << g_clock.now() << " mds" << mds->get_nodeid() << ".idalloc: " - void IdAllocator::init_inode() { - memset(&inode, 0, sizeof(inode)); - inode.ino = MDS_INO_IDS_OFFSET + mds->get_nodeid(); - inode.layout = g_default_file_layout; + ino = MDS_INO_IDS_OFFSET + mds->get_nodeid(); + layout = g_default_file_layout; } +void IdAllocator::reset_state() +{ + // use generic range. 
FIXME THIS IS CRAP + free.clear(); + //#ifdef __LP64__ + uint64_t start = (uint64_t)(mds->get_nodeid()+1) << 40; + uint64_t end = ((uint64_t)(mds->get_nodeid()+2) << 40) - 1; + //#else + //# warning this looks like a 32-bit system, using small inode numbers. + // uint64_t start = (uint64_t)(mds->get_nodeid()+1) << 25; + // uint64_t end = ((uint64_t)(mds->get_nodeid()+2) << 25) - 1; + //#endif + free.insert(start, end); +} inodeno_t IdAllocator::alloc_id() { @@ -48,12 +53,6 @@ inodeno_t IdAllocator::alloc_id() version++; - // log it - /* - if (!replay) - mds->mdlog->submit_entry(new EAlloc(IDTYPE_INO, id, EALLOC_EV_ALLOC, version)); - */ - return id; } @@ -65,139 +64,5 @@ void IdAllocator::reclaim_id(inodeno_t id) free.insert(id); version++; - - /* - if (!replay) - mds->mdlog->submit_entry(new EAlloc(IDTYPE_INO, id, EALLOC_EV_FREE, version)); - */ } - - -class C_ID_Save : public Context { - IdAllocator *ida; - version_t version; -public: - C_ID_Save(IdAllocator *i, version_t v) : ida(i), version(v) {} - void finish(int r) { - ida->save_2(version); - } -}; - -void IdAllocator::save(Context *onfinish, version_t v) -{ - if (v > 0 && v <= committing_version) { - dout(10) << "save v " << version << " - already saving " - << committing_version << " >= needed " << v << dendl; - waitfor_save[v].push_back(onfinish); - return; - } - - dout(10) << "save v " << version << dendl; - assert(is_active()); - - bufferlist bl; - ::encode(version, bl); - ::encode(free.m, bl); - - committing_version = version; - - if (onfinish) - waitfor_save[version].push_back(onfinish); - - // write (async) - mds->filer->write(inode.ino, &inode.layout, - 0, bl.length(), bl, - 0, - 0, new C_ID_Save(this, version)); -} - -void IdAllocator::save_2(version_t v) -{ - dout(10) << "save_2 v " << v << dendl; - - committed_version = v; - - list<Context*> ls; - while (!waitfor_save.empty()) { - if (waitfor_save.begin()->first > v) break; - ls.splice(ls.end(), waitfor_save.begin()->second); -
waitfor_save.erase(waitfor_save.begin()); - } - finish_contexts(ls,0); -} - - -void IdAllocator::reset() -{ - init_inode(); - - // use generic range. FIXME THIS IS CRAP - free.clear(); -//#ifdef __LP64__ - uint64_t start = (uint64_t)(mds->get_nodeid()+1) << 40; - uint64_t end = ((uint64_t)(mds->get_nodeid()+2) << 40) - 1; -//#else -//# warning this looks like a 32-bit system, using small inode numbers. -// uint64_t start = (uint64_t)(mds->get_nodeid()+1) << 25; -// uint64_t end = ((uint64_t)(mds->get_nodeid()+2) << 25) - 1; -//#endif - free.insert(start, end); - - state = STATE_ACTIVE; -} - - - -// ----------------------- - -class C_ID_Load : public Context { -public: - IdAllocator *ida; - Context *onfinish; - bufferlist bl; - C_ID_Load(IdAllocator *i, Context *o) : ida(i), onfinish(o) {} - void finish(int r) { - ida->load_2(r, bl, onfinish); - } -}; - -void IdAllocator::load(Context *onfinish) -{ - dout(10) << "load" << dendl; - - init_inode(); - - assert(is_undef()); - state = STATE_OPENING; - - C_ID_Load *c = new C_ID_Load(this, onfinish); - mds->filer->read(inode.ino, &inode.layout, - 0, ceph_file_layout_su(inode.layout), - &c->bl, 0, - c); -} - -void IdAllocator::load_2(int r, bufferlist& bl, Context *onfinish) -{ - assert(is_opening()); - state = STATE_ACTIVE; - - if (r > 0) { - dout(10) << "load_2 got " << bl.length() << " bytes" << dendl; - bufferlist::iterator p = bl.begin(); - ::decode(version, p); - ::decode(free.m, p); - committed_version = version; - } - else { - dout(10) << "load_2 found no alloc file" << dendl; - assert(0); // this shouldn't happen if mkfs finished. 
- reset(); - } - - if (onfinish) { - onfinish->finish(0); - delete onfinish; - } -} diff --git a/src/mds/IdAllocator.h b/src/mds/IdAllocator.h index 90dab5c6294e..ab46c168494c 100644 --- a/src/mds/IdAllocator.h +++ b/src/mds/IdAllocator.h @@ -16,63 +16,29 @@ #ifndef __IDALLOCATOR_H #define __IDALLOCATOR_H -#include "mdstypes.h" +#include "MDSTable.h" #include "include/interval_set.h" -#include "include/buffer.h" -#include "include/Context.h" class MDS; -class IdAllocator { - MDS *mds; - inode_t inode; - - static const int STATE_UNDEF = 0; - static const int STATE_OPENING = 1; - static const int STATE_ACTIVE = 2; - //static const int STATE_COMMITTING = 3; - int state; - - version_t version, committing_version, committed_version; - +class IdAllocator : public MDSTable { interval_set<inodeno_t> free; // unused ids - - map<version_t, list<Context*> > waitfor_save; public: - IdAllocator(MDS *m) : - mds(m), - state(STATE_UNDEF), - version(0), committing_version(0), committed_version(0) - { - } - - void init_inode(); + IdAllocator(MDS *m) : MDSTable(m, "idalloc") { } // alloc or reclaim ids inodeno_t alloc_id(); void reclaim_id(inodeno_t ino); - - version_t get_version() { return version; } - version_t get_committed_version() { return committed_version; } - version_t get_committing_version() { return committing_version; } - - // load/save from disk (hack) - bool is_undef() { return state == STATE_UNDEF; } - bool is_active() { return state == STATE_ACTIVE; } - bool is_opening() { return state == STATE_OPENING; } - - void reset(); - void save(Context *onfinish=0, version_t need=0); - void save_2(version_t v); - - void shutdown() { - if (is_active()) save(0); + + void init_inode(); + void reset_state(); + void encode_state(bufferlist& bl) { + ::encode(free.m, bl); + } + void decode_state(bufferlist::iterator& bl) { + ::decode(free.m, bl); } - - void load(Context *onfinish); - void load_2(int, bufferlist&, Context *onfinish); - }; #endif diff --git a/src/mds/MDSTable.cc b/src/mds/MDSTable.cc new file mode 100644
index 000000000000..78c921887cea --- /dev/null +++ b/src/mds/MDSTable.cc @@ -0,0 +1,142 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "MDSTable.h" + +#include "MDS.h" +#include "MDLog.h" + +#include "osdc/Filer.h" + +#include "include/types.h" + +#include "config.h" + +#define dout(x) if (x <= g_conf.debug_mds) *_dout << dbeginl << g_clock.now() << " mds" << mds->get_nodeid() << "." << table_name << ": " + +class C_MT_Save : public Context { + MDSTable *ida; + version_t version; +public: + C_MT_Save(MDSTable *i, version_t v) : ida(i), version(v) {} + void finish(int r) { + ida->save_2(version); + } +}; + +void MDSTable::save(Context *onfinish, version_t v) +{ + if (v > 0 && v <= committing_version) { + dout(10) << "save v " << version << " - already saving " + << committing_version << " >= needed " << v << dendl; + waitfor_save[v].push_back(onfinish); + return; + } + + dout(10) << "save v " << version << dendl; + assert(is_active()); + + bufferlist bl; + ::encode(version, bl); + encode_state(bl); + + committing_version = version; + + if (onfinish) + waitfor_save[version].push_back(onfinish); + + // write (async) + mds->filer->write(ino, &layout, + 0, bl.length(), bl, + 0, + 0, new C_MT_Save(this, version)); +} + +void MDSTable::save_2(version_t v) +{ + dout(10) << "save_2 v " << v << dendl; + + committed_version = v; + + list<Context*> ls; + while (!waitfor_save.empty()) { + if (waitfor_save.begin()->first > v) break; + ls.splice(ls.end(), waitfor_save.begin()->second); + waitfor_save.erase(waitfor_save.begin()); + } + finish_contexts(ls,0); +} + + +void MDSTable::reset() +{ + init_inode(); +
reset_state(); + state = STATE_ACTIVE; +} + + + +// ----------------------- + +class C_MT_Load : public Context { +public: + MDSTable *ida; + Context *onfinish; + bufferlist bl; + C_MT_Load(MDSTable *i, Context *o) : ida(i), onfinish(o) {} + void finish(int r) { + ida->load_2(r, bl, onfinish); + } +}; + +void MDSTable::load(Context *onfinish) +{ + dout(10) << "load" << dendl; + + init_inode(); + + assert(is_undef()); + state = STATE_OPENING; + + C_MT_Load *c = new C_MT_Load(this, onfinish); + mds->filer->read(ino, &layout, + 0, ceph_file_layout_su(layout), + &c->bl, 0, + c); +} + +void MDSTable::load_2(int r, bufferlist& bl, Context *onfinish) +{ + assert(is_opening()); + state = STATE_ACTIVE; + + if (r > 0) { + dout(10) << "load_2 got " << bl.length() << " bytes" << dendl; + bufferlist::iterator p = bl.begin(); + ::decode(version, p); + committed_version = version; + decode_state(p); + } + else { + dout(10) << "load_2 found no table" << dendl; + assert(0); // this shouldn't happen if mkfs finished. + reset(); + } + + if (onfinish) { + onfinish->finish(0); + delete onfinish; + } +} diff --git a/src/mds/MDSTable.h b/src/mds/MDSTable.h new file mode 100644 index 000000000000..647d54c2d72e --- /dev/null +++ b/src/mds/MDSTable.h @@ -0,0 +1,79 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#ifndef __MDSTABLE_H +#define __MDSTABLE_H + +#include "mdstypes.h" +#include "include/buffer.h" +#include "include/Context.h" + +class MDS; + +class MDSTable { + protected: + MDS *mds; + + const char *table_name; + inodeno_t ino; + ceph_file_layout layout; + + static const int STATE_UNDEF = 0; + static const int STATE_OPENING = 1; + static const int STATE_ACTIVE = 2; + //static const int STATE_COMMITTING = 3; + int state; + + version_t version, committing_version, committed_version; + + map<version_t, list<Context*> > waitfor_save; + +public: + MDSTable(MDS *m, const char *n) : + mds(m), table_name(n), + ino(0), + state(STATE_UNDEF), + version(0), committing_version(0), committed_version(0) + { + } + virtual ~MDSTable() {} + + version_t get_version() { return version; } + version_t get_committed_version() { return committed_version; } + version_t get_committing_version() { return committing_version; } + + // load/save from disk (hack) + bool is_undef() { return state == STATE_UNDEF; } + bool is_active() { return state == STATE_ACTIVE; } + bool is_opening() { return state == STATE_OPENING; } + + void reset(); + void save(Context *onfinish=0, version_t need=0); + void save_2(version_t v); + + void shutdown() { + if (is_active()) save(0); + } + + void load(Context *onfinish); + void load_2(int, bufferlist&, Context *onfinish); + + // child must overload these + virtual void init_inode() = 0; + virtual void reset_state() = 0; + virtual void decode_state(bufferlist::iterator& p) = 0; + virtual void encode_state(bufferlist& bl) = 0; +}; + +#endif