fl_object_size: init_le32(1<<22),
fl_cas_hash: init_le32(0),
fl_object_stripe_unit: init_le32(0),
- fl_pg_preferred: init_le32(-1),
- fl_pg_pool: {CEPH_DATA_RULE},
};
-struct ceph_file_layout g_default_casdata_layout = {
- fl_stripe_unit: init_le32(1<<22),
- fl_stripe_count: init_le32(1),
- fl_object_size: init_le32(1<<22),
- fl_cas_hash: init_le32(0),
- fl_object_stripe_unit: init_le32(0),
- fl_pg_preferred: init_le32(-1),
- fl_pg_pool: {CEPH_CASDATA_RULE},
-};
-
-struct ceph_file_layout g_default_mds_dir_layout = {
- fl_stripe_unit: init_le32(1<<22),
- fl_stripe_count: init_le32(1),
- fl_object_size: init_le32(1<<22),
- fl_cas_hash: init_le32(0),
- fl_object_stripe_unit: init_le32(0),
- fl_pg_preferred: init_le32(-1),
- fl_pg_pool: {CEPH_METADATA_RULE},
-};
-struct ceph_file_layout g_default_mds_log_layout = {
- fl_stripe_unit: init_le32(1<<20),
- fl_stripe_count: init_le32(1),
- fl_object_size: init_le32(1<<20),
- fl_cas_hash: init_le32(0),
- fl_object_stripe_unit: init_le32(0),
- fl_pg_preferred: init_le32(-1),
- fl_pg_pool: {CEPH_METADATA_RULE},
-};
-
-struct ceph_file_layout g_default_mds_anchortable_layout = {
- fl_stripe_unit: init_le32(1<<20),
- fl_stripe_count: init_le32(1),
- fl_object_size: init_le32(1<<20),
- fl_cas_hash: init_le32(0),
- fl_object_stripe_unit: init_le32(0),
- fl_pg_preferred: init_le32(-1),
- fl_pg_pool: {CEPH_METADATA_RULE},
-};
#include <msg/msg_types.h>
#define __CEPH_CONFIG_H
extern struct ceph_file_layout g_default_file_layout;
-extern struct ceph_file_layout g_default_casdata_layout;
-extern struct ceph_file_layout g_default_mds_dir_layout;
-extern struct ceph_file_layout g_default_mds_log_layout;
-extern struct ceph_file_layout g_default_mds_anchortable_layout;
#include <vector>
#include <map>
inode_t log_inode;
memset(&log_inode, 0, sizeof(log_inode));
log_inode.ino = MDS_INO_LOG_OFFSET + mds;
- log_inode.layout = g_default_mds_log_layout;
+ log_inode.layout.fl_stripe_unit = 1<<20;
+ log_inode.layout.fl_stripe_count = 1;
+ log_inode.layout.fl_object_size = 1<<20;
+ log_inode.layout.fl_cas_hash = 0;
+ log_inode.layout.fl_object_stripe_unit = 0;
+ log_inode.layout.fl_pg_preferred = -1;
+ log_inode.layout.fl_pg_pool = CEPH_METADATA_RULE;
objecter = new Objecter(messenger, &monmap, &osdmap, lock);
journaler = new Journaler(log_inode.ino, &log_inode.layout, CEPH_FS_ONDISK_MAGIC, objecter, 0, 0, &lock);
}
}
+	/*
+	 * pg_pools: a count, that many data pool ids, then the cas pool id.
+	 *
+	 * n comes straight off the wire, so confirm that the n pool ids plus
+	 * the trailing cas pool id are actually present in the buffer before
+	 * sizing an allocation by it.  kcalloc() additionally rejects an
+	 * n * sizeof(u32) product that would wrap, so a wrapped length in
+	 * the bounds check can never be paired with a short array.
+	 */
+	ceph_decode_32_safe(p, end, n, bad);
+	ceph_decode_need(p, end, sizeof(u32)*(n+1), bad);
+	m->m_num_data_pg_pools = n;
+	m->m_data_pg_pools = kcalloc(n, sizeof(u32), GFP_NOFS);
+	if (!m->m_data_pg_pools)
+		goto badmem;
+	for (i = 0; i < n; i++)
+		ceph_decode_32(p, m->m_data_pg_pools[i]);
+	ceph_decode_32(p, m->m_cas_pg_pool);
+
/* ok, we don't care about the rest. */
dout(30, "mdsmap_decode success epoch %u\n", m->m_epoch);
return m;
{
kfree(m->m_addr);
kfree(m->m_state);
+ kfree(m->m_data_pg_pools);
kfree(m);
}
u32 m_max_mds; /* size of m_addr, m_state arrays */
struct ceph_entity_addr *m_addr; /* mds addrs */
s32 *m_state; /* states */
+
+ int m_num_data_pg_pools;
+ u32 *m_data_pg_pools;
+ u32 m_cas_pg_pool;
};
static inline struct ceph_entity_addr *
void AnchorServer::init_inode()
{
ino = MDS_INO_ANCHORTABLE;
- layout = g_default_file_layout;
}
void AnchorServer::reset_state()
// start by reading the first hunk of it
C_Dir_Fetch *fin = new C_Dir_Fetch(this);
- cache->mds->objecter->read( get_ondisk_object(),
- cache->mds->objecter->osdmap->file_to_object_layout( get_ondisk_object(),
- g_default_mds_dir_layout ),
- 0, 0, // whole object
- &fin->bl, 0,
- fin );
+ object_t oid = get_ondisk_object();
+ OSDMap *osdmap = cache->mds->objecter->osdmap;
+ ceph_object_layout ol = osdmap->make_object_layout(oid,
+ cache->mds->mdsmap->get_metadata_pg_pool());
+ cache->mds->objecter->read_full(oid, ol, &fin->bl, 0, fin);
}
void CDir::_fetched(bufferlist &bl)
inode->make_path_string(path);
m.setxattr("path", path);
- cache->mds->objecter->mutate( get_ondisk_object(),
- cache->mds->objecter->osdmap->file_to_object_layout( get_ondisk_object(),
- g_default_mds_dir_layout ),
- m, snapc, 0,
- NULL, new C_Dir_Committed(this, get_version()) );
+ object_t oid = get_ondisk_object();
+ OSDMap *osdmap = cache->mds->objecter->osdmap;
+ ceph_object_layout ol = osdmap->make_object_layout(oid,
+ cache->mds->mdsmap->get_metadata_pg_pool());
+
+ cache->mds->objecter->mutate(oid, ol,
+ m, snapc, 0,
+ NULL, new C_Dir_Committed(this, get_version()) );
}
m.setxattr("inode", bl);
object_t oid(ino(), frag_t());
- mdcache->mds->objecter->mutate( oid,
- mdcache->mds->objecter->osdmap->file_to_object_layout( oid,
- g_default_mds_dir_layout ),
- m, snapc, 0,
- NULL, new C_Inode_Stored(this, get_version(), fin) );
+ OSDMap *osdmap = mdcache->mds->objecter->osdmap;
+ ceph_object_layout ol = osdmap->make_object_layout(oid,
+ mdcache->mds->mdsmap->get_metadata_pg_pool());
+
+ mdcache->mds->objecter->mutate(oid, ol,
+ m, snapc, 0,
+ NULL, new C_Inode_Stored(this, get_version(), fin) );
}
void CInode::_stored(version_t v, Context *fin)
ObjectRead rd;
rd.getxattr("inode");
- mdcache->mds->objecter->read( oid,
- mdcache->mds->objecter->osdmap->file_to_object_layout( oid,
- g_default_mds_dir_layout ),
- rd, &c->bl, 0, c );
+ OSDMap *osdmap = mdcache->mds->objecter->osdmap;
+ ceph_object_layout ol = osdmap->make_object_layout(oid,
+ mdcache->mds->mdsmap->get_metadata_pg_pool());
+
+ mdcache->mds->objecter->read(oid, ol,
+ rd, &c->bl, 0, c );
}
void CInode::_fetched(bufferlist& bl, Context *fin)
void InoTable::init_inode()
{
ino = MDS_INO_IDS_OFFSET + mds->get_nodeid();
- layout = g_default_file_layout;
}
void InoTable::reset_state()
delete o;
}
+
+/*
+ * Set up default_file_layout and default_dir_layout.
+ *
+ * Both start as a copy of g_default_file_layout.  fl_pg_preferred is then
+ * set to -1 and fl_pg_pool is filled in from mds->mdsmap: the first
+ * (default) data pool for files, the metadata pool for directories.
+ *
+ * NOTE(review): this dereferences mds->mdsmap and calls
+ * get_data_pg_pool(), which indexes data_pg_pools[0]; presumably it must
+ * only run once the MDS holds a map with at least one data pool --
+ * confirm against the call sites.
+ */
+void MDCache::init_layouts()
+{
+ default_file_layout = g_default_file_layout;
+ default_file_layout.fl_pg_preferred = -1;
+ default_file_layout.fl_pg_pool = mds->mdsmap->get_data_pg_pool();
+
+ default_dir_layout = g_default_file_layout;
+ default_dir_layout.fl_pg_preferred = -1;
+ default_dir_layout.fl_pg_pool = mds->mdsmap->get_metadata_pg_pool();
+}
+
CInode *MDCache::create_system_inode(inodeno_t ino, int mode)
{
CInode *in = new CInode(this);
in->inode.ctime =
in->inode.mtime = g_clock.now();
in->inode.nlink = 1;
- in->inode.layout = g_default_mds_dir_layout;
+ if (in->inode.is_dir())
+ in->inode.layout = default_dir_layout;
+ else
+ in->inode.layout = default_file_layout;
add_inode(in);
return in;
}
int num_inodes_with_caps;
int num_caps;
+ ceph_file_layout default_file_layout;
+ ceph_file_layout default_dir_layout;
+
// -- client leases --
public:
static const int client_lease_pools = 3;
list<Context*> waiting_for_open;
public:
+ void init_layouts();
CInode *create_system_inode(inodeno_t ino, int mode);
CInode *create_root_inode();
// inode
memset(&log_inode, 0, sizeof(log_inode));
log_inode.ino = MDS_INO_LOG_OFFSET + mds->get_nodeid();
- log_inode.layout = g_default_mds_log_layout;
+ log_inode.layout.fl_stripe_unit = 1<<20;
+ log_inode.layout.fl_stripe_count = 1;
+ log_inode.layout.fl_object_size = 1<<20;
+ log_inode.layout.fl_cas_hash = 0;
+ log_inode.layout.fl_object_stripe_unit = 0;
+ log_inode.layout.fl_pg_preferred = -1;
+ log_inode.layout.fl_pg_pool = mds->mdsmap->get_metadata_pg_pool();
if (g_conf.mds_local_osd)
log_inode.layout.fl_pg_preferred = mds->get_nodeid() + g_conf.num_osd; // hack
C_Gather *fin = new C_Gather(new C_MDS_CreateFinish(this));
+ mdcache->init_layouts();
+
// start with a fresh journal
dout(10) << "boot_create creating fresh journal" << dendl;
mdlog->create(fin->new_sub());
switch (step) {
case 0:
+ mdcache->init_layouts();
step = 1; // fall-thru.
case 1:
class MDSMap {
- public:
+public:
// mds states
/*
static const int STATE_DNE = CEPH_MDS_STATE_DNE; // down, never existed.
::decode(standby_for_name, bl);
}
};
- WRITE_CLASS_ENCODER(mds_info_t)
- protected:
+protected:
// base map
epoch_t epoch;
epoch_t client_epoch; // incremented only when change is significant to client.
__u32 session_timeout;
__u32 session_autoclose;
+
+ vector<__u32> data_pg_pools; // file data pg_pools available to clients (via an ioctl). first is the default.
+ __u32 cas_pg_pool; // where CAS objects go
+ __u32 metadata_pg_pool; // where fs metadata objects go
/*
* in: the set of logical mds #'s that define the cluster. this is the set
friend class MDSMonitor;
- public:
- MDSMap() : epoch(0), client_epoch(0), last_failure(0), tableserver(0), root(0) {
+public:
+ MDSMap() : epoch(0), client_epoch(0), last_failure(0), tableserver(0), root(0),
+ cas_pg_pool(0), metadata_pg_pool(0) {
// hack.. this doesn't really belong here
session_timeout = (int)g_conf.mds_session_timeout;
session_autoclose = (int)g_conf.mds_session_autoclose;
int get_tableserver() const { return tableserver; }
int get_root() const { return root; }
+ const vector<__u32> &get_data_pg_pools() const { return data_pg_pools; }
+ __u32 get_data_pg_pool() const { return data_pg_pools[0]; } // default data pool = first entry. NOTE(review): no emptiness check; operator[] on an empty vector is undefined, so callers need a populated data_pg_pools
+ __u32 get_cas_pg_pool() const { return cas_pg_pool; }
+ __u32 get_metadata_pg_pool() const { return metadata_pg_pool; }
// counts
unsigned get_num_mds() {
::encode(session_autoclose, bl);
::encode(max_mds, bl);
::encode(mds_info, bl);
+ ::encode(data_pg_pools, bl);
+ ::encode(cas_pg_pool, bl);
// kclient ignores everything from here
+ ::encode(metadata_pg_pool, bl);
::encode(created, bl);
::encode(modified, bl);
::encode(tableserver, bl);
::decode(session_autoclose, p);
::decode(max_mds, p);
::decode(mds_info, p);
+ ::decode(data_pg_pools, p);
+ ::decode(cas_pg_pool, p);
// kclient ignores everything from here
+ ::decode(metadata_pg_pool, p);
::decode(created, p);
::decode(modified, p);
::decode(tableserver, p);
// write (async)
SnapContext snapc;
object_t oid(ino, 0);
- mds->objecter->write_full(oid,
- mds->objecter->osdmap->file_to_object_layout(oid,
- g_default_mds_dir_layout),
+ OSDMap *osdmap = mds->objecter->osdmap;
+ ceph_object_layout ol = osdmap->make_object_layout(oid,
+ mds->mdsmap->get_metadata_pg_pool());
+
+ mds->objecter->write_full(oid, ol,
snapc,
bl, g_clock.now(), 0,
NULL, new C_MT_Save(this, version));
C_MT_Load *c = new C_MT_Load(this, onfinish);
object_t oid(ino, 0);
- mds->objecter->read(oid,
- mds->objecter->osdmap->file_to_object_layout(oid,
- g_default_mds_dir_layout),
- 0, 0, // whole object
- &c->bl, 0, c);
+ OSDMap *osdmap = mds->objecter->osdmap;
+ ceph_object_layout ol = osdmap->make_object_layout(oid,
+ mds->mdsmap->get_metadata_pg_pool());
+ mds->objecter->read_full(oid, ol, &c->bl, 0, c);
}
void MDSTable::load_2(int r, bufferlist& bl, Context *onfinish)
const char *table_name;
inodeno_t ino;
- ceph_file_layout layout;
static const int STATE_UNDEF = 0;
static const int STATE_OPENING = 1;
in->inode.version = 1;
in->inode.nlink = 1; // FIXME
- in->inode.layout = g_default_file_layout;
+ if (in->inode.is_dir())
+ in->inode.layout = mds->mdcache->default_dir_layout;
+ else
+ in->inode.layout = mds->mdcache->default_file_layout;
in->inode.truncate_size = -1ull; // not truncated, yet!
newi->inode.mode = req->head.args.mkdir.mode;
newi->inode.mode &= ~S_IFMT;
newi->inode.mode |= S_IFDIR;
- newi->inode.layout = g_default_mds_dir_layout;
newi->inode.version = dn->pre_dirty();
newi->inode.rstat.rsubdirs = 1;
*/
#include "MDS.h"
+#include "MDCache.h"
#include "SessionMap.h"
#include "osdc/Filer.h"
void SessionMap::init_inode()
{
- memset(&inode, 0, sizeof(inode));
- inode.ino = MDS_INO_SESSIONMAP_OFFSET + mds->get_nodeid();
- inode.layout = g_default_file_layout;
+ ino = MDS_INO_SESSIONMAP_OFFSET + mds->get_nodeid();
}
-
void SessionMap::dump()
{
hash<entity_name_t> H;
waiting_for_load.push_back(onload);
C_SM_Load *c = new C_SM_Load(this);
- object_t oid(inode.ino, 0);
- mds->objecter->read(oid,
- mds->objecter->osdmap->file_to_object_layout(oid,
- g_default_mds_dir_layout),
- 0, 0, // whole object
- &c->bl, 0,
- c);
-
+ object_t oid(ino, 0);
+ OSDMap *osdmap = mds->objecter->osdmap;
+ ceph_object_layout ol = osdmap->make_object_layout(oid,
+ mds->mdsmap->get_metadata_pg_pool());
+ mds->objecter->read_full(oid, ol, &c->bl, 0, c);
}
void SessionMap::_load_finish(bufferlist &bl)
encode(bl);
committing = version;
SnapContext snapc;
- object_t oid(inode.ino, 0);
- mds->objecter->write_full(oid,
- mds->objecter->osdmap->file_to_object_layout(oid,
- g_default_mds_dir_layout),
+ object_t oid(ino, 0);
+ OSDMap *osdmap = mds->objecter->osdmap;
+ ceph_object_layout ol = osdmap->make_object_layout(oid,
+ mds->mdsmap->get_metadata_pg_pool());
+
+ mds->objecter->write_full(oid, ol,
snapc,
bl, g_clock.now(), 0,
NULL, new C_SM_Save(this, version));
}
// -- loading, saving --
- inode_t inode;
+ inodeno_t ino;
list<Context*> waiting_for_load;
void encode(bufferlist& bl);
void SnapServer::init_inode()
{
ino = MDS_INO_SNAPTABLE;
- layout = g_default_file_layout;
}
void SnapServer::reset_state()
dout(10) << "create_initial" << dendl;
pending_mdsmap.max_mds = 1;
pending_mdsmap.created = g_clock.now();
+ pending_mdsmap.data_pg_pools.push_back(CEPH_DATA_RULE);
+ pending_mdsmap.metadata_pg_pool = CEPH_METADATA_RULE;
+ pending_mdsmap.cas_pg_pool = CEPH_CASDATA_RULE;
print_map(pending_mdsmap);
}
ops[0].length = len;
return read(oid, ol, ops, pbl, 0, flags, onfinish);
}
-
+ tid_t read_full(object_t oid, ceph_object_layout ol, // convenience wrapper: read an entire object into *pbl
+ bufferlist *pbl, int flags,
+ Context *onfinish) {
+ return read(oid, ol, 0, 0, pbl, flags, onfinish); // 0, 0 = "whole object", the same pair the former call sites passed to read()
+ }
tid_t mutate(object_t oid, ceph_object_layout ol,
ObjectMutation& mutation,