fuseclient: client/Client.o client/Buffercache.o client/fuse.o msg/FakeMessenger.o ${COMMON_OBJS}
${CC} ${CFLAGS} ${LIBS} -lfuse $^ -o $@
-fakemds: test/fakemds.cc msg/FakeMessenger.o fakeclient/FakeClient.o osd/OSD.o mds/allmds.o ${COMMON_OBJS}
+fakemds: test/fakemds.cc msg/FakeMessenger.o fakeclient/FakeClient.o osd/OSD.o mds.o ${COMMON_OBJS}
${CC} ${CFLAGS} ${LIBS} $^ -o $@
-mpitest: test/mpitest.o msg/MPIMessenger.cc mds/allmds.o osd/OSD.o fakeclient/FakeClient.o ${COMMON_OBJS}
+mpitest: test/mpitest.o msg/MPIMessenger.cc mds.o osd/OSD.o fakeclient/FakeClient.o ${COMMON_OBJS}
${MPICC} ${CFLAGS} $^ -o $@
mttest: test/mttest.cc msg/MTMessenger.cc ${COMMON_OBJS}
# fuse
-fakefuse: fakefuse.cc mds/allmds.o client/Client.o client/Buffercache.o osd/OSD.o client/fuse.o msg/FakeMessenger.cc ${COMMON_OBJS}
+fakefuse: fakefuse.cc mds.o client/Client.o client/Buffercache.o osd/OSD.o client/fuse.o msg/FakeMessenger.cc ${COMMON_OBJS}
${CC} -pg ${CFLAGS} ${LIBS} -lfuse $^ -o $@
-tcpfuse: tcpfuse.cc mds/allmds.o client/Client.o client/Buffercache.o osd/OSD.o client/fuse.o ${TCP_OBJS} ${COMMON_OBJS}
+tcpfuse: tcpfuse.cc mds.o client/Client.o client/Buffercache.o osd/OSD.o client/fuse.o ${TCP_OBJS} ${COMMON_OBJS}
${MPICC} ${CFLAGS} ${LIBS} -lfuse $^ -o $@
-mpifuse: mpifuse.cc mds/allmds.o client/Client.o client/Buffercache.o osd/OSD.o client/fuse.o ${TCP_OBJS} ${COMMON_OBJS}
+mpifuse: mpifuse.cc mds.o client/Client.o client/Buffercache.o osd/OSD.o client/fuse.o ${TCP_OBJS} ${COMMON_OBJS}
${MPICC} ${CFLAGS} ${LIBS} -lfuse $^ -o $@
# synthetic workload
-fakesyn: fakesyn.cc mds/allmds.o client/Client.o client/Buffercache.o osd/OSD.o msg/FakeMessenger.o ${COMMON_OBJS} ${SYN_OBJS}
+fakesyn: fakesyn.cc mds.o client/Client.o client/Buffercache.o osd/OSD.o msg/FakeMessenger.o ${COMMON_OBJS} ${SYN_OBJS}
${CC} -pg ${CFLAGS} ${LIBS} $^ -o $@
-mpisyn: mpisyn.cc mds/allmds.o client/Client.o client/Buffercache.o osd/OSD.o msg/MPIMessenger.cc ${COMMON_OBJS} ${SYN_OBJS}
+mpisyn: mpisyn.cc mds.o client/Client.o client/Buffercache.o osd/OSD.o msg/MPIMessenger.cc ${COMMON_OBJS} ${SYN_OBJS}
${MPICC} ${MPICFLAGS} ${MPILIBS} $^ -o $@
-tcpsyn: tcpsyn.cc mds/allmds.o client/Client.o client/Buffercache.o osd/OSD.o ${TCP_OBJS} ${COMMON_OBJS} ${SYN_OBJS}
+tcpsyn: tcpsyn.cc mds.o client/Client.o client/Buffercache.o osd/OSD.o ${TCP_OBJS} ${COMMON_OBJS} ${SYN_OBJS}
${MPICC} ${MPICFLAGS} ${MPILIBS} $^ -o $@
-# obfs + synthetic
-tcpsynobfs: tcpsyn.cc mds/allmds.o client/Client.o client/Buffercache.o osd/OSD.cc osd/OBFSStore.o msg/TCPMessenger.cc ${COMMON_OBJS} $(SYN_OBJS)
+# + obfs
+tcpsynobfs: tcpsyn.cc mds.o client/Client.o client/Buffercache.o osd/OSD.cc osd/OBFSStore.o ${TCP_OBJS} ${COMMON_OBJS} ${SYN_OBJS}
${MPICC} -DUSE_OBFS ${MPICFLAGS} ${MPILIBS} $^ -o $@ ../uofs/uofs.a
+
# ebofs
ebofs: mkfs.ebofs test.ebofs
ebofs.o: ${EBOFS_OBJS}
ld -i -o $@ $^
-mds/allmds.o: ${MDS_OBJS}
+mds.o: ${MDS_OBJS}
ld -i -o $@ $^
%.o: %.cc
- some heuristic behavior to consolidate caps to inode auth
- client will re-tx anything it needed to say upon rx of new mds notification (?)
+- readv+writev
+ - serialized!
+- lazy posix
+
+- LD_PRELOAD
+
+
+filer
+- (optional) serial behavior when read spans objects
+ - (although we still can't get atomicity)
+
ebofs
+- change Onode vector<Extent> extents to map<block_t,Extent> extent_map
- combine inodes into same blocks
- zero regions?
- journaling
ebofs_abp_zero: false, // zero newly allocated buffers (may shut up valgrind)
ebofs_abp_max_alloc: 4096*16, // max size of new buffers (larger -> more memory fragmentation)
+ // --- uofs (OBFS object store) ---
+ uofs: 0, // set to 1 by the --obfs command-line flag
+ uofs_cache_size: 1<<18, // = 262144; "256MB" only holds if the unit is KB -- TODO confirm unit
+ uofs_onode_size: (int)1024, // same value as the old hard-coded donode_size_byte in OBFSStore::mkfs()
+ uofs_small_block_size: (int)4096, // 4KB
+ uofs_large_block_size: (int)524288, // 512KB
+ uofs_segment_size: (int)268435456, // 256MB
+ uofs_block_meta_ratio: (int)10,
+ uofs_sync_write: (int)0,
+ uofs_nr_hash_buckets: (int)1023,
+ uofs_flush_interval: (int)5, // seconds
+ uofs_min_flush_pages: (int)1024, // NOTE(review): value is 1024, but the original note said "4096 4k-pages" -- confirm which is intended
+ uofs_delay_allocation: (int)1, // true
+
// --- block device ---
bdev_iothreads: 1, // number of ios to queue with kernel
bdev_idle_kick_after_ms: 100, // ms
else if (strcmp(args[i], "--fakestore_writesync") == 0)
g_conf.fakestore_writesync = atoi(args[++i]);
+ else if (strcmp(args[i], "--obfs") == 0) {
+ g_conf.uofs = 1;
+ g_conf.fake_osd_sync = 2;
+ }
+
else if (strcmp(args[i], "--osd_mkfs") == 0)
g_conf.osd_mkfs = atoi(args[++i]);
else if (strcmp(args[i], "--osd_pg_bits") == 0)
bool ebofs_abp_zero;
size_t ebofs_abp_max_alloc;
+ // uofs (OBFS object store) settings
+ int uofs; // nonzero selects the OBFS store (set by --obfs)
+ int uofs_cache_size; // passed to uofs_mount()
+ int uofs_onode_size; // passed to uofs_format()
+ int uofs_small_block_size; // passed to uofs_format()
+ int uofs_large_block_size; // passed to uofs_format()
+ int uofs_segment_size; // passed to uofs_format()
+ int uofs_block_meta_ratio; // passed to uofs_format()
+ int uofs_sync_write;
+
+ int uofs_nr_hash_buckets; // passed to uofs_format()
+ int uofs_flush_interval; // passed to uofs_format()
+ int uofs_min_flush_pages; // passed to uofs_mount()
+ int uofs_delay_allocation; // passed to uofs_format()
+
// block device
int bdev_iothreads;
int bdev_idle_kick_after_ms;
inodeno_t ino; // NOTE: ino _must_ come first for MDStore.cc to behave!!
time_t ctime;
- // hard (permissions)
+ FileLayout layout; // ?immutable?
+
+ // hard (namespace permissions)
mode_t mode;
uid_t uid;
gid_t gid;
- FileLayout layout;
- // soft
+ // file (data access)
off_t size;
time_t atime, mtime; // maybe atime different? "lazy"?
+
+ // other
int nlink;
// special stuff
#define PG_NONE 0xffffffffffffffffLL
+
+// Per-object stat record: object id, size and two timestamps.
+// NOTE(review): ctime/mtime presumably follow struct stat semantics -- confirm.
+struct ostat {
+ object_t object_id; // which object this record describes
+ size_t size;
+ time_t ctime;
+ time_t mtime;
+};
+
+
struct onode_t {
object_t oid;
pg_t pgid;
}
this->mkfs();
- this->mounted = uofs_mount(this->bdev_id, this->whoami);
+ this->mounted = uofs_mount(this->bdev_id,
+ g_conf.uofs_cache_size,
+ g_conf.uofs_min_flush_pages,
+ this->whoami);
switch (this->mounted) {
case -1:
this->mkfs();
//retry to mount
dout(0) << "remount the OBFS" << endl;
- this->mounted = uofs_mount(this->bdev_id, this->whoami);
+ this->mounted = uofs_mount(this->bdev_id,
+ g_conf.uofs_cache_size,
+ g_conf.uofs_min_flush_pages,
+ this->whoami);
assert(this->mounted >= 0);
break;
case -2:
//fsck
dout(0) << "Need fsck! Simply formatted for now!" << endl;
this->mkfs();
- this->mounted = uofs_mount(this->bdev_id, this->whoami);
+ this->mounted = uofs_mount(this->bdev_id,
+ g_conf.uofs_cache_size,
+ g_conf.uofs_min_flush_pages,
+ this->whoami);
assert(this->mounted >= 0);
break;
case 0:
int OBFSStore::mkfs(void)
{
- int donode_size_byte = 1024,
+ /*int donode_size_byte = 1024,
bd_ratio = 10,
reg_size_mb = 256,
sb_size_kb = 4,
lb_size_kb = 1024,
- nr_hash_table_buckets = 1023,
- delay_allocation = 1,
- flush_interval = 5;
+ nr_hash_table_buckets = 1023,
+ delay_allocation = 1,
+ flush_interval = 5;
FILE *param;
+ */
+
-
if (this->mounted >= 0)
- return 0;
+ return 0;
dout(0) << "OBFS.mkfs!" << endl;
+ /*
if (strlen(this->param) > 0) {
param = fopen(this->param, "r");
if (param) {
}
} else
dout(0) << "use default parameters" << endl;
+ */
if (this->bdev_id <= 0)
if ((this->bdev_id = device_open(this->dev, O_RDWR)) < 0) {
dout(0) << "start formating!" << endl;
- uofs_format(this->bdev_id, donode_size_byte, bd_ratio, (reg_size_mb << 20), (sb_size_kb << 10),
- (lb_size_kb << 10), nr_hash_table_buckets, delay_allocation, flush_interval, this->whoami);
+ uofs_format(this->bdev_id,
+ g_conf.uofs_onode_size,
+ g_conf.uofs_block_meta_ratio,
+ g_conf.uofs_segment_size,
+ g_conf.uofs_small_block_size,
+ g_conf.uofs_large_block_size,
+ g_conf.uofs_nr_hash_buckets,
+ g_conf.uofs_delay_allocation,
+ 0,//g_conf.uofs_dev_force_size,
+ g_conf.uofs_flush_interval,
+ 0);
dout(0) << "formatting complete!" << endl;
return 0;
return 0;
}
+// Filesystem statistics for the OBFS store.
+// Stub: always returns 0 and never writes to *sfs, so the caller's struct
+// keeps whatever contents it had before the call.
+int OBFSStore::statfs(struct statfs *sfs)
+{
+ return 0;
+}
+
bool OBFSStore::exists(object_t oid)
{
//dout(0) << "calling function exists!" << endl;
+// Existence check only: returns 0 when uofs_exist(oid) is true, -1 otherwise.
+// NOTE(review): *st is never written, so a 0 return does not mean st holds
+// valid data.
int OBFSStore::stat(object_t oid, struct stat *st)
{
dout(0) << "calling function stat!" << endl;
+ if (uofs_exist(oid)) return 0;
+ return -1;
}
int OBFSStore::remove(object_t oid)
{
dout(0) << "calling truncate function!" << endl;
//return uofs_truncate(oid, size);
+ return -1;
}
+// Read `len` bytes of object `oid` starting at `offset` into `bl`.
+// Appends a freshly allocated buffer of `len` bytes to `bl`, hands
+// bl.c_str() to uofs_read() and returns uofs_read()'s result.
+// NOTE(review): assumes `bl` is empty on entry so that bl.c_str() addresses
+// the buffer just pushed -- confirm against callers.
int OBFSStore::read(object_t oid, size_t len,
- off_t offset, char *buffer)
+ off_t offset, bufferlist &bl)
{
//dout(0) << "calling read function!" << endl;
//dout(0) << oid << " 0 " << len << " " << offset << " 100" << endl;
- return uofs_read(oid, buffer, offset, len);
+
+ // FIXME: page-align this and we can avoid a memcpy...
+ bl.push_back(new buffer(len));
+ return uofs_read(oid, bl.c_str(), offset, len);
}
int OBFSStore::write(object_t oid, size_t len,
- off_t offset, char *buffer, bool fsync)
+ off_t offset, bufferlist& bl, bool fsync)
{
int ret;//, sync = 0;
//if (whoami == 0)
// dout(0) << oid << " 0 " << len << " " << offset << " 101" << endl;
//if (fsync) sync = 1;
- ret = uofs_write(oid, buffer, offset, len, 0);
+
+ ret = 0;
+ for (list<bufferptr>::iterator p = bl.buffers().begin();
+ p != bl.buffers().end();
+ p++) {
+ ret += uofs_write(oid, (*p).c_str(), offset, len, 0);
+ }
+
if (fsync)
ret += uofs_sync(oid);
}
-
+// Write variant that takes a completion Context instead of an fsync flag.
+// Not implemented yet: it aborts via assert(0). The trailing `return 0` is
+// only reached when assertions are compiled out, in which case nothing is
+// written and `onflush` is never used.
+int OBFSStore::write(object_t oid, size_t len,
+ off_t offset, bufferlist& bl, Context *onflush)
+{
+ // implement me later.. fake for now!
+ assert(0);
+ return 0;
+}
int umount(void);
int mkfs(void);
+ int statfs(struct statfs *);
+
bool exists(object_t oid);
int stat(object_t oid, struct stat *st);
int truncate(object_t oid, off_t size);
int read(object_t oid, size_t len,
- off_t offset, char *buffer);
+ off_t offset, bufferlist& bl);
int write(object_t oid, size_t len,
- off_t offset,char *buffer,
+ off_t offset, bufferlist& bl,
bool fsync);
+ int write(object_t oid, size_t len,
+ off_t offset, bufferlist& bl,
+ Context *onflush);
};
// use fake store
#ifdef USE_OBFS
- store = new OBFSStore(whoami, NULL, "/dev/sdb3");
+  // Pick the OBFS device path: prefer <ebofs_base_path>/<hostname>, and fall
+  // back to <ebofs_base_path>/<whoami> when that path cannot be stat()ed.
+  // NOTE(review): when g_conf.uofs is 0 this branch assigns nothing to
+  // `store` -- confirm it is set elsewhere for USE_OBFS builds.
+  if (g_conf.uofs) {
+    char hostname[100];
+
+    hostname[0] = 0;
+    gethostname(hostname, sizeof(hostname));
+    // gethostname() may leave the buffer unterminated if the name is truncated
+    hostname[sizeof(hostname) - 1] = 0;
+    // NOTE(review): sprintf is unbounded; dev_path's size is not visible here.
+    sprintf(dev_path, "%s/%s", ebofs_base_path, hostname);
+
+    struct stat st;
+    if (::stat(dev_path, &st) != 0)
+      sprintf(dev_path, "%s/%d", ebofs_base_path, whoami);
+
+    store = new OBFSStore(whoami, NULL, dev_path);
+  }
#else
# ifdef USE_EBOFS
if (g_conf.ebofs) {