git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
reintegrated obfs
author sage <sage@29311d96-e01e-0410-9327-a35deaab8ce9>
Fri, 3 Feb 2006 17:10:22 +0000 (17:10 +0000)
committer sage <sage@29311d96-e01e-0410-9327-a35deaab8ce9>
Fri, 3 Feb 2006 17:10:22 +0000 (17:10 +0000)
git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@589 29311d96-e01e-0410-9327-a35deaab8ce9

ceph/Makefile
ceph/TODO
ceph/config.cc
ceph/config.h
ceph/include/types.h
ceph/osd/OBFSStore.cc
ceph/osd/OBFSStore.h
ceph/osd/OSD.cc

index 80f3727c5268ea8936c2bc68267cc327a2df19ab..31713d872ce7a62b11feba73a8544ffbfdd1aeca 100644 (file)
@@ -94,10 +94,10 @@ tp: osd/tp.o
 fuseclient: client/Client.o client/Buffercache.o client/fuse.o msg/FakeMessenger.o ${COMMON_OBJS} 
        ${CC} ${CFLAGS} ${LIBS} -lfuse $^ -o $@
 
-fakemds: test/fakemds.cc msg/FakeMessenger.o fakeclient/FakeClient.o osd/OSD.o mds/allmds.o ${COMMON_OBJS} 
+fakemds: test/fakemds.cc msg/FakeMessenger.o fakeclient/FakeClient.o osd/OSD.o mds.o ${COMMON_OBJS} 
        ${CC} ${CFLAGS} ${LIBS} $^ -o $@
 
-mpitest: test/mpitest.o msg/MPIMessenger.cc mds/allmds.o osd/OSD.o fakeclient/FakeClient.o ${COMMON_OBJS} 
+mpitest: test/mpitest.o msg/MPIMessenger.cc mds.o osd/OSD.o fakeclient/FakeClient.o ${COMMON_OBJS} 
        ${MPICC} ${CFLAGS} $^ -o $@
 
 mttest: test/mttest.cc msg/MTMessenger.cc ${COMMON_OBJS}
@@ -105,30 +105,31 @@ mttest: test/mttest.cc msg/MTMessenger.cc ${COMMON_OBJS}
 
 
 # fuse
-fakefuse: fakefuse.cc mds/allmds.o client/Client.o client/Buffercache.o osd/OSD.o client/fuse.o msg/FakeMessenger.cc ${COMMON_OBJS}
+fakefuse: fakefuse.cc mds.o client/Client.o client/Buffercache.o osd/OSD.o client/fuse.o msg/FakeMessenger.cc ${COMMON_OBJS}
        ${CC} -pg ${CFLAGS} ${LIBS} -lfuse $^ -o $@
 
-tcpfuse: tcpfuse.cc mds/allmds.o client/Client.o client/Buffercache.o osd/OSD.o client/fuse.o ${TCP_OBJS} ${COMMON_OBJS}
+tcpfuse: tcpfuse.cc mds.o client/Client.o client/Buffercache.o osd/OSD.o client/fuse.o ${TCP_OBJS} ${COMMON_OBJS}
        ${MPICC} ${CFLAGS} ${LIBS} -lfuse $^ -o $@
 
-mpifuse: mpifuse.cc mds/allmds.o client/Client.o client/Buffercache.o osd/OSD.o client/fuse.o ${TCP_OBJS} ${COMMON_OBJS}
+mpifuse: mpifuse.cc mds.o client/Client.o client/Buffercache.o osd/OSD.o client/fuse.o ${TCP_OBJS} ${COMMON_OBJS}
        ${MPICC} ${CFLAGS} ${LIBS} -lfuse $^ -o $@
 
 
 # synthetic workload
-fakesyn: fakesyn.cc mds/allmds.o client/Client.o client/Buffercache.o osd/OSD.o msg/FakeMessenger.o ${COMMON_OBJS} ${SYN_OBJS}
+fakesyn: fakesyn.cc mds.o client/Client.o client/Buffercache.o osd/OSD.o msg/FakeMessenger.o ${COMMON_OBJS} ${SYN_OBJS}
        ${CC} -pg ${CFLAGS} ${LIBS} $^ -o $@
 
-mpisyn: mpisyn.cc mds/allmds.o client/Client.o client/Buffercache.o osd/OSD.o msg/MPIMessenger.cc ${COMMON_OBJS} ${SYN_OBJS}
+mpisyn: mpisyn.cc mds.o client/Client.o client/Buffercache.o osd/OSD.o msg/MPIMessenger.cc ${COMMON_OBJS} ${SYN_OBJS}
        ${MPICC} ${MPICFLAGS} ${MPILIBS} $^ -o $@
 
-tcpsyn: tcpsyn.cc mds/allmds.o client/Client.o client/Buffercache.o osd/OSD.o ${TCP_OBJS} ${COMMON_OBJS} ${SYN_OBJS}
+tcpsyn: tcpsyn.cc mds.o client/Client.o client/Buffercache.o osd/OSD.o ${TCP_OBJS} ${COMMON_OBJS} ${SYN_OBJS}
        ${MPICC} ${MPICFLAGS} ${MPILIBS} $^ -o $@
 
-# obfs + synthetic
-tcpsynobfs: tcpsyn.cc mds/allmds.o client/Client.o client/Buffercache.o osd/OSD.cc osd/OBFSStore.o msg/TCPMessenger.cc ${COMMON_OBJS} $(SYN_OBJS)
+# + obfs
+tcpsynobfs: tcpsyn.cc mds.o client/Client.o client/Buffercache.o osd/OSD.cc osd/OBFSStore.o ${TCP_OBJS} ${COMMON_OBJS} ${SYN_OBJS}
        ${MPICC} -DUSE_OBFS ${MPICFLAGS} ${MPILIBS} $^ -o $@ ../uofs/uofs.a
 
+
 # ebofs
 ebofs: mkfs.ebofs test.ebofs
 
@@ -151,7 +152,7 @@ clean:
 ebofs.o: ${EBOFS_OBJS}
        ld -i -o $@ $^
 
-mds/allmds.o: ${MDS_OBJS}
+mds.o: ${MDS_OBJS}
        ld -i -o $@ $^
 
 %.o: %.cc
index e727018ded8fe44370a83d080f1d014bea5f71b4..bc0eea67ebb6c0b5b8f7ed764b43439016005d9d 100644 (file)
--- a/ceph/TODO
+++ b/ceph/TODO
@@ -4,8 +4,20 @@ client
 - some heuristic behavior to consolidate caps to inode auth
 - client will re-tx anything it needed to say upon rx of new mds notification (?)
 
+- readv+writev
+  - serialized!
+- lazy posix
+
+- LD_PRELOAD
+
+
+filer
+- (optional) serial behavior when read spans objects
+  - (altho we still can't get atomicity)
+
 
 ebofs
+- change Onode vector<Extent> extents to map<block_t,Extent> extent_map
 - combine inodes into same blocks
 - zero regions?
 - journaling
index b19a33d2cd4047f102f9591984cb60506872b550..0f71008f59431380414d8fc717010af5194f2319 100644 (file)
@@ -134,6 +134,19 @@ md_config_t g_conf = {
   ebofs_abp_zero: false,          // zero newly allocated buffers (may shut up valgrind)
   ebofs_abp_max_alloc: 4096*16,   // max size of new buffers (larger -> more memory fragmentation)
 
+  uofs: 0,
+  uofs_cache_size: 1<<18,       // 256MB
+  uofs_onode_size:             (int)1024,
+  uofs_small_block_size:       (int)4096,      //4KB
+  uofs_large_block_size:       (int)524288,    //512KB
+  uofs_segment_size:           (int)268435456, //256MB
+  uofs_block_meta_ratio:       (int)10,
+  uofs_sync_write:             (int)0,
+  uofs_nr_hash_buckets:        (int)1023,
+  uofs_flush_interval:         (int)5,         //seconds
+  uofs_min_flush_pages:        (int)1024,      //4096 4k-pages
+  uofs_delay_allocation:       (int)1,         //true
+
   // --- block device ---
   bdev_iothreads:    1,         // number of ios to queue with kernel
   bdev_idle_kick_after_ms: 100, // ms
@@ -316,6 +329,11 @@ void parse_config_options(vector<char*>& args)
        else if (strcmp(args[i], "--fakestore_writesync") == 0) 
          g_conf.fakestore_writesync = atoi(args[++i]);
 
+       else if (strcmp(args[i], "--obfs") == 0) {
+         g_conf.uofs = 1;
+         g_conf.fake_osd_sync = 2;
+       }
+
        else if (strcmp(args[i], "--osd_mkfs") == 0) 
          g_conf.osd_mkfs = atoi(args[++i]);
        else if (strcmp(args[i], "--osd_pg_bits") == 0) 
index da041c9629062e012abb947d9abea37d68fb0d91..152179741c16a9fabcf12f7aeb73a598436bd0db 100644 (file)
@@ -106,6 +106,20 @@ struct md_config_t {
   bool   ebofs_abp_zero;
   size_t ebofs_abp_max_alloc;
 
+  int uofs;
+  int     uofs_cache_size;
+  int     uofs_onode_size;
+  int     uofs_small_block_size;
+  int     uofs_large_block_size;
+  int     uofs_segment_size;
+  int     uofs_block_meta_ratio;
+  int     uofs_sync_write;
+  
+  int     uofs_nr_hash_buckets;
+  int     uofs_flush_interval;
+  int     uofs_min_flush_pages;
+  int     uofs_delay_allocation;
+
   // block device
   int   bdev_iothreads;
   int   bdev_idle_kick_after_ms;
index 3625a916725c47d7956ebb69f29a2961f507231a..74572ebd7bfc8be4203859d271190d4379095797 100644 (file)
@@ -149,15 +149,18 @@ struct inode_t {
   inodeno_t ino;   // NOTE: ino _must_ come first for MDStore.cc to behave!!
   time_t    ctime;
 
-  // hard (permissions)
+  FileLayout layout;  // ?immutable?
+
+  // hard (namespace permissions)
   mode_t     mode;
   uid_t      uid;
   gid_t      gid;
-  FileLayout layout;  
 
-  // soft
+  // file (data access)
   off_t      size;
   time_t     atime, mtime;      // maybe atime different?  "lazy"?
+  
+  // other
   int        nlink;
 
   // special stuff
@@ -175,6 +178,15 @@ typedef __uint64_t coll_t;        // collection id
 
 #define PG_NONE    0xffffffffffffffffLL
 
+
+struct ostat {
+  object_t   object_id;
+  size_t     size;
+  time_t     ctime;
+  time_t     mtime;
+};
+
+
 struct onode_t {
   object_t    oid;
   pg_t        pgid;
index dce4f6d9f3a82b6b0253105adf632a4315a54d39..9603f5afbdc46c4c6823ae2db2481f1afd2db90d 100644 (file)
@@ -45,20 +45,29 @@ int OBFSStore::mount(void)
        }
 
        this->mkfs();
-       this->mounted = uofs_mount(this->bdev_id, this->whoami);
+       this->mounted = uofs_mount(this->bdev_id, 
+                                                          g_conf.uofs_cache_size,
+                                                          g_conf.uofs_min_flush_pages,
+                                                          this->whoami);
        switch (this->mounted) {
                case -1:
                        this->mkfs();
                        //retry to mount
                        dout(0) << "remount the OBFS" << endl;
-                       this->mounted = uofs_mount(this->bdev_id, this->whoami);
+                       this->mounted = uofs_mount(this->bdev_id, 
+                                                                          g_conf.uofs_cache_size,
+                                                                          g_conf.uofs_min_flush_pages,
+                                                                          this->whoami);
                        assert(this->mounted >= 0);
                        break;
                case -2: 
                        //fsck
                        dout(0) << "Need fsck! Simply formatted for now!" << endl;
                        this->mkfs();
-                       this->mounted = uofs_mount(this->bdev_id, this->whoami);
+                       this->mounted = uofs_mount(this->bdev_id, 
+                                                                          g_conf.uofs_cache_size,
+                                                                          g_conf.uofs_min_flush_pages,
+                                                                          this->whoami);
                        assert(this->mounted >= 0);
                        break;
                case 0:
@@ -78,21 +87,23 @@ int OBFSStore::mount(void)
 
 int OBFSStore::mkfs(void)
 {
-       int     donode_size_byte        = 1024,
+  /*int        donode_size_byte        = 1024,
                bd_ratio                = 10,
                reg_size_mb             = 256,
                sb_size_kb              = 4,
                lb_size_kb              = 1024,
-               nr_hash_table_buckets   = 1023,
-               delay_allocation        = 1,
-               flush_interval          = 5;
+         nr_hash_table_buckets   = 1023,
+         delay_allocation        = 1,
+         flush_interval                = 5;
        FILE    *param;
+  */
+       
        
-
        if (this->mounted >= 0)
-               return 0;
+         return 0;
 
        dout(0) << "OBFS.mkfs!" << endl;
+       /*
        if (strlen(this->param) > 0) {
                param = fopen(this->param, "r");
                if (param) {
@@ -110,6 +121,7 @@ int OBFSStore::mkfs(void)
                }
        } else
                dout(0) << "use default parameters" << endl;
+       */
 
        if (this->bdev_id <= 0)
                if ((this->bdev_id = device_open(this->dev, O_RDWR)) < 0) {
@@ -119,8 +131,17 @@ int OBFSStore::mkfs(void)
        
        dout(0) << "start formating!" << endl;
 
-       uofs_format(this->bdev_id, donode_size_byte, bd_ratio, (reg_size_mb << 20), (sb_size_kb << 10), 
-                       (lb_size_kb << 10), nr_hash_table_buckets, delay_allocation, flush_interval, this->whoami);
+       uofs_format(this->bdev_id,
+                               g_conf.uofs_onode_size, 
+                               g_conf.uofs_block_meta_ratio, 
+                               g_conf.uofs_segment_size,
+                               g_conf.uofs_small_block_size,
+                               g_conf.uofs_large_block_size,
+                               g_conf.uofs_nr_hash_buckets,
+                               g_conf.uofs_delay_allocation, 
+                               0,//g_conf.uofs_dev_force_size,
+                               g_conf.uofs_flush_interval, 
+                               0);
 
        dout(0) << "formatting complete!" << endl;
        return 0;
@@ -134,6 +155,11 @@ int OBFSStore::umount(void)
        return 0;
 }
 
+int OBFSStore::statfs(struct statfs *sfs) 
+{
+  return 0;
+}
+
 bool OBFSStore::exists(object_t oid)
 {
        //dout(0) << "calling function exists!" << endl;
@@ -143,6 +169,8 @@ bool OBFSStore::exists(object_t oid)
 int OBFSStore::stat(object_t oid, struct stat *st)
 {
        dout(0) << "calling function stat!" << endl;
+       if (uofs_exist(oid)) return 0;
+       return -1;
 }
 
 int OBFSStore::remove(object_t oid)
@@ -155,18 +183,22 @@ int OBFSStore::truncate(object_t oid, off_t size)
 {
        dout(0) << "calling truncate function!" << endl;
        //return uofs_truncate(oid, size);
+       return -1;
 }
 
 int OBFSStore::read(object_t oid, size_t len, 
-                   off_t offset, char *buffer)
+                   off_t offset, bufferlist &bl)
 {
        //dout(0) << "calling read function!" << endl;
        //dout(0) << oid << " 0  " << len << " " << offset << " 100" << endl;
-       return uofs_read(oid, buffer, offset, len);
+
+  // FIXME: page-align this and we can avoid a memcpy...
+  bl.push_back(new buffer(len));
+  return uofs_read(oid, bl.c_str(), offset, len);
 }
 
 int OBFSStore::write(object_t oid, size_t len,
-                    off_t offset, char *buffer, bool fsync)
+                                        off_t offset, bufferlist& bl, bool fsync)
 {
        int ret;//, sync = 0;
        
@@ -174,7 +206,14 @@ int OBFSStore::write(object_t oid, size_t len,
        //if (whoami == 0)
        //      dout(0) << oid << " 0  " << len << " " << offset << " 101" << endl;
        //if (fsync) sync = 1;
-       ret = uofs_write(oid, buffer, offset, len, 0);
+
+       ret = 0;
+       for (list<bufferptr>::iterator p = bl.buffers().begin();
+                p != bl.buffers().end();
+                p++) {
+         ret += uofs_write(oid, (*p).c_str(), offset, len, 0);
+       }
+
        if (fsync)
                ret += uofs_sync(oid);
        
@@ -182,4 +221,10 @@ int OBFSStore::write(object_t oid, size_t len,
 }
 
 
-
+int OBFSStore::write(object_t oid, size_t len,
+                    off_t offset, bufferlist& bl, Context *onflush)
+{
+  // implement me later.. fake for now!
+  assert(0);
+  return 0;
+}
index ded098f5ee7f7e416ea29961a8f79b6d9e5b8a90..be4def5fa9135a7f01ea6ef0a9cbc22c08cee78f 100644 (file)
@@ -20,6 +20,8 @@ class OBFSStore : public ObjectStore,
   int umount(void);
   int mkfs(void);
   
+  int statfs(struct statfs *);
+
   bool exists(object_t oid);
   int stat(object_t oid, struct stat *st);
   
@@ -27,10 +29,13 @@ class OBFSStore : public ObjectStore,
   int truncate(object_t oid, off_t size);
   
   int read(object_t oid, size_t len, 
-                  off_t offset, char *buffer);
+                  off_t offset, bufferlist& bl);
   int write(object_t oid, size_t len, 
-                       off_t offset,char *buffer,
+                       off_t offset, bufferlist& bl,
                        bool fsync);
+  int write(object_t oid, size_t len, 
+                       off_t offset, bufferlist& bl,
+                       Context *onflush);
   
 };
 
index 13d46df7864e2fe38bed2ef69d67b41d42d77e2f..d663040cb7faadaaceab7b7edec7c3498fc0583f 100644 (file)
@@ -79,7 +79,19 @@ OSD::OSD(int id, Messenger *m)
 
   // use fake store
 #ifdef USE_OBFS
-  store = new OBFSStore(whoami, NULL, "/dev/sdb3");
+  if (g_conf.uofs) {
+       char hostname[100];
+
+       hostname[0] = 0;
+       gethostname(hostname,100);
+       sprintf(dev_path, "%s/%s", ebofs_base_path, hostname);
+       
+       struct stat st;
+       if (::stat(dev_path, &st) != 0)
+         sprintf(dev_path, "%s/%d", ebofs_base_path, whoami);
+       
+       store = new OBFSStore(whoami, NULL, dev_path);
+  }
 #else
 # ifdef USE_EBOFS
   if (g_conf.ebofs) {