${MPICC} -DUSE_OBFS ${MPICFLAGS} ${MPILIBS} $^ -o $@ ../uofs/uofs.a
# ebofs
-mkfs.ebofs: ebofs/mkfs.ebofs.cc config.cc common/Clock.o ${EBOFS_OBJS}
+mkfs.ebofs: ebofs/mkfs.ebofs.cc config.cc common/Clock.o ebofs/ebo.o
${CC} -pg ${CFLAGS} ${LIBS} $^ -o $@
clean:
rm -f *.o */*.o ${TARGETS} ${TEST_TARGETS}
+ebofs/ebo.o: ${EBOFS_OBJS}
+ ld -i -o $@ $^
+
mds/allmds.o: ${MDS_OBJS}
- ld -i -o mds/allmds.o $^
+ ld -i -o $@ $^
%.o: %.cc
${CC} ${CFLAGS} -c $< -o $@
void Allocator::dump_freelist()
{
for (int b=0; b<EBOFS_NUM_FREE_BUCKETS; b++) {
- cout << "bucket " << b << endl;
+ dout(20) << "dump bucket " << b << endl;  // one header line per free-extent bucket, now at debug level 20
if (fs->free_tab[b]->get_num_keys() > 0) {
Table<block_t,block_t>::Cursor cursor(fs->free_tab[b]);
fs->free_tab[b]->find(0, cursor);
while (1) {
- cout << " ex " << cursor.current().key << " + " << cursor.current().value << endl;
- if (cursor.move_right() < 0) break;
+ dout(20) << "dump ex " << cursor.current().key << "~" << cursor.current().value << endl;  // entry printed as key~value
+ if (cursor.move_right() <= 0) break;  // NOTE(review): test widened from < 0 to <= 0; presumably 0 means "no further entry" -- confirm against Table::Cursor
}
} else {
- cout << " empty" << endl;
+ //cout << " empty" << endl;
}
}
}
r = _read(bio->start, bio->length, bio->bl);
} else assert(0);
- dout(20) << "do_io finish " << (void*)bio << " " << bio->start << "+" << bio->length << " " << (void*)bio->cond << " " << (void*)bio->context << endl;
+ dout(20) << "do_io finish " << (void*)bio << " " << bio->start << "~" << bio->length << " " << (void*)bio->cond << " " << (void*)bio->context << endl;
if (bio->cond) {
bio->cond->Signal();
int BlockDevice::_read(block_t bno, unsigned num, bufferlist& bl)
{
- dout(10) << "_read " << bno << "+" << num << endl;
+ dout(10) << "_read " << bno << "~" << num << endl;
assert(fd > 0);
int BlockDevice::_write(unsigned bno, unsigned num, bufferlist& bl)
{
- dout(10) << "_write " << bno << "+" << num << endl;
+ dout(10) << "_write " << bno << "~" << num << endl;
assert(fd > 0);
bc->lock.Lock();
- dout(10) << "rx_finish " << start << "+" << length << endl;
+ dout(10) << "rx_finish " << start << "~" << length << endl;
for (map<block_t, BufferHead*>::iterator p = data.lower_bound(start);
p != data.end();
p++) {
bc->lock.Lock();
- dout(10) << "tx_finish " << start << "+" << length << " v" << version << endl;
+ dout(10) << "tx_finish " << start << "~" << length << " v" << version << endl;
for (map<block_t, BufferHead*>::iterator p = data.lower_bound(start);
p != data.end();
p++) {
inline ostream& operator<<(ostream& out, BufferHead& bh)
{
- out << "bufferhead(" << bh.start() << "+" << bh.length();
+ out << "bufferhead(" << bh.start() << "~" << bh.length();
out << " v" << bh.get_version() << "/" << bh.get_last_flushed();
if (bh.is_missing()) out << " missing";
if (bh.is_dirty()) out << " dirty";
for (int i=0; i<EBOFS_NUM_FREE_BUCKETS; i++)
free_tab[i] = new Table<block_t, block_t>( table_nodepool, sb->free_tab[i] );
+ collection_tab = new Table<coll_t, Extent>( table_nodepool, sb->collection_tab );
+ oc_tab = new Table<idpair_t, bool>( table_nodepool, sb->oc_tab );
+ co_tab = new Table<idpair_t, bool>( table_nodepool, sb->co_tab );
+
dout(2) << "mount mounted" << endl;
mounted = true;
return 0;
for (int i=0; i<EBOFS_NUM_FREE_BUCKETS; i++)
free_tab[i] = new Table<block_t,block_t>( table_nodepool, empty );
-
+ oc_tab = new Table<idpair_t, bool>( table_nodepool, empty );
+ co_tab = new Table<idpair_t, bool>( table_nodepool, empty );
+
// add free space
Extent left;
left.start = nr.start + nr.length;
// close tables
delete object_tab;
- delete collection_tab;
for (int i=0; i<EBOFS_NUM_FREE_BUCKETS; i++)
delete free_tab[i];
+ delete collection_tab;
+ delete oc_tab;
+ delete co_tab;
+
return 0;
}
sb.free_tab[i].depth = free_tab[i]->get_depth();
}
+ sb.collection_tab.num_keys = collection_tab->get_num_keys();
+ sb.collection_tab.root = collection_tab->get_root();
+ sb.collection_tab.depth = collection_tab->get_depth();
+
+ sb.oc_tab.num_keys = oc_tab->get_num_keys();
+ sb.oc_tab.root = oc_tab->get_root();
+ sb.oc_tab.depth = oc_tab->get_depth();
+
+ sb.co_tab.num_keys = co_tab->get_num_keys();
+ sb.co_tab.root = co_tab->get_root();
+ sb.co_tab.depth = co_tab->get_depth();
+
// pools
sb.table_nodepool.num_regions = table_nodepool.get_num_regions();
for (int i=0; i<table_nodepool.get_num_regions(); i++) {
p += key.length() + 1;
int len = *(int*)(p);
p += sizeof(len);
- on->attr[key] = OnodeAttrVal(p, len);
+ on->attr[key] = AttrVal(p, len);
}
// parse extents
// attr
unsigned off = sizeof(eo);
- for (map<string, OnodeAttrVal >::iterator i = on->attr.begin();
+ for (map<string, AttrVal >::iterator i = on->attr.begin();
i != on->attr.end();
i++) {
bl.copy_in(off, i->first.length()+1, i->first.c_str());
}
+
+// *** cnodes ****
+
+// Create the in-memory cnode for a collection id that is not cached yet.
+//
+// The new cnode is registered in cnode_map, in the cnode LRU and in
+// collection_tab (with its still-unplaced location), then get() is called
+// on it before it is returned; callers hand it back with put_cnode().
+//
+// The parameter is a coll_t, matching the declaration in the class and the
+// key type of collection_tab.
+Cnode* Ebofs::new_cnode(coll_t cid)
+{
+  Cnode* cn = new Cnode(cid);
+
+  // a cached cnode for this id would mean the collection already exists
+  assert(cnode_map.count(cid) == 0);
+  cnode_map[cid] = cn;
+  cnode_lru.lru_insert_mid(cn);
+
+  collection_tab->insert( cid, cn->cnode_loc );  // even tho i'm not placed yet
+
+  cn->get();
+  return cn;
+}
+
+// Return the cnode for collection cid with get() called on it, or 0 if
+// collection_tab has no entry for cid.
+//
+// A cnode already present in cnode_map is returned directly.  Otherwise the
+// extent recorded in collection_tab is read from the device, the header and
+// the attribute records that follow it are parsed, and the resulting cnode
+// is entered into cnode_map and the cnode LRU so that later lookups share
+// the same instance instead of allocating a new, untracked one.
+Cnode* Ebofs::get_cnode(coll_t cid)
+{
+  // in cache?
+  if (cnode_map.count(cid)) {
+    // yay
+    Cnode *cn = cnode_map[cid];
+    cn->get();
+    return cn;
+  }
+
+  // on disk?
+  Extent cnode_loc;
+  if (collection_tab->lookup(cid, cnode_loc) != Table<coll_t,Extent>::Cursor::MATCH) {
+    // collection dne.
+    return 0;
+  }
+
+  // read it!
+  bufferlist bl;
+  bufferpool.alloc_list( cnode_loc.length, bl );
+  dev.read( cnode_loc.start, cnode_loc.length, bl );
+
+  // parse data block
+  Cnode *cn = new Cnode(cid);
+
+  struct ebofs_cnode *ec = (struct ebofs_cnode*)bl.c_str();
+  cn->cnode_loc = ec->cnode_loc;
+
+  // parse attributes: each record is a NUL-terminated key, an int length,
+  // then that many value bytes (the layout write_cnode() produces)
+  char *p = bl.c_str() + sizeof(*ec);
+  for (int i=0; i<ec->num_attr; i++) {
+    string key = p;
+    p += key.length() + 1;
+    int len = *(int*)(p);
+    p += sizeof(len);
+    cn->attr[key] = AttrVal(p, len);
+    p += len;  // step over the value to reach the next record
+  }
+
+  // remember it, exactly as new_cnode() does for freshly created cnodes
+  cnode_map[cid] = cn;
+  cnode_lru.lru_insert_mid(cn);
+
+  cn->get();
+  return cn;
+}
+
+// Serialize a cnode (fixed header followed by its attribute records) and
+// write it to its extent on the device.  If the current extent has fewer
+// blocks than the serialized form needs, the old extent is released, a new
+// one is allocated and the collection_tab entry is replaced.
+void Ebofs::write_cnode(Cnode *cn)
+{
+  // how much room does the packed cnode need?
+  int need_bytes = sizeof(ebofs_cnode) + cn->get_attr_bytes();
+  unsigned need_blocks = (need_bytes-1)/EBOFS_BLOCK_SIZE + 1;
+
+  bufferlist bl;
+  bufferpool.alloc_list( need_blocks, bl );
+
+  // too small (or never placed)?  move it.
+  if (cn->cnode_loc.length < need_blocks) {
+    if (cn->cnode_loc.length)
+      allocator.release(cn->cnode_loc);
+    allocator.allocate(cn->cnode_loc, need_blocks, 0);
+
+    // re-point the directory entry at the new location
+    collection_tab->remove( cn->coll_id );
+    collection_tab->insert( cn->coll_id, cn->cnode_loc );
+  }
+
+  // header
+  struct ebofs_cnode hdr;
+  hdr.cnode_loc = cn->cnode_loc;
+  hdr.coll_id = cn->coll_id;
+  hdr.num_attr = cn->attr.size();
+  bl.copy_in(0, sizeof(hdr), (char*)&hdr);
+
+  // attribute records: key (with trailing NUL), int length, value bytes
+  unsigned pos = sizeof(hdr);
+  map<string, AttrVal >::iterator a = cn->attr.begin();
+  while (a != cn->attr.end()) {
+    const string& key = a->first;
+    AttrVal& val = a->second;
+
+    bl.copy_in(pos, key.length()+1, key.c_str());
+    pos += key.length()+1;
+
+    bl.copy_in(pos, sizeof(int), (char*)&val.len);
+    pos += sizeof(int);
+
+    bl.copy_in(pos, val.len, val.data);
+    pos += val.len;
+
+    ++a;
+  }
+
+  // out to the device
+  dev.write( cn->cnode_loc.start, cn->cnode_loc.length, bl );
+}
+
+// Destroy a collection's cnode: drop its collection_tab entry, release the
+// extent it occupies (if it was ever placed), forget it in cnode_map and
+// free the object.  cn must not be used after this call.
+void Ebofs::remove_cnode(Cnode *cn)
+{
+  // remove from table
+  collection_tab->remove(cn->coll_id);
+
+  // free cnode space
+  if (cn->cnode_loc.length)
+    allocator.release(cn->cnode_loc);
+
+  // drop the cache entry first; otherwise get_cnode() would hand out the
+  // deleted pointer and new_cnode() would trip its assert on re-creation
+  cnode_map.erase(cn->coll_id);
+  // NOTE(review): cn was also inserted into cnode_lru; it should be taken
+  // out of the LRU here as well -- confirm the LRU removal call to use.
+
+  // delete mappings
+  //cn->clear();
+
+  delete cn;
+}
+
+// Hand a cnode back: calls put() on it, pairing with the get() done by
+// new_cnode() / get_cnode().
+void Ebofs::put_cnode(Cnode *cn)
+{
+  cn->put();
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
// *** buffer cache ***
void Ebofs::trim_buffer_cache()
return 0;
}
+
+
+
+
+
+/***************** collections ******************/
+
+// Append the id of every collection in collection_tab to ls.
+// Returns the number of ids appended.
+int Ebofs::list_collections(list<coll_t>& ls)
+{
+  int num = 0;
+
+  // nothing to walk in an empty table (same guard as dump_freelist)
+  if (collection_tab->get_num_keys() == 0) return num;
+
+  Table<coll_t, Extent>::Cursor cursor(collection_tab);
+  if (collection_tab->find(0, cursor) >= 0) {
+    while (1) {
+      ls.push_back(cursor.current().key);
+      num++;
+      // stop on <= 0, matching the loop in Allocator::dump_freelist
+      if (cursor.move_right() <= 0) break;
+    }
+  }
+
+  return num;
+}
+
+// Create collection cid.  Returns 0 on success, -1 if it already exists.
+int Ebofs::create_collection(coll_t cid)
+{
+  if (!collection_exists(cid)) {
+    // make the cnode, then immediately give our reference back
+    Cnode *fresh = new_cnode(cid);
+    put_cnode(fresh);
+    return 0;
+  }
+  return -1;
+}
+
+// Destroy collection cid: remove every (object, collection) mapping that
+// refers to it from oc_tab and co_tab, then remove its cnode.
+// Returns 0 on success, -1 if the collection does not exist.
+int Ebofs::destroy_collection(coll_t cid)
+{
+  if (!collection_exists(cid)) return -1;
+
+  // The collection exists, so look its cnode up.  new_cnode() is only for
+  // ids that are not present yet: it asserts the id is not cached and
+  // inserts a second collection_tab entry.
+  Cnode *cn = get_cnode(cid);
+  if (!cn) return -1;
+
+  // hose mappings
+  list<object_t> objects;
+  collection_list(cid, objects);
+  for (list<object_t>::iterator i = objects.begin();
+       i != objects.end();
+       i++) {
+    oc_tab->remove(idpair_t(*i,cid));
+    co_tab->remove(idpair_t(cid,*i));
+  }
+
+  remove_cnode(cn);
+  return 0;
+}
+
+// True iff collection_tab holds an exact entry for cid.
+bool Ebofs::collection_exists(coll_t cid)
+{
+  Table<coll_t, Extent>::Cursor where(collection_tab);
+  const int found = collection_tab->find(cid, where);
+  return found == Table<coll_t, Extent>::Cursor::MATCH;
+}
+
+// Add object oid to collection cid by recording the pair in both index
+// tables: oc_tab under (oid, cid) and co_tab under (cid, oid).
+// Returns 0 on success, -1 if the collection does not exist.
+int Ebofs::collection_add(coll_t cid, object_t oid)
+{
+  if (collection_exists(cid)) {
+    const idpair_t by_object(oid, cid);
+    const idpair_t by_collection(cid, oid);
+    oc_tab->insert(by_object, true);
+    co_tab->insert(by_collection, true);
+    return 0;
+  }
+  return -1;
+}
+
+// Take object oid out of collection cid by removing the pair from both
+// index tables: (oid, cid) from oc_tab and (cid, oid) from co_tab.
+// Returns 0 on success, -1 if the collection does not exist.
+int Ebofs::collection_remove(coll_t cid, object_t oid)
+{
+  if (collection_exists(cid)) {
+    const idpair_t by_object(oid, cid);
+    const idpair_t by_collection(cid, oid);
+    oc_tab->remove(by_object);
+    co_tab->remove(by_collection);
+    return 0;
+  }
+  return -1;
+}
+
+// Append every object in collection cid to ls by scanning co_tab from the
+// key (cid, 0) until the collection part of the key changes.
+// Returns the number of objects appended, or -1 if the collection does not
+// exist.
+int Ebofs::collection_list(coll_t cid, list<object_t>& ls)
+{
+  if (!collection_exists(cid)) return -1;
+
+  int num = 0;
+
+  // nothing to walk in an empty table (same guard as dump_freelist)
+  if (co_tab->get_num_keys() == 0) return num;
+
+  Table<idpair_t, bool>::Cursor cursor(co_tab);
+  if (co_tab->find(idpair_t(cid,0), cursor) >= 0) {
+    while (1) {
+      const coll_t c = cursor.current().key.first;
+      const object_t o = cursor.current().key.second;
+      if (c != cid) break;  // end!
+      ls.push_back(o);
+      num++;
+      // stop on <= 0, matching the loop in Allocator::dump_freelist
+      if (cursor.move_right() <= 0) break;
+    }
+  }
+
+  return num;
+}
+
#include "types.h"
#include "Onode.h"
+#include "Cnode.h"
#include "BlockDevice.h"
#include "nodes.h"
#include "Allocator.h"
#include "common/Cond.h"
+typedef pair<object_t,coll_t> idpair_t;
+
+// Print an id pair as "first->second" in hex, switching the stream back to
+// decimal afterwards.  No newline is emitted: like the other stream
+// inserters here (Extent, BufferHead), line termination is left to the
+// caller.
+inline ostream& operator<<(ostream& out, idpair_t oc) {
+  return out << hex << oc.first << "->" << oc.second << dec;
+}
class Ebofs {
// tables
Table<object_t, Extent> *object_tab;
- Table<coll_t, Extent> *collection_tab;
Table<block_t,block_t> *free_tab[EBOFS_NUM_FREE_BUCKETS];
- // sets?
+ // collections
+ Table<coll_t, Extent> *collection_tab;
+ Table<idpair_t, bool> *oc_tab;
+ Table<idpair_t, bool> *co_tab;
// ** onode cache **
void remove_onode(Onode *on);
void put_onode(Onode* o); // put it back down. ref--.
+ // ** cnodes **
+ hash_map<coll_t, Cnode*> cnode_map;
+ LRU cnode_lru;
+
+ Cnode* new_cnode(coll_t cid);
+ Cnode* get_cnode(coll_t cid);
+ void write_cnode(Cnode *cn);
+ void remove_cnode(Cnode *cn);
+ void put_cnode(Cnode *cn);
+
+
public:
void trim_onode_cache();
protected:
dev(d),
free_blocks(0), allocator(this),
bufferpool(EBOFS_BLOCK_SIZE),
- object_tab(0), collection_tab(0),
+ object_tab(0), collection_tab(0), oc_tab(0), co_tab(0),
bc(dev, bufferpool) {
for (int i=0; i<EBOFS_NUM_FREE_BUCKETS; i++)
free_tab[i] = 0;
size_t len, off_t offset,
bufferlist& bl,
Context *onsafe);
+ int truncate(object_t oid, off_t size);
+ int remove(object_t oid);
- // attr
+ // object attr
int setattr(object_t oid, const char *name, void *value, size_t size);
int getattr(object_t oid, const char *name, void *value, size_t size);
- int listattr(object_t oid, char *attrs, size_t max);
+ int listattr(object_t oid, vector<string>& attrs);
// collections
- // ...
-
+ int list_collections(list<coll_t>& ls);
+ //int collection_stat(coll_t c, struct stat *st);
+ int create_collection(coll_t c);
+ int destroy_collection(coll_t c);
+
+ bool collection_exists(coll_t c);
+ int collection_add(coll_t c, object_t o);
+ int collection_remove(coll_t c, object_t o);
+ int collection_list(coll_t c, list<object_t>& o);
+
+ int collection_setattr(object_t oid, const char *name, void *value, size_t size);
+ int collection_getattr(object_t oid, const char *name, void *value, size_t size);
+ int collection_listattr(object_t oid, vector<string>& attrs);
+
};
#include "types.h"
#include "BufferCache.h"
-class OnodeAttrVal {
+// Owning copy of one attribute value: `len` bytes held in a heap buffer
+// `data`.  Construction, copy and assignment all duplicate the bytes; the
+// destructor frees them.
+class AttrVal {
public:
char *data;
int len;
- OnodeAttrVal() : data(0), len(0) {}
- OnodeAttrVal(char *from, int l) :
+ AttrVal() : data(0), len(0) {}
+ AttrVal(char *from, int l) :
len(l) {
data = new char[len];
memcpy(data, from, len);
}
- OnodeAttrVal(const OnodeAttrVal &other) {
+ AttrVal(const AttrVal &other) {
len = other.len;
data = new char[len];
memcpy(data, other.data, len);
}
- OnodeAttrVal& operator=(const OnodeAttrVal &other) {
+ AttrVal& operator=(const AttrVal &other) {
+ if (this == &other) return *this;  // self-assignment: the delete[] below would free the bytes we are about to copy
if (data) delete[] data;
len = other.len;
data = new char[len];
memcpy(data, other.data, len);
return *this;
}
- ~OnodeAttrVal() {
+ ~AttrVal() {
delete[] data;
}
};
unsigned object_blocks;
// onode
- map<string, OnodeAttrVal > attr;
- vector<Extent> extents;
+ map<string, AttrVal > attr;
+ vector<Extent> extents;
ObjectCache *oc;
// pack/unpack
int get_attr_bytes() {
int s = 0;
- for (map<string, OnodeAttrVal >::iterator i = attr.begin();
+ for (map<string, AttrVal >::iterator i = attr.begin();
i != attr.end();
i++) {
s += i->first.length() + 1;
}
+ // Release the subtree rooted at node_loc (a node at depth `level`) back
+ // to the node pool: children first when this is an index level, then the
+ // node itself.  The cursor's open[]/pos[] slots for the visited levels
+ // are overwritten along the way.
+ void clear(Cursor& cursor, int node_loc, int level) {
+   Nodeptr here = pool.get_node( node_loc );
+   cursor.open[level] = here;
+
+   // levels above the last one hold index items pointing at child nodes
+   const bool has_children = (level < depth-1);
+   if (has_children) {
+     int slot = 0;
+     while (slot < here.size()) {
+       cursor.pos[level] = slot;
+       nodeid_t below = cursor.open[level].index_item(slot).node;
+       clear( cursor, below, level+1 );
+       slot++;
+     }
+   }
+
+   // finally give back this node
+   pool.release( node_loc );
+ }
+
+ // Release every node of the table and reset it to the empty state
+ // (root == -1, depth == 0).  Does nothing if the table is already empty.
+ // NOTE(review): the key count reported by get_num_keys() is not reset
+ // here -- confirm whether it should be.
+ void clear() {
+   Cursor cursor(this);
+   if (root == -1 && depth == 0) return;  // already empty!
+   // the recursive overload returns void, so there is no result to capture
+   clear(cursor, root, 0);
+   root = -1;
+   depth = 0;
+ }
+
int verify(Cursor& cursor, int node_loc, int level, int& count) {
int err = 0;
inline ostream& operator<<(ostream& out, Extent& ex)
{
- return out << ex.start << "+" << ex.length;
+ return out << ex.start << "~" << ex.length;  // rendered as start~length, the separator the debug messages in this patch use
}
int num_extents; /* number of extents used. if 0, data is in the onode */
};
+struct ebofs_cnode {  // fixed header at the start of a stored cnode; attribute records are packed directly after it
+ Extent cnode_loc; /* extent this cnode is stored in, i.e. the block(s) we live in */
+ object_t coll_id;  // id of the collection this cnode belongs to
+ int num_attr; // number of attribute records following the header
+};
+
+
//static const int EBOFS_MAX_DATA_IN_ONODE = (EBOFS_BLOCK_SIZE - sizeof(struct ebofs_onode));
//static const int EBOFS_MAX_EXTENTS_IN_ONODE = (EBOFS_MAX_DATA_IN_ONODE / sizeof(Extent));
unsigned num_fragmented;
struct ebofs_table object_tab; // object directory
- //struct ebofs_table cdir; // collection directory
struct ebofs_table free_tab[EBOFS_NUM_FREE_BUCKETS];
+ struct ebofs_table collection_tab; // collection directory
+ struct ebofs_table oc_tab;
+ struct ebofs_table co_tab;
+
struct ebofs_nodepool table_nodepool;
};