EXTRA_CFLAGS = -I${HOME}/include -L${HOME}/lib
# base
-CFLAGS = -g -Wall -I. -D_FILE_OFFSET_BITS=64 -D_REENTRANT -D_THREAD_SAFE ${EXTRA_CFLAGS}
+CFLAGS = -pg -g -Wall -I. -D_FILE_OFFSET_BITS=64 -D_REENTRANT -D_THREAD_SAFE ${EXTRA_CFLAGS}
LDINC = ld -i -o
CC = g++
LIBS = -pthread
// add to biols
int nv = bio->bl.buffers().size(); // how many iov's in this bio's bufferlist?
- if (num_iovs + nv >= g_conf.bdev_iov_max) break; // too many!
+ if (num_iovs + nv >= IOV_MAX) break; // too many //g_conf.bdev_iov_max) break; // too many!
num_iovs += nv;
start = MIN(start, bio->start);
assert(fd > 0);
- off_t offset = (off_t)bno << EBOFS_BLOCK_BITS;
- assert((off_t)bno * (off_t)EBOFS_BLOCK_SIZE == offset);
- off_t actual = lseek(fd, offset, SEEK_SET);
- assert(actual == offset);
-
- // write buffers
- size_t len = num*EBOFS_BLOCK_SIZE;
-
- struct iovec iov[ bl.buffers().size() ];
-
- int n = 0;
- size_t left = len;
- for (list<bufferptr>::const_iterator i = bl.buffers().begin();
- i != bl.buffers().end();
- i++) {
- assert(i->length() % EBOFS_BLOCK_SIZE == 0);
-
- iov[n].iov_base = (void*)i->c_str();
- iov[n].iov_len = MIN(left, i->length());
-
- assert((((intptr_t)iov[n].iov_base) & ((intptr_t)4095ULL)) == 0);
- assert((iov[n].iov_len & 4095) == 0);
+ while (1) {
+ off_t offset = (off_t)bno << EBOFS_BLOCK_BITS;
+ assert((off_t)bno * (off_t)EBOFS_BLOCK_SIZE == offset);
+ off_t actual = lseek(fd, offset, SEEK_SET);
+ assert(actual == offset);
- left -= iov[n].iov_len;
- n++;
- if (left == 0) break;
- }
-
- int r = ::writev(fd, iov, n);
-
- if (r < 0) {
- dout(1) << "couldn't write bno " << bno << " num " << num
- << " (" << len << " bytes) in " << n << " iovs, r=" << r
- << " errno " << errno << " " << strerror(errno) << dendl;
- dout(1) << "bl is " << bl << dendl;
- assert(0);
- } else {
- assert(r == (int)len);
+ // write buffers
+ size_t len = num*EBOFS_BLOCK_SIZE;
+
+ struct iovec iov[ bl.buffers().size() ];
+
+ int n = 0;
+ size_t left = len;
+ for (list<bufferptr>::const_iterator i = bl.buffers().begin();
+ i != bl.buffers().end();
+ i++) {
+ assert(i->length() % EBOFS_BLOCK_SIZE == 0);
+
+ iov[n].iov_base = (void*)i->c_str();
+ iov[n].iov_len = MIN(left, i->length());
+
+ assert((((intptr_t)iov[n].iov_base) & ((intptr_t)4095ULL)) == 0);
+ assert((iov[n].iov_len & 4095) == 0);
+
+ left -= iov[n].iov_len;
+ n++;
+ if (left == 0) break;
+ }
+
+ int r = ::writev(fd, iov, n);
+
+ if (r < 0) {
+ dout(1) << "couldn't write bno " << bno << " num " << num
+ << " (" << len << " bytes) in " << n << " iovs, r=" << r
+ << " errno " << errno << " " << strerror(errno) << dendl;
+ dout(1) << "bl is " << bl << dendl;
+ assert(0);
+ } else if (r < (int)len) {
+ // hrm, we didn't write _all_ of our data. WTF kind of FS is this?
+ dout(-1) << "bloody hell, writev only wrote " << r << " of " << len << " bytes, looping" << dendl;
+ assert(r % 4096 == 0);
+ int wrote = r / 4096;
+ bno += wrote;
+ num -= wrote;
+ bufferlist tail;
+ tail.substr_of(bl, r, len-r);
+ bl.claim(tail);
+ continue;
+ } else {
+ // yay
+ assert(r == (int)len);
+ break;
+ }
}
-
return 0;
}
* - don't worry about disk extent boundaries (yet)
*/
int ObjectCache::map_write(block_t start, block_t len,
- interval_set<block_t>& alloc,
map<block_t, BufferHead*>& hits,
version_t super_epoch)
{
map<block_t, BufferHead*>::iterator p = data.lower_bound(start);
- dout(10) << "map_write " << *on << " " << start << "~" << len << " ... alloc " << alloc << dendl;
+ dout(10) << "map_write " << *on << " " << start << "~" << len << dendl;
// p->first >= start
block_t cur = start;
while (left > 0) {
// max for this bh (bc of (re)alloc on disk)
block_t max = left;
- bool newalloc = false;
-
- // based on alloc/no-alloc boundary ...
- if (alloc.contains(cur, left)) {
- if (alloc.contains(cur)) {
- block_t ends = alloc.end_after(cur);
- max = MIN(left, ends-cur);
- newalloc = true;
- } else {
- if (alloc.starts_after(cur)) {
- block_t st = alloc.start_after(cur);
- max = MIN(left, st-cur);
- }
- }
- }
// based on disk extent boundary ...
vector<Extent> exv;
if (exv.size() > 1)
max = exv[0].length;
- if (newalloc) {
- dout(10) << "map_write " << cur << "~" << max << " is new alloc on disk" << dendl;
- } else {
- dout(10) << "map_write " << cur << "~" << max << " keeps old alloc on disk" << dendl;
- }
+ dout(10) << "map_write " << cur << "~" << max << dendl;
// at end?
if (p == data.end()) {
BufferHead *right = bc->split(bh, cur);
bc->bh_read(on, bh); // reread left bit
bh = right;
- } else if (bh->is_tx() && !newalloc && bc->bh_cancel_write(bh, super_epoch)) {
+ } else if (bh->is_tx() && bh->epoch_modified == super_epoch && bc->bh_cancel_write(bh, super_epoch)) {
BufferHead *right = bc->split(bh, cur);
bc->bh_write(on, bh); // rewrite left bit
bh = right;
BufferHead *right = bc->split(middle, cur+max);
bc->bh_read(on, right); // reread right
bh = middle;
- } else if (bh->is_tx() && !newalloc && bc->bh_cancel_write(bh, super_epoch)) {
+ } else if (bh->is_tx() && bh->epoch_modified == super_epoch && bc->bh_cancel_write(bh, super_epoch)) {
BufferHead *middle = bc->split(bh, cur);
bc->bh_write(on, bh); // redo left
p++;
if (bh->is_rx() && bc->bh_cancel_read(bh)) {
BufferHead *right = bc->split(bh, cur+max);
bc->bh_read(on, right); // re-rx the right bit
- } else if (bh->is_tx() && !newalloc && bc->bh_cancel_write(bh, super_epoch)) {
+ } else if (bh->is_tx() && bh->epoch_modified == super_epoch && bc->bh_cancel_write(bh, super_epoch)) {
BufferHead *right = bc->split(bh, cur+max);
bc->bh_write(on, right); // re-tx the right bit
} else {
}
// try to cancel tx?
- if (bh->is_tx() && !newalloc) bc->bh_cancel_write(bh, super_epoch);
+ if (bh->is_tx() && bh->epoch_modified == super_epoch) bc->bh_cancel_write(bh, super_epoch);
// put in our map
hits[cur] = bh;
BufferHead *right = bc->split(bh, blocks);
bc->bh_read(on, bh); // reread left bit
bh = right;
- } else if (bh->is_tx() && uncom && bc->bh_cancel_write(bh, super_epoch)) {
+ } else if (bh->is_tx() && uncom && bh->epoch_modified == super_epoch && bc->bh_cancel_write(bh, super_epoch)) {
BufferHead *right = bc->split(bh, blocks);
bc->bh_write(on, bh); // rewrite left bit
bh = right;
// cancel any pending/queued io, if possible.
if (bh->is_rx())
bc->bh_cancel_read(bh);
- if (bh->is_tx() && uncom)
+ if (bh->is_tx() && uncom && bh->epoch_modified == super_epoch)
bc->bh_cancel_write(bh, super_epoch);
if (bh->shadow_of) {
dout(10) << "truncate " << *bh << " unshadowing " << *bh->shadow_of << dendl;
bool BufferCache::bh_cancel_write(BufferHead *bh, version_t cur_epoch)
{
+ assert(bh->is_tx());
+ assert(bh->epoch_modified == cur_epoch);
+ assert(bh->epoch_modified > 0);
if (bh->tx_ioh && dev.cancel_io(bh->tx_ioh) >= 0) {
dout(10) << "bh_cancel_write on " << *bh << dendl;
bh->tx_ioh = 0;
mark_dirty(bh);
- assert(bh->epoch_modified == cur_epoch);
- assert(bh->epoch_modified > 0);
dec_unflushed( EBOFS_BC_FLUSH_BHWRITE, bh->epoch_modified ); // assert.. this should be the same epoch!
int l = bh->oc->put();
if (bh.is_rx()) out << " rx";
if (bh.is_tx()) out << " tx";
if (bh.is_partial()) out << " partial";
+
+ // include epoch modified?
+ if (bh.is_dirty() || bh.is_tx() || bh.is_partial())
+ out << "(e" << bh.epoch_modified << ")";
+
//out << " " << bh.data.length();
out << " " << &bh;
out << ")";
int map_write(block_t start, block_t len,
- interval_set<block_t>& alloc,
map<block_t, BufferHead*>& hits,
version_t super_epoch); // can write to these.
void touch_bottom(block_t bstart, block_t blast);
// init node pools
dout(3) << "mount nodepool" << dendl;
nodepool.init( &sb->nodepool );
- nodepool.read_usemap( dev, super_epoch );
- nodepool.read_clean_nodes( dev );
+ nodepool.read_usemap_and_clean_nodes( dev, super_epoch );
// open tables
dout(3) << "mount opening tables" << dendl;
nodepool.usemap_odd.length = usemap_len;
dout(10) << "mkfs: even usemap at " << nodepool.usemap_even << dendl;
dout(10) << "mkfs: odd usemap at " << nodepool.usemap_odd << dendl;
+ nodepool.init_usemap();
// init tables
struct ebofs_table empty;
// write nodes, super, 2x
dout(10) << "mkfs: flushing nodepool and superblocks (2x)" << dendl;
- nodepool.commit_start( dev, 0 );
- nodepool.commit_wait();
- bufferptr superbp0;
- prepare_super(0, superbp0);
- write_super(0, superbp0);
-
- nodepool.commit_start( dev, 1 );
- nodepool.commit_wait();
- bufferptr superbp1;
- prepare_super(1, superbp1);
- write_super(1, superbp1);
-
+ for (epoch_t e=0; e<2; e++) {
+ nodepool.commit_start(dev, e);
+ nodepool.commit_wait();
+ bufferptr superbp;
+ prepare_super(e, superbp);
+ write_super(e, superbp);
+ }
+
// free memory
dout(10) << "mkfs: cleaning up" << dendl;
close_tables();
<< "%) limbo in " << get_limbo_extents()
<< dendl;
dout(2) << "commit_thread nodes: "
- << 100*nodepool.num_used()/nodepool.num_total() << "% used, "
- << nodepool.num_free() << " (" << 100*nodepool.num_free()/nodepool.num_total() << "%) free, "
- << nodepool.num_limbo() << " (" << 100*nodepool.num_limbo()/nodepool.num_total() << "%) limbo, "
- << nodepool.num_total() << " total." << dendl;
+ << 100*nodepool.get_num_used()/nodepool.get_num_total() << "% used, "
+ << nodepool.get_num_free() << " (" << 100*nodepool.get_num_free()/nodepool.get_num_total() << "%) free, "
+ << nodepool.get_num_limbo() << " (" << 100*nodepool.get_num_limbo()/nodepool.get_num_total() << "%) limbo, "
+ << nodepool.get_num_total() << " total." << dendl;
dout(2) << "commit_thread bc: "
<< "size " << bc.get_size()
<< ", trimmable " << bc.get_trimmable()
allocator.release_limbo(); // limbo_tab -> free_tabs
// do we need more node space?
- if (nodepool.num_free() < nodepool.num_total() / 3) {
+ if (nodepool.get_num_free() < nodepool.get_num_total() / 3) {
dout(2) << "commit_thread running low on node space, allocating more." << dendl;
alloc_more_node_space();
}
void Ebofs::alloc_more_node_space()
{
- dout(1) << "alloc_more_node_space free " << nodepool.num_free() << "/" << nodepool.num_total() << dendl;
+ dout(1) << "alloc_more_node_space free " << nodepool.get_num_free() << "/" << nodepool.get_num_total() << dendl;
if (nodepool.num_regions() < EBOFS_MAX_NODE_REGIONS) {
- int want = nodepool.num_total();
+ int want = nodepool.get_num_total();
Extent ex;
allocator.allocate(ex, want, 2);
dout(1) << "alloc_more_node_space wants " << want << " more, got " << ex << dendl;
Extent even, odd;
- unsigned ulen = nodepool.get_usemap_len(nodepool.num_total() + ex.length);
+ unsigned ulen = nodepool.get_usemap_len(nodepool.get_num_total() + ex.length);
allocator.allocate(even, ulen, 2);
allocator.allocate(odd, ulen, 2);
dout(1) << "alloc_more_node_space maps need " << ulen << " x2, got " << even << " " << odd << dendl;
allocator.release(nodepool.usemap_even);
allocator.release(nodepool.usemap_odd);
nodepool.add_region(ex);
+
+ // expand usemap?
nodepool.usemap_even = even;
nodepool.usemap_odd = odd;
+ nodepool.expand_usemap();
} else {
dout (1) << "alloc_more_node_space failed to get space for new usemaps" << dendl;
allocator.release(ex);
buf->f_bfree = get_free_blocks()
+ get_limbo_blocks(); /* free blocks in fs */
buf->f_bavail = get_free_blocks(); /* free blocks avail to non-superuser -- actually, for writing. */
- buf->f_files = nodepool.num_total(); /* total file nodes in file system */
- buf->f_ffree = nodepool.num_free(); /* free file nodes in fs */
+ buf->f_files = nodepool.get_num_total(); /* total file nodes in file system */
+ buf->f_ffree = nodepool.get_num_free(); /* free file nodes in fs */
//buf->f_fsid = 0; /* file system id */
#ifndef DARWIN
buf->f_namelen = 8; /* maximum length of filenames */
// map b range onto buffer_heads
map<block_t, BufferHead*> hits;
- oc->map_write(bstart, blen, alloc, hits, super_epoch);
+ oc->map_write(bstart, blen, hits, super_epoch);
// get current versions
//version_t lowv, highv;
Nodeptr() : node(0) {}
Nodeptr(Node *n) : node(n) {}
+ Nodeptr(NodePool& pool, nodeid_t nid) { // construct directly from a node id: resolves nid through open()
+ open(pool, nid);
+ }
Nodeptr& operator=(Node *n) {
node = n;
return *this;
}
+
+ void open(NodePool& pool, nodeid_t nid) { // (re)point this Nodeptr at whatever pool.get_node(nid) returns
+ node = pool.get_node(nid);
+ if (is_index() && node->children.empty()) init_index(pool); // index node whose children vector is still empty; NOTE(review): node is dereferenced unchecked here -- presumably get_node() never yields null for a valid nid, confirm
+ }
LeafItem& leaf_item(int i) { return (( LeafItem*)(node->item_ptr()))[i]; }
IndexItem& index_item(int i) { return ((IndexItem*)(node->item_ptr()))[i]; }
else
return leaf_item(i).key;
}
-
+
bool is_leaf() { return node->is_leaf(); }
bool is_index() { return node->is_index(); }
void set_type(int t) { node->set_type(t); }
int size() { return node->size(); }
void set_size(int s) { node->set_size(s); }
+ void init_index(NodePool& nodepool) { // currently a no-op: the code that would fill node->children is commented out below
+ /*
+ node->children = vector<Node*>(max_items());
+ for (int i=0; i<max_items(); i++)
+ if (i < size())
+ node->children[i] = nodepool.get_node(index_item(i).node);
+ else
+ node->children[i] = 0;
+ */
+ }
+
+
void remove_at_pos(int p) {
if (node->is_index()) {
- for (int i=p; i<size()-1; i++)
+ for (int i=p; i<size()-1; i++) {
index_item(i) = index_item(i+1);
+ //node->children[i] = node->children[i+1];
+ }
} else {
for (int i=p; i<size()-1; i++)
leaf_item(i) = leaf_item(i+1);
leaf_item(p).value = value;
set_size(size() + 1);
}
- void insert_at_index_pos(int p, K key, nodeid_t node) {
+ void insert_at_index_pos(int p, K key, nodeid_t nid) {
assert(is_index());
- for (int i=size(); i>p; i--)
+ for (int i=size(); i>p; i--) {
index_item(i) = index_item(i-1);
+ //node->children[i] = node->children[i-1];
+ }
index_item(p).key = key;
- index_item(p).node = node;
+ index_item(p).node = nid;
set_size(size() + 1);
}
// work back down right side
for (; l<level; l++) {
- open[l+1] = table->pool.get_node( open[l].index_item(pos[l]).node );
+ open[l+1].open(table->pool, open[l].index_item(pos[l]).node);
pos[l+1] = open[l+1].size() - 1;
}
return 1;
/* work back down */
for (; l<level; l++) {
- open[l+1] = table->pool.get_node( open[l].index_item(pos[l]).node );
+ open[l+1].open(table->pool, open[l].index_item(pos[l]).node );
pos[l+1] = 0; // furthest left
}
return 1;
Nodeptr here = open[level];
Nodeptr parent = open[level-1];
- Nodeptr left = table->pool.get_node( parent.index_item(pos[level-1] - 1).node );
+ Nodeptr left(table->pool, parent.index_item(pos[level-1] - 1).node );
if (left.size() == left.max_items()) return -1; // it's full
// make both dirty
Nodeptr here = open[level];
Nodeptr parent = open[level-1];
- Nodeptr right = table->pool.get_node( parent.index_item( pos[level-1] + 1 ).node );
+ Nodeptr right(table->pool, parent.index_item( pos[level-1] + 1 ).node );
if (right.size() == right.max_items()) return -1; // it's full
// make both dirty
public:
bool almost_full() {
- if (2*(depth+1) > pool.num_free()) // worst case, plus some.
+ if (2*(depth+1) > pool.get_num_free()) // worst case, plus some.
return true;
return false;
}
cursor.level = 0;
// start at root
- Nodeptr curnode( pool.get_node(root) );
+ Nodeptr curnode(pool, root);
cursor.open[0] = curnode;
if (curnode.size() == 0) return -1; // empty!
cursor.pos[cursor.level] = i;
/* get child node */
- curnode = pool.get_node( cursor.open[cursor.level].index_item(i).node );
+ curnode.open(pool, cursor.open[cursor.level].index_item(i).node );
cursor.open[cursor.level+1] = curnode;
}
// left?
if (cursor.pos[cursor.level-1] > 0) {
int left_loc = cursor.open[cursor.level-1].index_item( cursor.pos[cursor.level-1] - 1).node;
- left = pool.get_node( left_loc );
+ left.open(pool, left_loc);
if (left.size() > left.min_items()) {
/* move cursor left, shift right */
else {
assert(cursor.pos[cursor.level-1] < cursor.open[cursor.level-1].size() - 1);
int right_loc = cursor.open[cursor.level-1].index_item( cursor.pos[cursor.level-1] + 1 ).node;
- right = pool.get_node( right_loc );
+ right.open(pool, right_loc );
if (right.size() > right.min_items()) {
/* move cursor right, shift an item left */
void clear(Cursor& cursor, int node_loc, int level) {
dbtout << "clear" << std::endl;
- Nodeptr node = pool.get_node( node_loc );
+ Nodeptr node(pool, node_loc);
cursor.open[level] = node;
// hose children?
int verify_sub(Cursor& cursor, int node_loc, int level, int& count, K& last, const char *on) {
int err = 0;
- Nodeptr node = pool.get_node( node_loc );
+ Nodeptr node(pool, node_loc);
cursor.open[level] = node;
// identify max, min, and validate key range
parse_config_options(args);
if (args.size() < 1) {
- cerr << "usage: mkfs.ebofs [options] <device file>" << endl;
+ cerr << "usage: mkfs.ebofs [options] <device file>" << std::endl;
return -1;
}
char *filename = args[0];
// test-o-rama!
Ebofs fs(filename);
fs.mount();
-
+
+ // zillion objects
if (1) {
char crap[1024*1024];
memset(crap, 0, 1024*1024);
+ bufferlist bl;
+ int sz = 10000;
+ bl.append(crap, sz);
+
+ int n = 100000;
+ utime_t start = g_clock.now();
+ for (int i=0; i<n; i++) {
+ if (i && i % 1000 == 0) {
+ utime_t now = g_clock.now();
+ utime_t end = now;
+ end -= start;
+ start = now;
+ cout << i << " / " << n << " in " << end << std::endl;
+ }
+ object_t oid(i,0);
+ fs.write(oid, 0, sz, bl, (Context*)0);
+ }
+ }
+
+ // streaming write test
+ if (0) {
+ char crap[1024*1024];
+ memset(crap, 0, 1024*1024);
object_t oid(1,2);
ts.tv_nsec = 1000*1000*40; // ms -> nsec
while (1) {
- cout << g_clock.now() << " writing " << pos << "~" << sz << endl;
+ cout << g_clock.now() << " writing " << pos << "~" << sz << std::endl;
fs.write(oid, pos, sz, bl, (Context*)0);
pos += sz;
nanosleep(&ts, 0);
bufferlist big;
big.append(crap, 1024*1024);
- cout << "0" << endl;
+ cout << "0" << std::endl;
fs.write(10, 0, 1024*1024, big, (Context*)0);
fs.sync();
fs.trim_buffer_cache();
- cout << "1" << endl;
+ cout << "1" << std::endl;
fs.write(10, 10, 10, small, 0);
fs.write(10, 1, 1000, med, 0);
fs.sync();
fs.trim_buffer_cache();
- cout << "2" << endl;
+ cout << "2" << std::endl;
fs.write(10, 10, 10, small, 0);
//fs.sync();
fs.write(10, 1, 1000, med, 0);
fs.sync();
fs.trim_buffer_cache();
- cout << "3" << endl;
+ cout << "3" << std::endl;
fs.write(10, 1, 1000, med, 0);
fs.write(10, 10000, 10, small, 0);
fs.truncate(10, 100, 0);
fs.sync();
fs.trim_buffer_cache();
- cout << "4" << endl;
+ cout << "4" << std::endl;
fs.remove(10);
fs.sync();
fs.write(10, 10, 10, small, 0);
char *p = bl.c_str();
off_t o = 0;
for (int i=0; i<n; i++) {
- cout << "write at " << o << endl;
+ cout << "write at " << o << std::endl;
for (int j=0;j<l;j++)
p[j] = (char)(oid^(o+j));
fs.write(oid, l, o, bl, (Context*)0);
o = 0;
for (int i=0; i<n; i++) {
- cout << "read at " << o << endl;
+ cout << "read at " << o << std::endl;
bl.clear();
fs.read(oid, l, o, bl);
for (off_t m=0; m<megs; m++) {
//if (m%100 == 0)
- cout << m << " / " << megs << endl;
+ cout << m << " / " << megs << std::endl;
fs.write(10, bl.length(), 1024LL*1024LL*m, bl, (Context*)0);
}
fs.sync();
utime_t end = g_clock.now();
end -= start;
- cout << "elapsed " << end << endl;
+ cout << "elapsed " << end << std::endl;
float mbs = (float)megs / (float)end;
- cout << "mb/s " << mbs << endl;
+ cout << "mb/s " << mbs << std::endl;
}
if (0) { // test
for (int i=0; i<10000; i++) {
off_t off = rand() % 1000000;
size_t len = 1+rand() % 10000;
- cout << endl << i << " writing bit at " << off << " len " << len << endl;
+ cout << std::endl << i << " writing bit at " << off << " len " << len << std::endl;
fs.write(10, len, off, bl, (Context*)0);
//fs.sync();
//fs.trim_buffer_cache();
for (int i=0; i<100; i++) {
off_t off = rand() % 1000000;
size_t len = 1+rand() % 10000;
- cout << endl << i << " writing bit at " << off << " len " << len << endl;
+ cout << std::endl << i << " writing bit at " << off << " len " << len << std::endl;
fs.write(10, len, off, bl, (Context*)0);
//fs.sync();
//fs.trim_buffer_cache();
off_t off = 0;
for (int i=0; i<10000; i++) {
size_t len = 1024*1024;//1+rand() % 10000;
- cout << endl << i << " writing bit at " << off << " len " << len << endl;
+ cout << std::endl << i << " writing bit at " << off << " len " << len << std::endl;
fs.write(10, len, off, bl, (Context*)0);
off += len;
}
bufferlist bl;
off_t off = rand() % 1000000;
size_t len = rand() % 1000;
- cout << endl << "read bit at " << off << " len " << len << endl;
+ cout << std::endl << "read bit at " << off << " len " << len << std::endl;
int r = fs.read(10, len, off, bl);
assert(bl.length() == len);
assert(r == (int)len);
bufferlist bl;
off_t off = rand() % 1000000;
size_t len = 100;
- cout << endl << "read bit at " << off << " len " << len << endl;
+ cout << std::endl << "read bit at " << off << " len " << len << std::endl;
int r = fs.read(10, len, off, bl);
assert(bl.length() == len);
assert(r == (int)len);
for (int i=0; i<100; i++) {
off_t off = rand() % 1000000;
size_t len = 100;
- cout << endl << "writing bit at " << off << " len " << len << endl;
+ cout << std::endl << "writing bit at " << off << " len " << len << std::endl;
fs.write(10, len, off, bl, (Context*)0);
}
}
free free -> free can alloc
free used -> dirty can modify
- free used used -> tx
+ free used used -> clean
free used free -> limbo
used used -> clean
// bit fields
static const int STATE_CLEAN = 1;
static const int STATE_DIRTY = 2;
- static const int STATE_TX = 3;
static const int ITEM_LEN = EBOFS_NODE_BYTES - sizeof(int) - sizeof(int) - sizeof(int);
protected:
nodeid_t id;
+ int pos_in_bitmap; // position in bitmap
int state; // use bit fields above!
bufferptr bptr;
- bufferptr shadow_bptr;
// in disk buffer
int *type;
int *nrecs;
public:
- xlist<Node*>::item xlist;
+ xlist<Node*>::item xlist; // dirty
- Node(nodeid_t i, bufferptr& b, int s) : id(i), state(s), bptr(b), xlist(this) {
+ vector<Node*> children;
+
+ Node(nodeid_t i, int pib, bufferptr& b, int s) :
+ id(i), pos_in_bitmap(pib),
+ state(s), bptr(b), xlist(this) {
nrecs = (int*)(bptr.c_str());
type = (int*)(bptr.c_str() + sizeof(*nrecs));
}
-
+ void do_cow() { // forwards to do_cow() on this node's buffer; the actual copy semantics live in bufferptr (not visible here)
+ bptr.do_cow();
+ }
+
+
// id
nodeid_t get_id() const { return id; }
void set_id(nodeid_t n) { id = n; }
+ int get_pos_in_bitmap() const { return pos_in_bitmap; } // bit index the pool passes to usemap_bits.set()/clear() for this node
+ void set_pos_in_bitmap(int i) { pos_in_bitmap = i; } // overwrite the cached bit index
// buffer
bufferptr& get_buffer() { return bptr; }
// state
bool is_dirty() { return state == STATE_DIRTY; }
- bool is_tx() { return state == STATE_TX; }
bool is_clean() { return state == STATE_CLEAN; }
void set_state(int s) { state = s; }
-
- void make_shadow() {
- assert(is_tx());
-
- shadow_bptr = bptr;
-
- // new buffer
- bptr = buffer::create_page_aligned(EBOFS_NODE_BYTES);
- nrecs = (int*)(bptr.c_str());
- type = (int*)(bptr.c_str() + sizeof(*nrecs));
-
- // copy contents!
- memcpy(bptr.c_str(), shadow_bptr.c_str(), EBOFS_NODE_BYTES);
- }
-
+
};
vector<Extent> region_loc; // region locations
Extent usemap_even;
Extent usemap_odd;
+
+ buffer::ptr usemap_data;
+ bitmapper usemap_bits;
protected:
// on-disk block states
int num_nodes;
- set<nodeid_t> free;
- set<nodeid_t> clean;
- set<nodeid_t> limbo;
- set<nodeid_t> dirty;
- set<nodeid_t> tx;
+ int num_dirty;
+ int num_clean;
+ int num_free;
+ int num_limbo;
+
+ xlist<Node*> dirty_ls;
+ interval_set<nodeid_t> free;
+ interval_set<nodeid_t> limbo;
Mutex &ebofs_lock;
Cond commit_cond;
int flushing;
- static int make_nodeid(int region, int offset) {
- return (region << 24) | offset;
+ nodeid_t make_nodeid(int region, int offset) { // no longer static: needs region_loc to turn (region, offset) into an id
+ return region_loc[region].start + (block_t)offset; // id = region start + offset, replacing the old (region << 24) | offset packing; region is not range-checked
}
- static int nodeid_region(nodeid_t nid) {
- return nid >> 24;
- }
- static int nodeid_offset(nodeid_t nid) {
- return nid & ((1 << 24) - 1);
+ int nodeid_pos_in_bitmap(nodeid_t nid) { // linear bit position of nid: total length of all earlier regions + offset inside its own region
+ unsigned region;
+ int num = 0; // nodes contained in the regions skipped so far
+ for (region = 0;
+ (block_t)nid < region_loc[region].start || (block_t)nid > region_loc[region].end(); // keep scanning while nid is outside this region; NOTE(review): '>' treats nid == end() as inside -- if Extent::end() is start+length (exclusive) this probably wants '>=', confirm
+ region++) { // NOTE(review): no region < region_loc.size() bound, so a nid outside every region indexes past the vector
+ //generic_dout(-20) << "node " << nid << " not in " << region << " " << region_loc[region] << dendl;
+ num += region_loc[region].length;
+ }
+ num += nid - region_loc[region].start; // offset within the region that contains nid
+ //generic_dout(-20) << "node " << nid << " is in " << region << ", overall bitmap pos is " << num << dendl;
+ return num;
}
public:
NodePool(Mutex &el) :
- num_nodes(0),
+ num_nodes(0),
+ num_dirty(0), num_clean(0), num_free(0), num_limbo(0),
ebofs_lock(el),
flushing(0) {}
~NodePool() {
release_all();
}
- int num_free() { return free.size(); }
- int num_dirty() { return dirty.size(); }
- int num_limbo() { return limbo.size(); }
- int num_tx() { return tx.size(); }
- int num_clean() { return clean.size(); }
- int num_total() { return num_nodes; }
- int num_used() { return num_clean() + num_dirty() + num_tx(); }
+ int get_num_free() { return num_free; } // accessors below return the pool's running counters (the removed num_*() versions returned set sizes)
+ int get_num_dirty() { return num_dirty; }
+ int get_num_limbo() { return num_limbo; }
+ int get_num_clean() { return num_clean; }
+ int get_num_total() { return num_nodes; } // total nodes across all regions
+ int get_num_used() { return num_clean + num_dirty; } // used = clean + dirty (the old tx term is gone)
int get_usemap_len(int n=0) {
if (n == 0) n = num_nodes;
return ((n-1) / 8 / EBOFS_BLOCK_SIZE) + 1;
}
- int num_regions() { return region_loc.size(); }
+ unsigned num_regions() { return region_loc.size(); }
// the caller had better adjust usemap locations...
void add_region(Extent ex) {
- int region = region_loc.size();
- assert(ex.length <= (1 << 24));
+ assert(region_loc.size() < EBOFS_MAX_NODE_REGIONS); // must still have room for one more region
region_loc.push_back(ex);
- for (unsigned o = 0; o < ex.length; o++) {
- free.insert( make_nodeid(region, o) );
- }
+ free.insert(ex.start, ex.length); // mark the whole extent free with a single (start, length) insert instead of one insert per node
+ num_free += ex.length; // keep the free counter in step with the free interval set
num_nodes += ex.length;
}
+ void init_usemap() { // allocate a zeroed in-memory usemap and point usemap_bits at it
+ usemap_data = buffer::create_page_aligned(EBOFS_BLOCK_SIZE*usemap_even.length); // sized from usemap_even only; assumes usemap_odd has the same length -- confirm
+ usemap_data.zero();
+ usemap_bits.set_data(usemap_data.c_str(), usemap_data.length());
+ }
+
+ void expand_usemap() { // grow the in-memory usemap when usemap_even now covers more blocks than usemap_data holds
+ block_t have = usemap_data.length() / EBOFS_BLOCK_SIZE; // blocks currently backed by usemap_data
+ if (have < usemap_even.length) {
+ // use bufferlist to copy/merge two chunks
+ bufferlist bl;
+ bl.push_back(usemap_data);
+ bufferptr newbit = buffer::create_page_aligned(EBOFS_BLOCK_SIZE*(usemap_even.length - have)); // zero-filled tail for the newly added blocks
+ newbit.zero();
+ bl.push_back(newbit);
+ assert(bl.buffers().size() == 1); // NOTE(review): two ptrs were just pushed; this holds only if bufferlist coalesces them -- confirm, an explicit merge step may be missing
+ usemap_data = bl.buffers().front(); // NOTE(review): if the list was not merged this is just the old buffer again
+ usemap_bits.set_data(usemap_data.c_str(), usemap_data.length()); // re-attach the bit accessor to the new usemap_data
+ }
+ }
+
+
+
int init(struct ebofs_nodepool *np) {
// regions
assert(region_loc.empty());
debofs(3) << "init even map at " << usemap_even << std::endl;
debofs(3) << "init odd map at " << usemap_odd << std::endl;
+ init_usemap();
return 0;
}
release_all();
region_loc.clear();
+
+ num_free = 0;
+ num_dirty = 0;
+ num_clean = 0;
+ num_limbo = 0;
+ dirty_ls.clear();
+
free.clear();
- dirty.clear();
- tx.clear();
- clean.clear();
limbo.clear();
+
flushing = 0;
node_map.clear();
}
// *** blocking i/o routines ***
- int read_usemap(BlockDevice& dev, version_t epoch) {
+ int read_usemap_and_clean_nodes(BlockDevice& dev, version_t epoch) {
// read map
Extent loc;
if (epoch & 1)
else
loc = usemap_even;
- bufferptr bp = buffer::create_page_aligned(EBOFS_BLOCK_SIZE*loc.length);
- dev.read(loc.start, loc.length, bp);
+ // usemap
+ dev.read(loc.start, loc.length, usemap_data);
- // parse
- unsigned region = 0; // current region
- unsigned roff = 0; // offset in region
- for (unsigned byte = 0; byte<bp.length(); byte++) { // each byte
- // get byte
- int x = *(unsigned char*)(bp.c_str() + byte);
- int mask = 0x80; // left-most bit
- for (unsigned bit=0; bit<8; bit++) {
- nodeid_t nid = make_nodeid(region, roff);
-
- if (x & mask)
- clean.insert(nid);
- else
- free.insert(nid);
-
- mask = mask >> 1; // move one bit right.
- roff++;
- if (roff == region_loc[region].length) {
- // next region!
- roff = 0;
- region++;
- break;
- }
- }
- if (region == region_loc.size()) break;
- }
- return 0;
- }
-
- int read_clean_nodes(BlockDevice& dev) {
- /*
- this relies on the clean set begin defined so that we know which nodes
- to read. so it only really works when called from mount()!
- */
- for (unsigned r=0; r<region_loc.size(); r++) {
- debofs(3) << "ebofs.nodepool.read region " << r << " at " << region_loc[r] << std::endl;
+ // nodes
+ unsigned region = 0;
+ unsigned region_pos = 0;
+ for (int i=0; i<num_nodes; i++) {
+ nodeid_t nid = make_nodeid(region, region_pos);
+ region_pos++;
+ if (region_pos == region_loc[region].length) {
+ region_pos = 0;
+ region++;
+ }
- for (block_t boff = 0; boff < region_loc[r].length; boff++) {
- nodeid_t nid = make_nodeid(r, boff);
-
- if (!clean.count(nid)) continue;
- debofs(20) << "ebofs.nodepool.read node " << nid << std::endl;
-
+ if (usemap_bits[i]) {
+ num_clean++;
bufferptr bp = buffer::create_page_aligned(EBOFS_NODE_BYTES);
- dev.read(region_loc[r].start + (block_t)boff, EBOFS_NODE_BLOCKS,
- bp);
+ dev.read((block_t)nid, EBOFS_NODE_BLOCKS, bp);
- Node *n = new Node(nid, bp, Node::STATE_CLEAN);
+ Node *n = new Node(nid, i, bp, Node::STATE_CLEAN);
node_map[nid] = n;
- debofs(10) << "ebofs.nodepool.read node " << n << " at " << (void*)n << std::endl;
+ debofs(10) << "ebofs.nodepool.read node " << nid << " at " << (void*)n << std::endl;
+
+ } else {
+ //debofs(-10) << "ebofs.nodepool.read node " << nid << " is free" << std::endl;
+ free.insert(nid);
+ num_free++;
}
}
+ debofs(10) << "ebofs.nodepool.read free is " << free.m << std::endl;
+ assert(num_dirty == 0);
+ assert(num_limbo == 0);
+ assert(num_clean + num_free == num_nodes);
+
return 0;
}
-
// **** non-blocking i/o ****
private:
else
loc = usemap_even;
- bufferptr bp = buffer::create_page_aligned(EBOFS_BLOCK_SIZE*loc.length);
-
- // fill in
- unsigned region = 0; // current region
- unsigned roff = 0; // offset in region
- for (unsigned byte = 0; byte<bp.length(); byte++) { // each byte
- int x = 0; // start with empty byte
- int mask = 0x80; // left-most bit
- for (unsigned bit=0; bit<8; bit++) {
- nodeid_t nid = make_nodeid(region, roff);
-
- if (clean.count(nid) ||
- dirty.count(nid))
- x |= mask;
-
- roff++;
- mask = mask >> 1;
- if (roff == region_loc[region].length) {
- // next region!
- roff = 0;
- region++;
- break;
- }
- }
-
- *(unsigned char*)(bp.c_str() + byte) = x;
- if (region == region_loc.size()) break;
- }
-
-
// write
bufferlist bl;
+ bufferptr bp = usemap_data.clone();
bl.append(bp);
dev.write(loc.start, loc.length, bl,
new C_NP_FlushUsemap(this), "usemap");
void flushed_node(nodeid_t nid) {
ebofs_lock.Lock();
-
- // mark nid clean|limbo
- if (tx.count(nid)) { // tx -> clean
- tx.erase(nid);
- clean.insert(nid);
-
- // make node itself clean
- node_map[nid]->set_state(Node::STATE_CLEAN);
- }
- else { // already limbo (was dirtied, or released)
- assert(limbo.count(nid));
- }
-
flushing--;
if (flushing == 0)
commit_cond.Signal();
public:
void commit_start(BlockDevice& dev, version_t version) {
- generic_dout(20) << "ebofs.nodepool.commit_start start" << dendl;
+ debofs(20) << "ebofs.nodepool.commit_start start" << std::endl;
assert(flushing == 0);
/*if (0)
flushing++;
write_usemap(dev, version & 1);
- // dirty -> tx (write to disk)
- assert(tx.empty());
- set<block_t> didb;
- for (set<nodeid_t>::iterator i = dirty.begin();
- i != dirty.end();
- i++) {
- Node *n = get_node(*i);
+ // dirty -> clean (write to disk)
+ while (!dirty_ls.empty()) {
+ Node *n = dirty_ls.front();
assert(n);
assert(n->is_dirty());
- n->set_state(Node::STATE_TX);
-
- unsigned region = nodeid_region(*i);
- block_t off = nodeid_offset(*i);
- block_t b = region_loc[region].start + off;
-
- if (0) { // sanity check debug FIXME
- assert(didb.count(b) == 0);
- didb.insert(b);
- }
+ n->set_state(Node::STATE_CLEAN);
+ dirty_ls.remove(&n->xlist);
+ num_dirty--;
+ num_clean++;
+ debofs(20) << "ebofs.nodepool.commit_start writing node " << n->get_id() << std::endl;
+
bufferlist bl;
bl.append(n->get_buffer());
- dev.write(b, EBOFS_NODE_BLOCKS,
+ dev.write(n->get_id(), EBOFS_NODE_BLOCKS,
bl,
- new C_NP_FlushNode(this, *i), "node");
+ new C_NP_FlushNode(this, n->get_id()), "node");
flushing++;
-
- tx.insert(*i);
}
- dirty.clear();
// limbo -> free
- for (set<nodeid_t>::iterator i = limbo.begin();
- i != limbo.end();
+ for (map<nodeid_t,nodeid_t>::iterator i = limbo.m.begin();
+ i != limbo.m.end();
i++) {
- free.insert(*i);
+ num_free += i->second;
+ num_limbo -= i->second;
+ free.insert(i->first, i->second);
}
limbo.clear();
return node_map[nid];
}
- // unopened node
- /* not implemented yet!!
- Node* open_node(nodeid_t nid) {
- Node *n = node_regions[ NodeRegion::nodeid_region(nid) ]->open_node(nid);
- dbtout << "pool.open_node " << n->get_id() << std::endl;
- node_map[n->get_id()] = n;
- return n;
- }
- */
-
// allocate id/block on disk. always free -> dirty.
nodeid_t alloc_id() {
// pick node id
assert(!free.empty());
- nodeid_t nid = *(free.begin());
+ nodeid_t nid = free.start();
free.erase(nid);
- dirty.insert(nid);
+ num_free--;
return nid;
}
// alloc node
bufferptr bp = buffer::create_page_aligned(EBOFS_NODE_BYTES);
- Node *n = new Node(nid, bp, Node::STATE_DIRTY);
+ bp.zero();
+ Node *n = new Node(nid, nodeid_pos_in_bitmap(nid), bp, Node::STATE_DIRTY);
n->set_type(type);
n->set_size(0);
+ usemap_bits.set(n->get_pos_in_bitmap());
+
+ n->set_state(Node::STATE_DIRTY);
+ dirty_ls.push_back(&n->xlist);
+ num_dirty++;
+
assert(node_map.count(nid) == 0);
node_map[nid] = n;
+
return n;
}
node_map.erase(nid);
if (n->is_dirty()) {
- assert(dirty.count(nid));
- dirty.erase(nid);
+ dirty_ls.remove(&n->xlist);
+ num_dirty--;
free.insert(nid);
+ num_free++;
+ usemap_bits.clear(n->get_pos_in_bitmap());
} else if (n->is_clean()) {
- assert(clean.count(nid));
- clean.erase(nid);
- limbo.insert(nid);
- } else if (n->is_tx()) {
- assert(tx.count(nid)); // i guess htis happens? -sage
- tx.erase(nid);
limbo.insert(nid);
+ num_limbo++;
+ num_clean--;
+ usemap_bits.clear(n->get_pos_in_bitmap());
}
delete n;
+ assert(num_clean + num_dirty + num_limbo + num_free == num_nodes);
}
void release_all() {
nodeid_t oldid = n->get_id();
nodeid_t newid = alloc_id();
debofs(15) << "ebofs.nodepool.dirty_node on " << oldid << " now " << newid << std::endl;
+
+ // dup data?
+ // this only does a memcpy if there are multiple references..
+ // i.e. if we are still writing the old data
+ n->do_cow();
// release old block
- if (n->is_clean()) {
- assert(clean.count(oldid));
- clean.erase(oldid);
- } else {
- assert(n->is_tx());
- assert(tx.count(oldid));
- tx.erase(oldid);
-
- // move/copy current -> shadow buffer as necessary
- n->make_shadow();
- }
+ assert(n->is_clean());
+ num_clean--;
limbo.insert(oldid);
+ num_limbo++;
+ usemap_bits.clear(n->get_pos_in_bitmap());
+
+ // rename node
node_map.erase(oldid);
-
- n->set_state(Node::STATE_DIRTY);
-
- // move to new one!
n->set_id(newid);
+ n->set_pos_in_bitmap(nodeid_pos_in_bitmap(newid));
node_map[newid] = n;
- }
-
+
+ // new block
+ n->set_state(Node::STATE_DIRTY);
+ dirty_ls.push_back(&n->xlist);
+ num_dirty++;
+ usemap_bits.set(n->get_pos_in_bitmap());
+
+ assert(num_clean + num_dirty + num_limbo + num_free == num_nodes);
+ }
};
case 0:
{
oid.rev = rand() % 10;
- cout << t << " read " << hex << oid << dec << " at " << off << " len " << len << endl;
+ cout << t << " read " << hex << oid << dec << " at " << off << " len " << len << std::endl;
bufferlist bl;
fs.read(oid, off, len, bl);
int l = MIN(len,bl.length());
if (l) {
- cout << t << " got " << l << endl;
+ cout << t << " got " << l << std::endl;
bl.copy(0, l, b);
char *p = b;
while (l--) {
case 1:
{
- cout << t << " write " << hex << oid << dec << " at " << off << " len " << len << endl;
+ cout << t << " write " << hex << oid << dec << " at " << off << " len " << len << std::endl;
for (int j=0;j<len;j++)
b[j] = (char)(oid.ino^(off+j));
bufferptr wp(b, len);
break;
case 2:
- cout << t << " remove " << hex << oid << dec << endl;
+ cout << t << " remove " << hex << oid << dec << std::endl;
fs.remove(oid);
break;
case 3:
- cout << t << " collection_add " << hex << oid << dec << " to " << cid << endl;
+ cout << t << " collection_add " << hex << oid << dec << " to " << cid << std::endl;
fs.collection_add(cid, oid, 0);
break;
case 4:
- cout << t << " collection_remove " << hex << oid << dec << " from " << cid << endl;
+ cout << t << " collection_remove " << hex << oid << dec << " from " << cid << std::endl;
fs.collection_remove(cid, oid, 0);
break;
case 5:
- cout << t << " setattr " << hex << oid << dec << " " << a << " len " << l << endl;
+ cout << t << " setattr " << hex << oid << dec << " " << a << " len " << l << std::endl;
fs.setattr(oid, a, (void*)a, l, 0);
break;
case 6:
- cout << t << " rmattr " << hex << oid << dec << " " << a << endl;
+ cout << t << " rmattr " << hex << oid << dec << " " << a << std::endl;
fs.rmattr(oid,a);
break;
case 7:
{
char v[4];
- cout << t << " getattr " << hex << oid << dec << " " << a << endl;
+ cout << t << " getattr " << hex << oid << dec << " " << a << std::endl;
if (fs.getattr(oid,a,(void*)v,3) == 0) {
v[3] = 0;
assert(strcmp(v,a) == 0);
case 8:
{
- cout << t << " truncate " << hex << oid << dec << " " << off << endl;
+ cout << t << " truncate " << hex << oid << dec << " " << off << std::endl;
fs.truncate(oid, 0);
}
break;
{
object_t newoid = oid;
newoid.rev = rand() % 10;
- cout << t << " clone " << oid << " to " << newoid << endl;
+ cout << t << " clone " << oid << " to " << newoid << std::endl;
fs.clone(oid, newoid, 0);
}
}
}
- cout << t << " done" << endl;
+ cout << t << " done" << std::endl;
return 0;
}
};
int threads = atoi(args[2]);
if (!threads) threads = 1;
- cout << "dev " << filename << " .. " << threads << " threads .. " << seconds << " seconds" << endl;
+ cout << "dev " << filename << " .. " << threads << " threads .. " << seconds << " seconds" << std::endl;
Ebofs fs(filename);
if (fs.mount() < 0) return -1;
utime_t now = g_clock.now();
utime_t dur(seconds,0);
utime_t end = now + dur;
- cout << "stop at " << end << endl;
+ cout << "stop at " << end << std::endl;
while (now < end) {
sleep(1);
now = g_clock.now();
- cout << now << endl;
+ cout << now << std::endl;
}
- cout << "stopping" << endl;
+ cout << "stopping" << std::endl;
stop = true;
while (!ls.empty()) {
// tree/set nodes
-typedef int nodeid_t;
+//typedef int nodeid_t;
+typedef int64_t nodeid_t; // actually, a block number. FIXME.
-static const int EBOFS_NODE_BLOCKS = 1;
-static const int EBOFS_NODE_BYTES = EBOFS_NODE_BLOCKS * EBOFS_BLOCK_SIZE;
-static const int EBOFS_MAX_NODE_REGIONS = 10; // pick a better value!
+static const unsigned EBOFS_NODE_BLOCKS = 1;
+static const unsigned EBOFS_NODE_BYTES = EBOFS_NODE_BLOCKS * EBOFS_BLOCK_SIZE;
+static const unsigned EBOFS_MAX_NODE_REGIONS = 10; // pick a better value!
struct ebofs_nodepool {
Extent node_usemap_even; // for even sb versions
*
*/
-
#ifndef __BITMAPPER_H
#define __BITMAPPER_H
class bitmapper {
char *_data;
+ int _len;
public:
- bitmapper(char *data) : _data(data) { }
+ bitmapper() : _data(0), _len(0) { }
+ bitmapper(char *data, int len) : _data(data), _len(len) { }
+
+ void set_data(char *data, int len) { _data = data; _len = len; }
- bool operator[](int b) {
+ int bytes() const { return _len; }
+ int bits() const { return _len * 8; }
+
+ bool operator[](int b) const {
+ return get(b);
+ }
+ bool get(int b) const {
return _data[b >> 3] & (1 << (b&7));
}
void set(int b) {
~ptr() {
release();
}
+
+ raw *clone() {
+ return _raw->clone();
+ }
+
+ void do_cow() {
+ if (_raw->nref != 1) {
+ std::cout << "doing cow on " << _raw << " len " << _len << std::endl;
+ _raw = _raw->clone();
+ }
+ }
void swap(ptr& other) {
raw *r = _raw;
#ifndef __XLIST_H
#define __XLIST_H
-/*
-class xlist_head;
-
-class xlist_item {
- private:
- xlist_item *_prev, *_next;
- xlist_head *_head;
- friend class xlist_head;
-
- public:
- xlist_item() : _prev(0), _next(0), _head(0) {}
- xlist_head* _get_containing_xlist() { return _head; }
-};
-
-class xlist_head {
- private:
- xlist_item *_front, *_back;
- int _size;
-
- friend class xlist_item;
-
- public:
- int size() { return _size; }
- bool empty() { return _front == 0; }
-
- void push_back(xlist_item *item) {
- if (item->_head) item->_head->remove(item);
-
- item->_head = this;
- item->_next = 0;
- item->_prev = _back;
- if (_back) _back->_next = item;
- _back = item;
- _size++;
- }
- void remove(xlist_item *item) {
- assert(item->_head == this);
-
- if (item->_prev)
- item->_prev->_next = item->_next;
- else
- _front = item->_next;
- if (item->_next)
- item->_next->_prev = item->_prev;
- else
- _back = item->_prev;
- _size--;
-
- item->_head = 0;
- item->_next = item->_prev = 0;
- }
-
-};
-*/
-
-
-
template<typename T>
class xlist {
public:
int _size;
public:
+ xlist() : _front(0), _back(0), _size(0) {}
+
int size() { return _size; }
bool empty() { return _front == 0; }