From 242449ae20c244fd7d92b89fbc019702b86c5bf2 Mon Sep 17 00:00:00 2001 From: sage Date: Thu, 15 Dec 2005 22:01:26 +0000 Subject: [PATCH] working pretty well! git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@525 29311d96-e01e-0410-9327-a35deaab8ce9 --- ceph/common/Cond.h | 7 +- ceph/config.cc | 7 +- ceph/config.h | 3 + ceph/ebofs/AlignedBufferPool.h | 28 +++++- ceph/ebofs/Allocator.cc | 80 +++++++++------ ceph/ebofs/Allocator.h | 6 +- ceph/ebofs/BlockDevice.cc | 45 +++++---- ceph/ebofs/BlockDevice.h | 2 + ceph/ebofs/BufferCache.cc | 171 +++++++++++++++++++------------- ceph/ebofs/BufferCache.h | 55 +++++++++-- ceph/ebofs/Ebofs.cc | 120 ++++++++++++++++------ ceph/ebofs/Ebofs.h | 19 +++- ceph/ebofs/Onode.h | 19 +++- ceph/ebofs/Table.h | 18 ++-- ceph/ebofs/mkfs.ebofs.cc | 50 +++++++--- ceph/ebofs/nodes.h | 1 - ceph/ebofs/types.h | 8 +- ceph/include/interval_set.h | 175 ++++++++++++++++++++++++--------- 18 files changed, 572 insertions(+), 242 deletions(-) diff --git a/ceph/common/Cond.h b/ceph/common/Cond.h index 3a376108d5a4c..eaa40db97398f 100644 --- a/ceph/common/Cond.h +++ b/ceph/common/Cond.h @@ -44,11 +44,16 @@ class Cond return Wait(mutex, utime_t(tv->tv_sec, tv->tv_usec)); } int Wait(Mutex &mutex, - utime_t when) { + utime_t wait) { + utime_t when = g_clock.now(); + when += wait; + // timeval -> timespec struct timespec ts; + memset(&ts, 0, sizeof(ts)); ts.tv_sec = when.sec(); ts.tv_nsec = when.nsec(); + //cout << "timedwait for " << ts.tv_sec << " sec " << ts.tv_nsec << " nsec" << endl; int r = pthread_cond_timedwait(&C, &mutex.M, &ts); return r; } diff --git a/ceph/config.cc b/ceph/config.cc index 8b818095d754e..7d03cf6f012cd 100644 --- a/ceph/config.cc +++ b/ceph/config.cc @@ -50,13 +50,14 @@ md_config_t g_conf = { fake_osdmap_expand: 0, fake_osd_sync: true, - debug: 10, + debug: 30, debug_mds_balancer: 1, debug_mds_log: 1, debug_buffer: 0, debug_filer: 0, debug_client: 0, debug_osd: 0, + debug_bdev: 1, // block device // --- client --- client_cache_size: 300, @@ -112,6 +113,8 @@ md_config_t g_conf = { osd_fakestore_syncthreads: 4, + ebofs_bc_size: 100, // measured in 4k blocks + // --- fakeclient (mds regression testing) --- num_fakeclient: 100, @@ -201,6 +204,8 @@ void parse_config_options(vector& args) g_conf.debug_client = atoi(args[++i]); else if (strcmp(args[i], "--debug_osd") == 0) g_conf.debug_osd = atoi(args[++i]); + else if (strcmp(args[i], "--debug_bdev") == 0) + g_conf.debug_bdev = atoi(args[++i]); else if (strcmp(args[i], "--log") == 0) g_conf.log = atoi(args[++i]); diff --git a/ceph/config.h b/ceph/config.h index 66e8a5cd8d48b..1fdc64fd231df 100644 --- a/ceph/config.h +++ b/ceph/config.h @@ -35,6 +35,7 @@ struct md_config_t { int debug_filer; int debug_client; int debug_osd; + int debug_bdev; // client int client_cache_size; @@ -87,6 +88,8 @@ struct md_config_t { int osd_fakestore_syncthreads; + off_t ebofs_bc_size; + // fake client int num_fakeclient; unsigned fakeclient_requests; diff --git a/ceph/ebofs/AlignedBufferPool.h b/ceph/ebofs/AlignedBufferPool.h index 3347bd7abe879..ab099fcefc965 100644 --- a/ceph/ebofs/AlignedBufferPool.h +++ b/ceph/ebofs/AlignedBufferPool.h @@ -6,6 +6,12 @@ #include using namespace std; +// for posix_memalign +#define _XOPEN_SOURCE 600 +#include +#include + +// for mmap #include #include "include/buffer.h" @@ -13,17 +19,23 @@ using namespace std; + class AlignedBufferPool { int alignment; // err, this isn't actually enforced! we just use mmap. + bool dommap; + public: - AlignedBufferPool(int a) : alignment(a) {} + AlignedBufferPool(int a) : alignment(a), dommap(false) {} ~AlignedBufferPool() { } void free(char *p, unsigned len) { - dout(10) << "bufferpool.free " << (void*)p << " len " << len << endl; - munmap(p, len); + dout(30) << "bufferpool.free " << (void*)p << " len " << len << endl; + if (dommap) + munmap(p, len); + else + ::free((void*)p); } static void aligned_buffer_free_func(void *arg, char *ptr, unsigned len) { @@ -33,10 +45,16 @@ class AlignedBufferPool { buffer *alloc(int bytes) { assert(bytes % alignment == 0); - char *p = (char*)mmap(NULL, bytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + char *p = 0; + if (dommap) + p = (char*)mmap(NULL, bytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + else + ::posix_memalign((void**)&p, alignment, bytes); assert(p); + + ::memset(p, 0, bytes); // only to shut up valgrind - dout(10) << "bufferpool.alloc " << (void*)p << endl; + dout(30) << "bufferpool.alloc " << (void*)p << endl; return new buffer(p, bytes, BUFFER_MODE_NOCOPY|BUFFER_MODE_NOFREE|BUFFER_MODE_CUSTOMFREE, bytes, diff --git a/ceph/ebofs/Allocator.cc b/ceph/ebofs/Allocator.cc index 69bd76466dae7..daf8f10cbcdfc 100644 --- a/ceph/ebofs/Allocator.cc +++ b/ceph/ebofs/Allocator.cc @@ -9,19 +9,34 @@ void Allocator::dump_freelist() { - if (0) - for (int b=0; bfree_tab[b]->get_num_keys() > 0) { - Table::Cursor cursor(fs->free_tab[b]); - fs->free_tab[b]->find(0, cursor); - while (1) { - dout(20) << "dump ex " << cursor.current().key << "~" << cursor.current().value << endl; - if (cursor.move_right() <= 0) break; + if (1) { + interval_set free; // validate too + + for (int b=0; b<=EBOFS_NUM_FREE_BUCKETS; b++) { + Table *tab; + if (b < EBOFS_NUM_FREE_BUCKETS) { + dout(30) << "dump bucket " << b << endl; + tab = fs->free_tab[b]; + } else { + dout(30) << "dump limbo" << endl; + tab = fs->limbo_tab; + } + + if (tab->get_num_keys() > 0) { + Table::Cursor cursor(tab); + tab->find(0, cursor); + while (1) { + dout(30) << "dump ex " << cursor.current().key << "~" << cursor.current().value << endl; + assert(!free.contains( cursor.current().key, cursor.current().value )); + free.insert( cursor.current().key, cursor.current().value ); + if (cursor.move_right() <= 0) break; + } + } else { + //cout << " empty" << endl; } - } else { - //cout << " empty" << endl; } + + dout(31) << "dump combined freelist is " << free << endl; } } @@ -37,7 +52,7 @@ int Allocator::find(Extent& ex, int bucket, block_t num, block_t near) do { if (cursor.current().value >= num) found = true; - } while (!found && cursor.move_right() >= 0); + } while (!found && cursor.move_right() > 0); } if (!found) { @@ -60,6 +75,8 @@ int Allocator::find(Extent& ex, int bucket, block_t num, block_t near) int Allocator::allocate(Extent& ex, block_t num, block_t near) { + dump_freelist(); + /* if (!near) { near = num/2; // this is totally wrong and stupid. @@ -75,7 +92,7 @@ int Allocator::allocate(Extent& ex, block_t num, block_t near) // remove original fs->free_tab[bucket]->remove( ex.start ); - fs->free_blocks += ex.length; + fs->free_blocks -= ex.length; if (ex.length > num) { if (ex.start < near) { @@ -88,7 +105,7 @@ int Allocator::allocate(Extent& ex, block_t num, block_t near) ex.start += left.length; ex.length -= left.length; assert(ex.length == num); - release_now(left); + _release(left); } else { // take middle part. Extent left,right; @@ -98,8 +115,8 @@ int Allocator::allocate(Extent& ex, block_t num, block_t near) right.start = ex.start + num; right.length = ex.length - left.length - num; ex.length = num; - release_now(left); - release_now(right); + _release(left); + _release(right); } } else { @@ -108,7 +125,7 @@ int Allocator::allocate(Extent& ex, block_t num, block_t near) right.start = ex.start + num; right.length = ex.length - num; ex.length = num; - release_now(right); + _release(right); } } @@ -141,26 +158,35 @@ int Allocator::allocate(Extent& ex, block_t num, block_t near) int Allocator::release(Extent& ex) { dout(10) << "release " << ex << " (into limbo)" << endl; - limbo.insert(ex.start, ex.length); + fs->limbo_tab->insert(ex.start, ex.length); + fs->limbo_blocks += ex.length; return 0; } int Allocator::release_limbo() { - for (map::iterator i = limbo.m.begin(); - i != limbo.m.end(); - i++) { - Extent ex(i->first, i->second); - release_now(ex); + if (fs->limbo_tab->get_num_keys() > 0) { + Table::Cursor cursor(fs->limbo_tab); + fs->limbo_tab->find(0, cursor); + while (1) { + Extent ex(cursor.current().key, cursor.current().value); + dout(20) << "release_limbo ex " << ex << endl; + _release(ex); + if (cursor.move_right() <= 0) break; + } } + fs->limbo_tab->clear(); + fs->limbo_blocks = 0; return 0; } -int Allocator::release_now(Extent& ex) +int Allocator::_release(Extent& ex) { Extent newex = ex; - dout(10) << "release " << ex << endl; + dout(15) << "_release " << ex << endl; + + fs->free_blocks += ex.length; // one after us? for (int b=0; bfree_blocks += ex.length; - // ok, insert newex int b = pick_bucket(ex.length); fs->free_tab[b]->insert(ex.start, ex.length); - - dump_freelist(); return 0; } diff --git a/ceph/ebofs/Allocator.h b/ceph/ebofs/Allocator.h index 02473739906ca..6f172d08f8d26 100644 --- a/ceph/ebofs/Allocator.h +++ b/ceph/ebofs/Allocator.h @@ -11,7 +11,7 @@ class Allocator { protected: Ebofs *fs; - interval_set limbo; + //interval_set limbo; static int pick_bucket(block_t num) { int b = 0; @@ -28,13 +28,13 @@ class Allocator { void dump_freelist(); + int _release(Extent& ex); + public: Allocator(Ebofs *f) : fs(f) {} int allocate(Extent& ex, block_t num, block_t near=0); int release(Extent& ex); - int release_now(Extent& ex); - int release_limbo(); }; diff --git a/ceph/ebofs/BlockDevice.cc b/ceph/ebofs/BlockDevice.cc index 8bfea6a2424f8..b956be09d2e09 100644 --- a/ceph/ebofs/BlockDevice.cc +++ b/ceph/ebofs/BlockDevice.cc @@ -19,7 +19,7 @@ #include #undef dout -#define dout(x) if (x <= g_conf.debug) cout << "dev." +#define dout(x) if (x <= g_conf.debug_bdev) cout << "dev." block_t BlockDevice::get_num_blocks() @@ -165,14 +165,32 @@ void BlockDevice::do_io(list& biols) for (p = biols.begin(); p != biols.end(); p++) (*p)->rval = r; - // put in completion queue - complete_lock.Lock(); - complete_queue.splice( complete_queue.end(), biols ); - complete_wakeup.Signal(); - complete_lock.Unlock(); + if (1) { + // put in completion queue + complete_lock.Lock(); + complete_queue.splice( complete_queue.end(), biols ); + complete_wakeup.Signal(); + complete_lock.Unlock(); + } else { + // be slow and finish synchronously + for (p = biols.begin(); p != biols.end(); p++) + finish_io(*p); + } } +void BlockDevice::finish_io(biovec *bio) +{ + if (bio->cond) { + bio->cond->Signal(); + } + else if (bio->cb) { + bio->cb->finish((ioh_t)bio, bio->rval); + delete bio->cb; + delete bio; + } +} + int BlockDevice::complete_thread_entry() { complete_lock.Lock(); @@ -180,7 +198,7 @@ int BlockDevice::complete_thread_entry() while (!io_stop) { - if (!complete_queue.empty()) { + while (!complete_queue.empty()) { list ls; ls.swap(complete_queue); @@ -192,20 +210,12 @@ int BlockDevice::complete_thread_entry() p++) { biovec *bio = *p; dout(20) << "complete_thread finishing " << (void*)bio << endl; - if (bio->cond) { - bio->cond->Signal(); - } - else if (bio->cb) { - bio->cb->finish((ioh_t)bio, bio->rval); - delete bio->cb; - delete bio; - } + finish_io(bio); } complete_lock.Lock(); - - if (io_stop) break; } + if (io_stop) break; dout(25) << "complete_thread sleeping" << endl; complete_wakeup.Wait(complete_lock); @@ -341,7 +351,6 @@ int BlockDevice::_write(unsigned bno, unsigned num, bufferlist& bl) int BlockDevice::open() { - dout(1) << "open " << dev << endl; assert(fd == 0); fd = ::open(dev, O_CREAT|O_RDWR|O_SYNC|O_DIRECT); diff --git a/ceph/ebofs/BlockDevice.h b/ceph/ebofs/BlockDevice.h index 87be8860338f3..e3aca9d279bf3 100644 --- a/ceph/ebofs/BlockDevice.h +++ b/ceph/ebofs/BlockDevice.h @@ -75,6 +75,8 @@ class BlockDevice { Cond complete_wakeup; list complete_queue; + void finish_io(biovec *bio); + // complete thread int complete_thread_entry(); class CompleteThread : public Thread { diff --git a/ceph/ebofs/BufferCache.cc b/ceph/ebofs/BufferCache.cc index bf3663bf669d8..9c6245f83f152 100644 --- a/ceph/ebofs/BufferCache.cc +++ b/ceph/ebofs/BufferCache.cc @@ -60,7 +60,6 @@ void BufferHead::queue_partial_write(block_t b) #define dout(x) if (x <= g_conf.debug) cout << "ebofs.oc." - void ObjectCache::rx_finish(ioh_t ioh, block_t start, block_t length) { list waiters; @@ -71,41 +70,51 @@ void ObjectCache::rx_finish(ioh_t ioh, block_t start, block_t length) for (map::iterator p = data.lower_bound(start); p != data.end(); p++) { - dout(10) << "rx_finish ?" << *p->second << endl; + BufferHead *bh = p->second; + dout(10) << "rx_finish ?" << *bh << endl; + assert(p->first == bh->start()); // past? if (p->first >= start+length) break; - if (p->second->end() > start+length) break; // past + if (bh->end() > start+length) break; // past assert(p->first >= start); - assert(p->second->end() <= start+length); + assert(bh->end() <= start+length); - dout(10) << "rx_finish !" << *p->second << endl; + dout(10) << "rx_finish !" << *bh << endl; - if (p->second->rx_ioh == ioh) - p->second->rx_ioh = 0; + if (bh->rx_ioh == ioh) + bh->rx_ioh = 0; - if (p->second->is_partial_writes()) - p->second->finish_partials(); + if (bh->is_partial_writes()) + bh->finish_partials(); - if (p->second->is_rx()) { - assert(p->second->get_version() == 0); - assert(p->second->end() <= start+length); - dout(10) << "rx_finish rx -> clean on " << *p->second << endl; - bc->mark_clean(p->second); + if (bh->is_rx()) { + assert(bh->get_version() == 0); + assert(bh->end() <= start+length); + dout(10) << "rx_finish rx -> clean on " << *bh << endl; + bc->mark_clean(bh); } - else if (p->second->is_partial()) { - dout(10) << "rx_finish partial -> clean on " << *p->second << endl; - p->second->apply_partial(); - bc->mark_clean(p->second); + else if (bh->is_partial()) { + dout(10) << "rx_finish partial -> clean on " << *bh << endl; + bh->apply_partial(); + bc->mark_clean(bh); } else { - dout(10) << "rx_finish ignoring status on (dirty|tx) " << *p->second << endl; - assert(p->second->is_dirty() || p->second->is_tx()); + dout(10) << "rx_finish ignoring status on (dirty|tx|clean) " << *bh << endl; + assert(bh->is_dirty() || // was overwritten + bh->is_tx() || // was overwritten and queued + bh->is_clean()); // was overwritten, queued, _and_ flushed to disk } // trigger waiters - waiters.splice(waiters.begin(), p->second->waitfor_read); + for (map >::iterator p = bh->waitfor_read.begin(); + p != bh->waitfor_read.end(); + p++) { + assert(p->first >= bh->start() && p->first < bh->end()); + waiters.splice(waiters.begin(), p->second); + } + bh->waitfor_read.clear(); } finish_contexts(waiters); @@ -117,7 +126,7 @@ void ObjectCache::rx_finish(ioh_t ioh, block_t start, block_t length) void ObjectCache::tx_finish(ioh_t ioh, block_t start, block_t length, version_t version, version_t epoch) { - list waiters; + //list waiters; bc->ebofs_lock.Lock(); @@ -125,37 +134,38 @@ void ObjectCache::tx_finish(ioh_t ioh, block_t start, block_t length, for (map::iterator p = data.lower_bound(start); p != data.end(); p++) { - dout(20) << "tx_finish ?bh " << *p->second << endl; - assert(p->first == p->second->start()); + BufferHead *bh = p->second; + dout(30) << "tx_finish ?bh " << *bh << endl; + assert(p->first == bh->start()); // past? if (p->first >= start+length) break; - if (p->second->tx_ioh == ioh) - p->second->tx_ioh = 0; + if (bh->tx_ioh == ioh) + bh->tx_ioh = 0; - if (!p->second->is_tx()) { + if (!bh->is_tx()) { dout(10) << "tx_finish bh not marked tx, skipping" << endl; continue; } - assert(p->second->is_tx()); - assert(p->second->end() <= start+length); + assert(bh->is_tx()); + assert(bh->end() <= start+length); - if (version == p->second->version) { - dout(10) << "tx_finish tx -> clean on " << *p->second << endl; - p->second->set_last_flushed(version); - bc->mark_clean(p->second); + if (version == bh->version) { + dout(10) << "tx_finish tx -> clean on " << *bh << endl; + bh->set_last_flushed(version); + bc->mark_clean(bh); // trigger waiters - waiters.splice(waiters.begin(), p->second->waitfor_flush); + //waiters.splice(waiters.begin(), bh->waitfor_flush); } else { - dout(10) << "tx_finish leaving tx, " << p->second->version << " > " << version - << " on " << *p->second << endl; + dout(10) << "tx_finish leaving tx, " << bh->version << " > " << version + << " on " << *bh << endl; } } - finish_contexts(waiters); + //finish_contexts(waiters); // update unflushed counter assert(bc->get_unflushed(epoch) > 0); @@ -211,10 +221,9 @@ int ObjectCache::map_read(Onode *on, on->map_extents(cur, left, exv); // we might consider some prefetch here. for (unsigned i=0; iadd_bh(n); n->set_start( cur ); n->set_length( exv[i].length ); - data[cur] = n; + bc->add_bh(n); missing[cur] = n; cur += exv[i].length; } @@ -257,10 +266,9 @@ int ObjectCache::map_read(Onode *on, for (unsigned i=0; iadd_bh(n); n->set_start( cur ); n->set_length( exv[i].length ); - data[cur] = n; + bc->add_bh(n); missing[cur] = n; cur += n->length(); left -= n->length(); @@ -291,6 +299,8 @@ int ObjectCache::map_write(Onode *on, map& hits) { map::iterator p = data.lower_bound(start); + + dout(10) << "map_write " << *on << " " << start << "~" << len << " ... alloc " << alloc << endl; // p->first >= start block_t cur = start; @@ -303,7 +313,9 @@ int ObjectCache::map_write(Onode *on, p++; // doesn't overlap. } - for (; left > 0; p++) { + //dump(); + + while (left > 0) { // max for this bh (bc of (re)alloc on disk) block_t max = left; bool newalloc = false; @@ -337,13 +349,17 @@ int ObjectCache::map_write(Onode *on, // at end? if (p == data.end()) { BufferHead *n = new BufferHead(this); - bc->add_bh(n); n->set_start( cur ); n->set_length( max ); - data[cur] = n; + bc->add_bh(n); hits[cur] = n; - break; + left -= max; + cur += max; + continue; } + + dout(10) << "p is " << *p->second << endl; + if (p->first <= cur) { BufferHead *bh = p->second; @@ -416,23 +432,19 @@ int ObjectCache::map_write(Onode *on, if (bh->start() < cur) lenfromcur -= cur - bh->start(); - if (lenfromcur < left) { - cur += lenfromcur; - left -= lenfromcur; - continue; // more! - } else { - break; // done. - } + cur += lenfromcur; + left -= lenfromcur; + p++; + continue; } else { // gap! block_t next = p->first; block_t glen = MIN(next-cur, max); dout(10) << "map_write gap " << cur << "~" << glen << endl; BufferHead *n = new BufferHead(this); - bc->add_bh(n); n->set_start( cur ); n->set_length( glen ); - data[cur] = n; + bc->add_bh(n); hits[cur] = n; cur += glen; @@ -441,6 +453,8 @@ int ObjectCache::map_write(Onode *on, } } + assert(left == 0); + assert(cur == start+len); return 0; } @@ -487,8 +501,12 @@ void ObjectCache::tear_down() if (bh->is_rx()) bc->bh_cancel_read(bh); - finish_contexts(bh->waitfor_read, -1); - finish_contexts(bh->waitfor_flush, -1); + for (map >::iterator p = bh->waitfor_read.begin(); + p != bh->waitfor_read.end(); + p++) { + finish_contexts(p->second, -1); + } + //finish_contexts(bh->waitfor_flush, -1); delete bh; } @@ -508,23 +526,23 @@ BufferHead *BufferCache::split(BufferHead *orig, block_t after) { dout(20) << "split " << *orig << " at " << after << endl; + // split off right BufferHead *right = new BufferHead(orig->get_oc()); - orig->get_oc()->add_bh(right, after); - right->set_version(orig->get_version()); right->set_state(orig->get_state()); - - block_t mynewlen = after - orig->start(); + block_t newleftlen = after - orig->start(); right->set_start( after ); - right->set_length( orig->length() - mynewlen ); - - add_bh(right); + right->set_length( orig->length() - newleftlen ); + // shorten left stat_sub(orig); - orig->set_length( mynewlen ); + orig->set_length( newleftlen ); stat_add(orig); - // buffers! + // add right + add_bh(right); + + // split buffers too bufferlist bl; bl.claim(orig->data); if (bl.length()) { @@ -533,12 +551,22 @@ BufferHead *BufferCache::split(BufferHead *orig, block_t after) orig->data.substr_of(bl, 0, orig->length()*EBOFS_BLOCK_SIZE); } - // FIXME: waiters? + // move read waiters + if (!orig->waitfor_read.empty()) { + map >::iterator o, p = orig->waitfor_read.end(); + p--; + while (p != orig->waitfor_read.begin()) { + if (p->first < right->start()) break; + dout(20) << "split moving waiters at block " << p->first << " to right bh" << endl; + right->waitfor_read[p->first].swap( p->second ); + o = p; + p--; + orig->waitfor_read.erase(o); + } + } dout(20) << "split left is " << *orig << endl; dout(20) << "split right is " << *right << endl; - - return right; } @@ -627,7 +655,6 @@ bool BufferCache::bh_cancel_write(BufferHead *bh) void BufferCache::bh_queue_partial_write(Onode *on, BufferHead *bh) { - dout(10) << "bh_queue_partial_write " << *on << " on " << *bh << endl; assert(bh->get_version() > 0); assert(bh->is_partial()); @@ -640,7 +667,13 @@ void BufferCache::bh_queue_partial_write(Onode *on, BufferHead *bh) block_t b = exv[0].start; assert(exv[0].length == 1); + dout(10) << "bh_queue_partial_write " << *on << " on " << *bh << " block " << b << endl; + + // copy map state, queue for this block bh->queue_partial_write( b ); } + + + diff --git a/ceph/ebofs/BufferCache.h b/ceph/ebofs/BufferCache.h index 23fda4b8ac407..17189b40357c5 100644 --- a/ceph/ebofs/BufferCache.h +++ b/ceph/ebofs/BufferCache.h @@ -44,8 +44,8 @@ class BufferHead : public LRUObject { ioh_t rx_ioh; // ioh_t tx_ioh; // - list waitfor_read; - list waitfor_flush; + map< block_t, list > waitfor_read; + //list waitfor_flush; private: map partial; // partial dirty content overlayed onto incoming data @@ -272,14 +272,15 @@ class BufferHead : public LRUObject { inline ostream& operator<<(ostream& out, BufferHead& bh) { out << "bufferhead(" << bh.start() << "~" << bh.length(); - out << " v" << bh.get_version() << "/" << bh.get_last_flushed(); + //out << " v" << bh.get_version() << "/" << bh.get_last_flushed(); if (bh.is_missing()) out << " missing"; if (bh.is_dirty()) out << " dirty"; if (bh.is_clean()) out << " clean"; if (bh.is_rx()) out << " rx"; if (bh.is_tx()) out << " tx"; if (bh.is_partial()) out << " partial"; - out << " " << bh.data.length(); + //out << " " << bh.data.length(); + //out << " " << &bh; out << ")"; return out; } @@ -298,8 +299,28 @@ class ObjectCache { object_t get_object_id() { return object_id; } - void add_bh(BufferHead *bh, block_t at) { - data[at] = bh; + void add_bh(BufferHead *bh) { + // add to my map + assert(data.count(bh->start()) == 0); + + if (1) { // sanity check FIXME DEBUG + //cout << "add_bh " << bh->start() << "~" << bh->length() << endl; + map::iterator p = data.lower_bound(bh->start()); + if (p != data.end()) { + //cout << " after " << *p->second << endl; + //cout << " after starts at " << p->first << endl; + assert(p->first >= bh->end()); + } + if (p != data.begin()) { + p--; + //cout << " before starts at " << p->second->start() + //<< " and ends at " << p->second->end() << endl; + //cout << " before " << *p->second << endl; + assert(p->second->end() <= bh->start()); + } + } + + data[bh->start()] = bh; } void remove_bh(BufferHead *bh) { assert(data.count(bh->start())); @@ -328,6 +349,13 @@ class ObjectCache { void tx_finish(ioh_t ioh, block_t start, block_t length, version_t v, version_t epoch); void partial_tx_finish(version_t epoch); + void dump() { + for (map::iterator i = data.begin(); + i != data.end(); + i++) + cout << "dump: " << i->first << ": " << *i->second << endl; + } + void tear_down(); }; @@ -397,8 +425,18 @@ class BufferCache { stat_clean(0), stat_dirty(0), stat_rx(0), stat_tx(0), stat_partial(0), stat_missing(0) {} + + off_t get_size() { + return stat_clean+stat_dirty+stat_rx+stat_tx+stat_partial; + } + off_t get_trimmable() { + return stat_clean; + } + + // bh's in cache void add_bh(BufferHead *bh) { + bh->get_oc()->add_bh(bh); if (bh->is_dirty()) { lru_dirty.lru_insert_mid(bh); dirty_bh.insert(bh); @@ -413,6 +451,7 @@ class BufferCache { lru_rest.lru_touch(bh); } void remove_bh(BufferHead *bh) { + bh->get_oc()->remove_bh(bh); stat_sub(bh); if (bh->is_dirty()) { lru_dirty.lru_remove(bh); @@ -514,10 +553,8 @@ class BufferCache { friend class C_E_FlushPartial; - + // bh fun BufferHead *split(BufferHead *orig, block_t after); - - }; diff --git a/ceph/ebofs/Ebofs.cc b/ceph/ebofs/Ebofs.cc index 5bcfb9c2d66cf..5d575ac154805 100644 --- a/ceph/ebofs/Ebofs.cc +++ b/ceph/ebofs/Ebofs.cc @@ -37,6 +37,9 @@ int Ebofs::mount() dout(3) << "mount epoch " << super_epoch << endl; assert(super_epoch == sb->epoch); + free_blocks = sb->free_blocks; + limbo_blocks = sb->limbo_blocks; + // init node pools dout(3) << "mount nodepool" << endl; nodepool.init( &sb->nodepool ); @@ -48,11 +51,14 @@ int Ebofs::mount() object_tab = new Table( nodepool, sb->object_tab ); for (int i=0; i( nodepool, sb->free_tab[i] ); + limbo_tab = new Table( nodepool, sb->limbo_tab ); collection_tab = new Table( nodepool, sb->collection_tab ); oc_tab = new Table( nodepool, sb->oc_tab ); co_tab = new Table( nodepool, sb->co_tab ); - + + allocator.release_limbo(); + dout(3) << "mount starting commit thread" << endl; commit_thread.create(); @@ -74,7 +80,7 @@ int Ebofs::mkfs() // create first noderegion Extent nr; nr.start = 2; - nr.length = num_blocks / 10000; + nr.length = num_blocks / 100; if (nr.length < 10) nr.length = 10; nodepool.add_region(nr); dout(1) << "mkfs: first node region at " << nr << endl; @@ -99,6 +105,7 @@ int Ebofs::mkfs() for (int i=0; i( nodepool, empty ); + limbo_tab = new Table( nodepool, empty ); oc_tab = new Table( nodepool, empty ); co_tab = new Table( nodepool, empty ); @@ -107,8 +114,9 @@ int Ebofs::mkfs() Extent left; left.start = nodepool.usemap_odd.end(); left.length = num_blocks - left.start; - dout(1) << "mkfs: free blocks at " << left << endl; - allocator.release_now( left ); + dout(1) << "mkfs: free data blocks at " << left << endl; + allocator.release( left ); + allocator.release_limbo(); // write nodes, super, 2x dout(1) << "mkfs: flushing nodepool and superblocks (2x)" << endl; @@ -141,6 +149,7 @@ void Ebofs::close_tables() delete object_tab; for (int i=0; iget_root(); sb.free_tab[i].depth = free_tab[i]->get_depth(); } + sb.limbo_tab.num_keys = limbo_tab->get_num_keys(); + sb.limbo_tab.root = limbo_tab->get_root(); + sb.limbo_tab.depth = limbo_tab->get_depth(); sb.collection_tab.num_keys = collection_tab->get_num_keys(); sb.collection_tab.root = collection_tab->get_root(); @@ -240,14 +253,20 @@ int Ebofs::commit_thread_entry() ebofs_lock.Lock(); dout(10) << "commit_thread start" << endl; + assert(!commit_thread_started); // there can be only one commit_thread_started = true; sync_cond.Signal(); while (mounted) { - // wait - //commit_cond.Wait(ebofs_lock, utime_t(EBOFS_COMMIT_INTERVAL,0)); // wait for kick, or 10s. - commit_cond.Wait(ebofs_lock);//, utime_t(EBOFS_COMMIT_INTERVAL,0)); // wait for kick, or 10s. + // wait for kick, or timeout + if (EBOFS_COMMIT_INTERVAL) { + commit_cond.Wait(ebofs_lock, utime_t(EBOFS_COMMIT_INTERVAL,0)); + } else { + // DEBUG.. wait until kicked + dout(10) << "commit_thread no commit_interval, waiting until kicked" << endl; + commit_cond.Wait(ebofs_lock); + } if (unmounting) { dout(10) << "commit_thread unmounting: final commit pass" << endl; @@ -257,12 +276,12 @@ int Ebofs::commit_thread_entry() } super_epoch++; - dout(10) << "commit_thread commit start, new epoch is " << super_epoch << endl; + dout(10) << "commit_thread commit start, new epoch " << super_epoch + << ". " << get_free_blocks() << " free in " << get_free_extents() << ", " + << get_limbo_blocks() << " limbo in " << get_limbo_extents() << endl; // (async) write onodes+condes (do this first; it currently involves inode reallocation) commit_inodes_start(); - - allocator.release_limbo(); // (async) write btree nodes nodepool.commit_start( dev, super_epoch ); @@ -271,10 +290,10 @@ int Ebofs::commit_thread_entry() bufferptr superbp; prepare_super(super_epoch, superbp); - // wait it all to flush (drops global lock) + // wait for it all to flush (drops global lock) + commit_bc_wait(super_epoch-1); commit_inodes_wait(); nodepool.commit_wait(); - commit_bc_wait(super_epoch-1); // ok, now (synchronously) write the prior super! dout(10) << "commit_thread commit flushed, writing super for prior epoch" << endl; @@ -282,12 +301,18 @@ int Ebofs::commit_thread_entry() write_super(super_epoch, superbp); ebofs_lock.Lock(); + // free limbo space now (since we're done allocating things, AND we've flushed all previous epoch data) + allocator.release_limbo(); + // kick waiters dout(10) << "commit_thread kicking commit+sync waiters" << endl; - finish_contexts(commit_waiters[super_epoch], 0); - commit_waiters.erase(super_epoch); + finish_contexts(commit_waiters[super_epoch-1], 0); + commit_waiters.erase(super_epoch-1); sync_cond.Signal(); + // trim bc? + trim_bc(); + dout(10) << "commit_thread commit finish" << endl; } @@ -441,7 +466,7 @@ void Ebofs::remove_onode(Onode *on) while (!on->commit_waiters.empty()) { Context *c = on->commit_waiters.front(); on->commit_waiters.pop_front(); - commit_waiters[super_epoch+1].remove(c); // FIXME slow, O(n) + commit_waiters[super_epoch].remove(c); // FIXME slow, O(n) c->finish(-ENOENT); delete c; } @@ -700,34 +725,42 @@ void Ebofs::commit_inodes_wait() void Ebofs::trim_buffer_cache() { ebofs_lock.Lock(); - dout(10) << "trim_buffer_cache start: " - << bc.lru_rest.lru_get_size() << " rest + " - << bc.lru_dirty.lru_get_size() << " dirty " << endl; + trim_bc(); + ebofs_lock.Unlock(); +} + +void Ebofs::trim_bc() +{ + off_t max = g_conf.ebofs_bc_size; + dout(10) << "trim_bc start: size " << bc.get_size() << ", trimmable " << bc.get_trimmable() << ", max " << max << endl; - // trim trimmable bufferheads - while (bc.lru_rest.lru_get_size() > bc.lru_rest.lru_get_max()) { + while (bc.get_size() > max && + bc.get_trimmable()) { BufferHead *bh = (BufferHead*) bc.lru_rest.lru_expire(); if (!bh) break; - dout(10) << "trim_buffer_cache trimming " << *bh << endl; + dout(25) << "trim_bc trimming " << *bh << endl; assert(bh->is_clean()); ObjectCache *oc = bh->oc; - oc->remove_bh(bh); + bc.remove_bh(bh); delete bh; if (oc->is_empty()) { Onode *on = get_onode( oc->get_object_id() ); - dout(10) << "trim_buffer_cache closing oc on " << *on << endl; + dout(10) << "trim_bc closing oc on " << *on << endl; on->close_oc(); put_onode(on); } } + + dout(10) << "trim_bc finish: size " << bc.get_size() << ", trimmable " << bc.get_trimmable() << ", max " << max << endl; + + /* dout(10) << "trim_buffer_cache finish: " << bc.lru_rest.lru_get_size() << " rest + " << bc.lru_dirty.lru_get_size() << " dirty " << endl; - - ebofs_lock.Unlock(); + */ } @@ -784,21 +817,42 @@ void Ebofs::alloc_write(Onode *on, // first decide what pages to (re)allocate on->map_alloc_regions(start, len, alloc); - dout(10) << "alloc_write need to (re)alloc " << alloc << endl; + dout(10) << "alloc_write need to (re)alloc " << alloc << " on " << *on << endl; + if (alloc.empty()) return; + // merge alloc into onode uncommitted map - //dout(10) << " union of " << on->uncommitted << " and " << alloc << endl; + dout(10) << " union of " << on->uncommitted << " and " << alloc << endl; + interval_set old = on->uncommitted; on->uncommitted.union_of(alloc); + dout(10) << "alloc_write onode.uncommitted is now " << on->uncommitted << endl; + if (0) { + // verify + interval_set ta; + ta.intersection_of(on->uncommitted, alloc); + cout << " ta " << ta << endl; + assert(alloc == ta); + + interval_set tb; + tb.intersection_of(on->uncommitted, old); + cout << " tb " << tb << endl; + assert(old == tb); + } + + dirty_onode(on); + // allocate the space for (map::iterator i = alloc.m.begin(); i != alloc.m.end(); i++) { + dout(15) << "alloc_write need to (re)alloc " << i->first << "~" << i->second << endl; + // get old region vector old; on->map_extents(i->first, i->second, old); - for (unsigned o=0; osecond); } BufferHead *wait_on = missing.begin()->second; - wait_on->waitfor_read.push_back(new C_Cond(will_wait_on)); + block_t b = MAX(wait_on->start(), bstart); + wait_on->waitfor_read[b].push_back(new C_Cond(will_wait_on)); return false; } @@ -1088,7 +1143,7 @@ bool Ebofs::attempt_read(Onode *on, size_t len, off_t off, bufferlist& bl, Cond if (partials_ok) { // wait on this one dout(15) <<"attempt_read insufficient partial buffer " << *(i->second) << endl; - i->second->waitfor_read.push_back(new C_Cond(will_wait_on)); + i->second->waitfor_read[i->second->start()].push_back(new C_Cond(will_wait_on)); } partials_ok = false; } @@ -1099,7 +1154,8 @@ bool Ebofs::attempt_read(Onode *on, size_t len, off_t off, bufferlist& bl, Cond if (!rx.empty()) { BufferHead *wait_on = rx.begin()->second; dout(15) <<"attempt_read waiting for read to finish on " << *wait_on << endl; - wait_on->waitfor_read.push_back(new C_Cond(will_wait_on)); + block_t b = MAX(wait_on->start(), bstart); + wait_on->waitfor_read[b].push_back(new C_Cond(will_wait_on)); return false; } @@ -1247,7 +1303,7 @@ int Ebofs::write(object_t oid, if (onsafe) { // commit on next full fs commit. // FIXME when we add journaling. - commit_waiters[super_epoch+1].push_back(onsafe); + commit_waiters[super_epoch].push_back(onsafe); on->commit_waiters.push_back(onsafe); // in case we delete the object. } diff --git a/ceph/ebofs/Ebofs.h b/ceph/ebofs/Ebofs.h index 0749067f8cba4..439f34c9959df 100644 --- a/ceph/ebofs/Ebofs.h +++ b/ceph/ebofs/Ebofs.h @@ -26,7 +26,7 @@ inline ostream& operator<<(ostream& out, idpair_t oc) { } -const int EBOFS_COMMIT_INTERVAL = 3; // whatever +const int EBOFS_COMMIT_INTERVAL = 2; // 0 == never class Ebofs : public ObjectStore { @@ -60,10 +60,21 @@ class Ebofs : public ObjectStore { // ** allocator ** - block_t free_blocks; + block_t free_blocks, limbo_blocks; Allocator allocator; friend class Allocator; + block_t get_free_blocks() { return free_blocks; } + block_t get_limbo_blocks() { return limbo_blocks; } + block_t get_free_extents() { + int n = 0; + for (int i=0; iget_num_keys(); + return n; + } + block_t get_limbo_extents() { return limbo_tab->get_num_keys(); } + + // ** buffers ** AlignedBufferPool bufferpool; @@ -75,6 +86,7 @@ class Ebofs : public ObjectStore { // tables Table *object_tab; Table *free_tab[EBOFS_NUM_FREE_BUCKETS]; + Table *limbo_tab; // collections Table *collection_tab; @@ -125,6 +137,7 @@ class Ebofs : public ObjectStore { version_t trigger_commit(); void commit_bc_wait(version_t epoch); + void trim_bc(); public: void sync(); @@ -153,7 +166,7 @@ class Ebofs : public ObjectStore { free_blocks(0), allocator(this), bufferpool(EBOFS_BLOCK_SIZE), nodepool(ebofs_lock), - object_tab(0), collection_tab(0), oc_tab(0), co_tab(0), + object_tab(0), limbo_tab(0), collection_tab(0), oc_tab(0), co_tab(0), inodes_flushing(0), bc(dev, bufferpool, ebofs_lock) { for (int i=0; i attr; + map attr; vector extents; interval_set uncommitted; @@ -109,13 +109,26 @@ public: extents.push_back(ex); } void verify_extents() { - block_t count = 0;; + block_t count = 0; + interval_set is; + set s; for (unsigned i=0; i @@ -579,11 +579,17 @@ class Table { int remove(K key) { - if (almost_full()) return -1; + if (almost_full()) { + cout << "table almost full, failing" << endl; + assert(0); + return -1; + } Cursor cursor(this); - if (find(key, cursor) <= 0) + if (find(key, cursor) <= 0) { + assert(0); return -1; // key dne + } while (1) { @@ -693,16 +699,16 @@ class Table { } // hose myself - pool.release( node_loc ); + pool.release( node.node ); } void clear() { - int count = 0; Cursor cursor(this); if (root == -1 && depth == 0) return; // already empty! - int err = clear(cursor, root, 0); + clear(cursor, root, 0); root = -1; depth = 0; + nkeys = 0; } int verify(Cursor& cursor, int node_loc, int level, int& count) { diff --git a/ceph/ebofs/mkfs.ebofs.cc b/ceph/ebofs/mkfs.ebofs.cc index b4737a8af20b5..8990cd7b2d395 100644 --- a/ceph/ebofs/mkfs.ebofs.cc +++ b/ceph/ebofs/mkfs.ebofs.cc @@ -52,16 +52,34 @@ int main(int argc, char **argv) bufferlist bl; bl.append(crap, 10000); - // write - srand(0); - for (int i=0; i<100; i++) { - off_t off = rand() % 1000000; - size_t len = rand() % 10000; - cout << endl << "writing bit at " << off << " len " << len << endl; - fs.write(10, len, off, bl, (Context*)0); + // reandom write + if (0) { + srand(0); + for (int i=0; i<10000; i++) { + off_t off = rand() % 1000000; + size_t len = 1+rand() % 10000; + cout << endl << i << " writing bit at " << off << " len " << len << endl; + fs.write(10, len, off, bl, (Context*)0); + //fs.sync(); + //fs.trim_buffer_cache(); + } } if (1) { + // sequential write + srand(0); + off_t off = 0; + for (int i=0; i<10000; i++) { + size_t len = 1+rand() % 10000; + cout << endl << i << " writing bit at " << off << " len " << len << endl; + fs.write(10, len, off, bl, (Context*)0); + off += len; + } + + } + + + if (0) { // read srand(0); for (int i=0; i<100; i++) { @@ -80,7 +98,7 @@ int main(int argc, char **argv) fs.trim_buffer_cache(); //fs.trim_buffer_cache(); - if (1) { + if (0) { // read again srand(0); for (int i=0; i<100; i++) { @@ -98,13 +116,15 @@ int main(int argc, char **argv) fs.trim_buffer_cache(); } - // write on empty cache - srand(0); - for (int i=0; i<100; i++) { - off_t off = rand() % 1000000; - size_t len = 100; - cout << endl << "writing bit at " << off << " len " << len << endl; - fs.write(10, len, off, bl, (Context*)0); + if (0) { + // write on empty cache + srand(0); + for (int i=0; i<100; i++) { + off_t off = rand() % 1000000; + size_t len = 100; + cout << endl << "writing bit at " << off << " len " << len << endl; + fs.write(10, len, off, bl, (Context*)0); + } } } diff --git a/ceph/ebofs/nodes.h b/ceph/ebofs/nodes.h index d81677ba373cf..943fcd308a380 100644 --- a/ceph/ebofs/nodes.h +++ b/ceph/ebofs/nodes.h @@ -340,7 +340,6 @@ class NodePool { void flushed_node(nodeid_t nid) { ebofs_lock.Lock(); - assert(tx.count(nid)); // mark nid clean|limbo if (tx.count(nid)) { diff --git a/ceph/ebofs/types.h b/ceph/ebofs/types.h index 407eeb4eb0b7e..c2f3cba9699d5 100644 --- a/ceph/ebofs/types.h +++ b/ceph/ebofs/types.h @@ -147,15 +147,17 @@ struct ebofs_super { unsigned num_blocks; /* # blocks in filesystem */ - // basic stats, for kicks + // some basic stats, for kicks unsigned free_blocks; /* unused blocks */ - unsigned num_objects; - unsigned num_fragmented; + unsigned limbo_blocks; /* limbo blocks */ + //unsigned num_objects; + //unsigned num_fragmented; struct ebofs_nodepool nodepool; // tables struct ebofs_table free_tab[EBOFS_NUM_FREE_BUCKETS]; + struct ebofs_table limbo_tab; struct ebofs_table object_tab; // object directory struct ebofs_table collection_tab; // collection directory struct ebofs_table oc_tab; diff --git a/ceph/include/interval_set.h b/ceph/include/interval_set.h index ab4a72c19594e..d2b9345c00504 100644 --- a/ceph/include/interval_set.h +++ b/ceph/include/interval_set.h @@ -14,10 +14,21 @@ class interval_set { // helpers private: - typename map::iterator find_inc(T start) { + typename map::const_iterator find_inc(T start) const { + typename map::const_iterator p = m.lower_bound(start); // p->first >= start + if (p != m.begin() && + (p == m.end() || p->first > start)) { + p--; // might overlap? + if (p->first + p->second <= start) + p++; // it doesn't. + } + return p; + } + + typename map::iterator find_inc_m(T start) { typename map::iterator p = m.lower_bound(start); if (p != m.begin() && - (p->first > start || p == m.end())) { + (p == m.end() || p->first > start)) { p--; // might overlap? if (p->first + p->second <= start) p++; // it doesn't. @@ -25,10 +36,21 @@ class interval_set { return p; } - typename map::iterator find_adj(T start) { + typename map::const_iterator find_adj(T start) const { + typename map::const_iterator p = m.lower_bound(start); + if (p != m.begin() && + (p == m.end() || p->first > start)) { + p--; // might touch? + if (p->first + p->second < start) + p++; // it doesn't. + } + return p; + } + + typename map::iterator find_adj_m(T start) { typename map::iterator p = m.lower_bound(start); if (p != m.begin() && - (p->first > start || p == m.end())) { + (p == m.end() || p->first > start)) { p--; // might touch? if (p->first + p->second < start) p++; // it doesn't. @@ -37,21 +59,24 @@ class interval_set { } public: + bool operator==(const interval_set& other) const { + return m == other.m; + } void clear() { m.clear(); } - bool contains(T i) { - typename map::iterator p = find_inc(i); + bool contains(T i) const { + typename map::const_iterator p = find_inc(i); if (p == m.end()) return false; if (p->first > i) return false; if (p->first+p->second <= i) return false; assert(p->first <= i && p->first+p->second > i); return true; } - bool contains(T start, T len) { - typename map::iterator p = find_inc(start); + bool contains(T start, T len) const { + typename map::const_iterator p = find_inc(start); if (p == m.end()) return false; if (p->first > start) return false; if (p->first+p->second <= start) return false; @@ -61,47 +86,54 @@ class interval_set { } // outer range of set - bool empty() { + bool empty() const { return m.empty(); } - T start() { + T start() const { assert(!empty()); - typename map::iterator p = m.begin(); + typename map::const_iterator p = m.begin(); return p->first; } - T end() { + T end() const { assert(!empty()); - typename map::iterator p = m.end(); + typename map::const_iterator p = m.end(); p--; return p->first+p->second; } // interval start after p (where p not in set) - bool starts_after(T i) { + bool starts_after(T i) const { assert(!contains(i)); - typename map::iterator p = find_inc(i); + typename map::const_iterator p = find_inc(i); if (p == m.end()) return false; return true; } - T start_after(T i) { + T start_after(T i) const { assert(!contains(i)); - typename map::iterator p = find_inc(i); + typename map::const_iterator p = find_inc(i); return p->first; } // interval end that contains start - T end_after(T start) { + T end_after(T start) const { assert(contains(start)); - typename map::iterator p = find_inc(start); + typename map::const_iterator p = find_inc(start); return p->first+p->second; } void insert(T start, T len) { - typename map::iterator p = find_adj(start); + //cout << "insert " << start << "~" << len << endl; + typename map::iterator p = find_adj_m(start); if (p == m.end()) { m[start] = len; // new interval } else { if (p->first < start) { + + if (p->first + p->second != start) { + cout << "p is " << p->first << "~" << p->second << ", start is " << start << ", len is " << len << endl; + assert(0); + } + assert(p->first + p->second == start); p->second += len; // append to end @@ -124,7 +156,7 @@ class interval_set { } void erase(T start, T len) { - typename map::iterator p = find_inc(start); + typename map::iterator p = find_inc_m(start); assert(p != m.end()); assert(p->first <= start); @@ -142,24 +174,28 @@ class interval_set { } - void subtract(interval_set &a) { - for (typename map::iterator p = a.m.begin(); + void subtract(const interval_set &a) { + for (typename map::const_iterator p = a.m.begin(); p != a.m.end(); p++) erase(p->first, p->second); } - void insert(interval_set &a) { - for (typename map::iterator p = a.m.begin(); + void insert(const interval_set &a) { + for (typename map::const_iterator p = a.m.begin(); p != a.m.end(); p++) insert(p->first, p->second); } - void intersection_of(interval_set &a, interval_set &b) { - typename map::iterator pa = a.m.begin(); - typename map::iterator pb = b.m.begin(); + void intersection_of(const interval_set &a, const interval_set &b) { + assert(&a != this); + assert(&b != this); + clear(); + + typename map::const_iterator pa = a.m.begin(); + typename map::const_iterator pb = b.m.begin(); while (pa != a.m.end() && pb != b.m.end()) { // passing? @@ -171,42 +207,93 @@ class interval_set { T end = MIN(pa->first+pa->second, pb->first+pb->second); assert(end > start); insert(start, end-start); - pa++; - pb++; + if (pa->first+pa->second > pb->first+pb->second) + pb++; + else + pa++; } } - void union_of(interval_set &a, interval_set &b) { - typename map::iterator pa = a.m.begin(); - typename map::iterator pb = b.m.begin(); + void union_of(const interval_set &a, const interval_set &b) { + assert(&a != this); + assert(&b != this); + clear(); + + //cout << "union_of" << endl; + + // a + m = a.m; + + // - (a*b) + interval_set ab; + ab.intersection_of(a, b); + subtract(ab); + + // + b + insert(b); + return; + + + /* i think this is correct? + typename map::const_iterator pa = a.m.begin(); + typename map::const_iterator pb = b.m.begin(); + T upto = 0; while (pa != a.m.end() || pb != b.m.end()) { - // passing? + // passed? + if (pa != a.m.end() && pa->first + pa->second <= upto) + { pa++; continue; } + if (pb != b.m.end() && pb->first + pb->second <= upto) + { pb++; continue; } + + // extending? + if (pa != a.m.end() && pa->first <= upto && pa->first+pa->second > upto) { + insert( upto, pa->first+pa->second - upto ); + upto = pa->first+pa->second; + pa++; + continue; + } + if (pb != b.m.end() && pb->first <= upto && pb->first+pb->second > upto) { + insert( upto, pb->first+pb->second - upto ); + upto = pb->first+pb->second; + pb++; + continue; + } + + // gap! + + // non-overlapping? if (pb == b.m.end() || (pa != a.m.end() && pa->first + pa->second <= pb->first)) { insert(pa->first, pa->second); + upto = pa->first + pa->second; pa++; continue; } if (pa == a.m.end() || (pb != b.m.end() && pb->first + pb->second <= pa->first)) { insert(pb->first, pb->second); + upto = pb->first + pb->second; pb++; continue; } - T start = MIN(pa->first, pb->first); - T end = MAX(pa->first+pa->second, pb->first+pb->second); - insert(start, end-start); + + // overlapping! + const T start = MIN(pa->first, pb->first); + upto = MAX(pa->first+pa->second, pb->first+pb->second); + insert(start, upto-start); pa++; pb++; } + */ + } - void union_of(interval_set &a) { - interval_set b; - b.m.swap(m); + void union_of(const interval_set &b) { + interval_set a; + a.m.swap(m); union_of(a, b); } - bool subset_of(interval_set &big) { - for (typename map::iterator i = m.begin(); + bool subset_of(const interval_set &big) const { + for (typename map::const_iterator i = m.begin(); i != m.end(); i++) if (!big.contains(i->first, i->second)) return false; @@ -216,9 +303,9 @@ class interval_set { }; template -inline ostream& operator<<(ostream& out, interval_set &s) { +inline ostream& operator<<(ostream& out, const interval_set &s) { out << "["; - for (typename map::iterator i = s.m.begin(); + for (typename map::const_iterator i = s.m.begin(); i != s.m.end(); i++) { if (i != s.m.begin()) out << ","; -- 2.39.5