From 74f13f4307f6e8ac08457daac20854c9dddbf03d Mon Sep 17 00:00:00 2001 From: sageweil Date: Fri, 24 Aug 2007 17:29:15 +0000 Subject: [PATCH] ebofs tuning, locked debug output git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1675 29311d96-e01e-0410-9327-a35deaab8ce9 --- trunk/ceph/Makefile | 2 +- trunk/ceph/config.cc | 26 +- trunk/ceph/config.h | 29 +- trunk/ceph/ebofs/Allocator.cc | 89 +++--- trunk/ceph/ebofs/Allocator.h | 5 +- trunk/ceph/ebofs/BlockDevice.cc | 143 +++++----- trunk/ceph/ebofs/BufferCache.cc | 121 ++++----- trunk/ceph/ebofs/Ebofs.cc | 464 ++++++++++++++++---------------- trunk/ceph/ebofs/FileJournal.cc | 82 +++--- trunk/ceph/ebofs/mkfs.ebofs.cc | 25 ++ trunk/ceph/include/Context.h | 2 +- trunk/ceph/mds/journal.cc | 26 +- 12 files changed, 531 insertions(+), 483 deletions(-) diff --git a/trunk/ceph/Makefile b/trunk/ceph/Makefile index ccc2ef1fd1fa7..0d06187988377 100644 --- a/trunk/ceph/Makefile +++ b/trunk/ceph/Makefile @@ -16,7 +16,7 @@ EXTRA_CFLAGS = -I${HOME}/include -L${HOME}/lib # base -CFLAGS = -pg -g -Wall -I. -D_FILE_OFFSET_BITS=64 -D_REENTRANT -D_THREAD_SAFE ${EXTRA_CFLAGS} +CFLAGS = -O3 -g -Wall -I. -D_FILE_OFFSET_BITS=64 -D_REENTRANT -D_THREAD_SAFE ${EXTRA_CFLAGS} LDINC = ld -i -o CC = g++ LIBS = -pthread diff --git a/trunk/ceph/config.cc b/trunk/ceph/config.cc index 6241fa694494d..8620c97ec5eaa 100644 --- a/trunk/ceph/config.cc +++ b/trunk/ceph/config.cc @@ -187,7 +187,6 @@ md_config_t g_conf = { mds_log_read_inc: 1<<20, mds_log_pad_entry: 128,//256,//64, mds_log_flush_on_shutdown: true, - //mds_log_subtree_map_interval: 128*1024, // frequency (in bytes) of EImportMap in log mds_log_eopen_size: 100, // # open inodes per log entry mds_bal_sample_interval: 5.0, // every 5 seconds @@ -227,6 +226,8 @@ md_config_t g_conf = { mds_dump_cache_on_map: false, mds_dump_cache_after_rejoin: true, + mds_hack_log_expire_for_better_stats: false, + // --- osd --- osd_rep: OSD_REP_PRIMARY, @@ -253,9 +254,12 @@ md_config_t g_conf = { osd_replay_window: 5, osd_max_pull: 2, osd_pad_pg_log: false, + + osd_hack_fast_startup: false, // this breaks localized pgs. + // --- fakestore --- - fakestore_fake_sync: .5, // seconds + fakestore_fake_sync: .2, // seconds fakestore_fsync: false,//true, fakestore_writesync: false, fakestore_syncthreads: 4, @@ -267,25 +271,25 @@ md_config_t g_conf = { ebofs: 1, ebofs_cloneable: false, ebofs_verify: false, - ebofs_commit_ms: 2000, // 0 = no forced commit timeout (for debugging/tracing) - ebofs_idle_commit_ms: 20, // 0 = no idle detection. use this -or- bdev_idle_kick_after_ms + ebofs_commit_ms: 500, // 0 = no forced commit timeout (for debugging/tracing) + ebofs_idle_commit_ms: 0, // 0 = no idle detection. UGLY HACK. use bdev_idle_kick_after_ms instead. ebofs_oc_size: 10000, // onode cache ebofs_cc_size: 10000, // cnode cache ebofs_bc_size: (80 *256), // 4k blocks, *256 for MB ebofs_bc_max_dirty: (60 *256), // before write() will block ebofs_max_prefetch: 1000, // 4k blocks - ebofs_realloc: true, + ebofs_realloc: false, // hrm, this can cause bad fragmentation, don't use! ebofs_abp_zero: false, // zero newly allocated buffers (may shut up valgrind) ebofs_abp_max_alloc: 4096*16, // max size of new buffers (larger -> more memory fragmentation) // --- block device --- bdev_lock: true, - bdev_iothreads: 1, // number of ios to queue with kernel - bdev_idle_kick_after_ms: 0,//100, // ms ** FIXME ** this seems to break things, not sure why yet ** + bdev_iothreads: 2, // number of ios to queue with kernel + bdev_idle_kick_after_ms: 100, // ms ** FIXME ** this seems to break things, not sure why yet ** bdev_el_fw_max_ms: 10000, // restart elevator at least once every 1000 ms bdev_el_bw_max_ms: 3000, // restart elevator at least once every 300 ms - bdev_el_bidir: true, // bidirectional elevator? + bdev_el_bidir: false, // bidirectional elevator? bdev_iov_max: 512, // max # iov's to collect into a single readv()/writev() call bdev_debug_check_io_overlap: true, // [DEBUG] check for any pending io overlaps bdev_fake_mb: 0, @@ -692,6 +696,9 @@ void parse_config_options(std::vector& args) g_conf.mds_thrash_fragments = atoi(args[++i]); else if (strcmp(args[i], "--mds_dump_cache_on_map") == 0) g_conf.mds_dump_cache_on_map = true; + + else if (strcmp(args[i], "--mds_hack_log_expire_for_better_stats") == 0) + g_conf.mds_hack_log_expire_for_better_stats = atoi(args[++i]); else if (strcmp(args[i], "--client_use_random_mds") == 0) g_conf.client_use_random_mds = true; @@ -808,6 +815,9 @@ void parse_config_options(std::vector& args) else if (strcmp(args[i], "--osd_pad_pg_log") == 0) g_conf.osd_pad_pg_log = atoi(args[++i]); + else if (strcmp(args[i], "--osd_hack_fast_startup") == 0) + g_conf.osd_hack_fast_startup = atoi(args[++i]); + else if (strcmp(args[i], "--bdev_lock") == 0) g_conf.bdev_lock = atoi(args[++i]); diff --git a/trunk/ceph/config.h b/trunk/ceph/config.h index 5e58aeb07c399..30dfab203fcd4 100644 --- a/trunk/ceph/config.h +++ b/trunk/ceph/config.h @@ -186,7 +186,6 @@ struct md_config_t { int mds_log_read_inc; int mds_log_pad_entry; bool mds_log_flush_on_shutdown; - //off_t mds_log_subtree_map_interval; int mds_log_eopen_size; float mds_bal_sample_interval; @@ -226,6 +225,8 @@ struct md_config_t { bool mds_dump_cache_on_map; bool mds_dump_cache_after_rejoin; + bool mds_hack_log_expire_for_better_stats; + // osd int osd_rep; @@ -252,6 +253,8 @@ struct md_config_t { int osd_max_pull; bool osd_pad_pg_log; + bool osd_hack_fast_startup; + double fakestore_fake_sync; bool fakestore_fsync; bool fakestore_writesync; @@ -344,30 +347,6 @@ extern md_config_t g_debug_after_conf; #define pdout(x,p) if ((x) <= (p)) std::cout -/** - * for cleaner output, bracket each line with - * dbeginl (in the dout macro) and dendl (in place of endl). - */ -extern Mutex _dout_lock; -struct _dbeginl_t { - _dbeginl_t(int) {} -}; -struct _dendl_t { - _dendl_t(int) {} -}; -static const _dbeginl_t dbeginl = 0; -static const _dendl_t dendl = 0; - -inline ostream& operator<<(ostream& out, _dbeginl_t) { - _dout_lock.Lock(); - return out; -} -inline ostream& operator<<(ostream& out, _dendl_t) { - out << endl; - _dout_lock.Unlock(); - return out; -} - /** * command line / environment argument parsing diff --git a/trunk/ceph/ebofs/Allocator.cc b/trunk/ceph/ebofs/Allocator.cc index a9c79f01a315a..7d6ed57d05acf 100644 --- a/trunk/ceph/ebofs/Allocator.cc +++ b/trunk/ceph/ebofs/Allocator.cc @@ -17,9 +17,10 @@ #include "Allocator.h" #include "Ebofs.h" +#include "debug.h" #undef dout -#define dout(x) if (x <= g_conf.debug_ebofs) cout << g_clock.now() << " ebofs(" << fs->dev.get_device_name() << ").allocator." +#define dout(x) if (x <= g_conf.debug_ebofs) cout << dbeginl << g_clock.now() << " ebofs(" << fs->dev.get_device_name() << ").allocator." void Allocator::dump_freelist() @@ -32,35 +33,35 @@ void Allocator::dump_freelist() Table *tab; if (b < EBOFS_NUM_FREE_BUCKETS) { tab = fs->free_tab[b]; - dout(0) << "dump bucket " << b << " " << tab->get_num_keys() << endl; + dout(0) << "dump bucket " << b << " " << tab->get_num_keys() << dendl; } else { tab = fs->limbo_tab; - dout(0) << "dump limbo " << tab->get_num_keys() << endl;; + dout(0) << "dump limbo " << tab->get_num_keys() << dendl;; } if (tab->get_num_keys() > 0) { Table::Cursor cursor(tab); assert(tab->find(0, cursor) >= 0); while (1) { - dout(0) << "dump ex " << cursor.current().key << "~" << cursor.current().value << endl; + dout(0) << "dump ex " << cursor.current().key << "~" << cursor.current().value << dendl; assert(cursor.current().value > 0); if (b < EBOFS_NUM_FREE_BUCKETS) n += cursor.current().value; if (free.contains( cursor.current().key, cursor.current().value )) - dout(0) << "dump bad " << cursor.current().key << "~" << cursor.current().value << endl; + dout(0) << "dump bad " << cursor.current().key << "~" << cursor.current().value << dendl; assert(!free.contains( cursor.current().key, cursor.current().value )); free.insert( cursor.current().key, cursor.current().value ); if (cursor.move_right() <= 0) break; } } else { - //cout << " empty" << endl; + //cout << " empty" << dendl; } } assert(n == fs->free_blocks); - dout(0) << "dump combined freelist is " << free << endl; + dout(0) << "dump combined freelist is " << free << dendl; // alloc_tab @@ -70,7 +71,7 @@ void Allocator::dump_freelist() while (1) { dout(0) << "alloc ex " << cursor.current().key << "~" << cursor.current().value.first << " ref " << cursor.current().value.second - << endl; + << dendl; assert(cursor.current().value.first > 0); if (cursor.move_right() <= 0) break; @@ -173,7 +174,7 @@ int Allocator::allocate(Extent& ex, block_t num, block_t near) } } - dout(20) << "allocate " << ex << " near " << near << endl; + dout(20) << "allocate " << ex << " near " << near << dendl; last_pos = ex.end(); //dump_freelist(); if (g_conf.ebofs_cloneable) @@ -196,7 +197,7 @@ int Allocator::allocate(Extent& ex, block_t num, block_t near) fs->free_tab[bucket]->remove(ex.start); fs->free_blocks -= ex.length; last_pos = ex.end(); - dout(20) << "allocate partial " << ex << " (wanted " << num << ") near " << near << endl; + dout(20) << "allocate partial " << ex << " (wanted " << num << ") near " << near << dendl; //dump_freelist(); if (g_conf.ebofs_cloneable) alloc_inc(ex); @@ -204,7 +205,7 @@ int Allocator::allocate(Extent& ex, block_t num, block_t near) } } - dout(1) << "allocate failed, fs completely full! " << fs->free_blocks << endl; + dout(1) << "allocate failed, fs completely full! " << fs->free_blocks << dendl; assert(0); //dump_freelist(); return -1; @@ -212,8 +213,8 @@ int Allocator::allocate(Extent& ex, block_t num, block_t near) int Allocator::_release_into_limbo(Extent& ex) { - dout(10) << "_release_into_limbo " << ex << endl; - dout(10) << "limbo is " << limbo << endl; + dout(10) << "_release_into_limbo " << ex << dendl; + dout(10) << "limbo is " << limbo << dendl; assert(ex.length > 0); limbo.insert(ex.start, ex.length); fs->limbo_blocks += ex.length; @@ -231,7 +232,7 @@ int Allocator::release(Extent& ex) int Allocator::commit_limbo() { - dout(20) << "commit_limbo" << endl; + dout(20) << "commit_limbo" << dendl; for (map::iterator i = limbo.m.begin(); i != limbo.m.end(); i++) { @@ -252,7 +253,7 @@ int Allocator::release_limbo() fs->limbo_tab->find(0, cursor); while (1) { Extent ex(cursor.current().key, cursor.current().value); - dout(20) << "release_limbo ex " << ex << endl; + dout(20) << "release_limbo ex " << ex << dendl; fs->limbo_blocks -= ex.length; _release_merge(ex); @@ -279,11 +280,11 @@ int Allocator::_alloc_loner_inc(Extent& ex) v.second++; dout(10) << "_alloc_loner_inc " << ex << " " << (v.second-1) << " -> " << v.second - << endl; + << dendl; } else { // insert it, @1 fs->alloc_tab->insert(ex.start, pair(ex.length,1)); - dout(10) << "_alloc_loner_inc " << ex << " 0 -> 1" << endl; + dout(10) << "_alloc_loner_inc " << ex << " 0 -> 1" << dendl; } return 0; } @@ -296,14 +297,14 @@ int Allocator::_alloc_loner_dec(Extent& ex) == Table >::Cursor::MATCH) { assert(cursor.current().value.first == ex.length); if (cursor.current().value.second == 1) { - dout(10) << "_alloc_loner_dec " << ex << " 1 -> 0" << endl; + dout(10) << "_alloc_loner_dec " << ex << " 1 -> 0" << dendl; fs->alloc_tab->remove( cursor.current().key ); } else { pair& v = cursor.dirty_current_value(); --v.second; dout(10) << "_alloc_loner_dec " << ex << " " << (v.second+1) << " -> " << v.second - << endl; + << dendl; } } else { assert(0); @@ -315,13 +316,13 @@ int Allocator::_alloc_loner_dec(Extent& ex) int Allocator::alloc_inc(Extent ex) { - dout(10) << "alloc_inc " << ex << endl; + dout(10) << "alloc_inc " << ex << dendl; // empty table? if (fs->alloc_tab->get_num_keys() == 0) { // easy. fs->alloc_tab->insert(ex.start, pair(ex.length,1)); - dout(10) << "alloc_inc + " << ex << " 0 -> 1 (first entry)" << endl; + dout(10) << "alloc_inc + " << ex << " 0 -> 1 (first entry)" << dendl; return 0; } @@ -332,14 +333,14 @@ int Allocator::alloc_inc(Extent ex) if (r == Table >::Cursor::OOB || cursor.current().key > ex.start) { r = cursor.move_left(); - dout(10) << "alloc_inc move_left r = " << r << endl; + dout(10) << "alloc_inc move_left r = " << r << dendl; } while (1) { dout(10) << "alloc_inc loop at " << cursor.current().key << "~" << cursor.current().value.first << " ref " << cursor.current().value.second - << endl; + << dendl; // too far left? if (cursor.current().key < ex.start && @@ -352,7 +353,7 @@ int Allocator::alloc_inc(Extent ex) // no overlap. r = cursor.move_right(); - dout(10) << "alloc_inc move_right r = " << r << endl; + dout(10) << "alloc_inc move_right r = " << r << dendl; // at end? if (r <= 0) { @@ -362,11 +363,11 @@ int Allocator::alloc_inc(Extent ex) cursor.move_left(); pair &v = cursor.dirty_current_value(); v.first += ex.length; // yay! - dout(10) << "alloc_inc + " << ex << " 0 -> 1 (adjust at end)" << endl; + dout(10) << "alloc_inc + " << ex << " 0 -> 1 (adjust at end)" << dendl; } else { // insert at end, finish. int r = fs->alloc_tab->insert(ex.start, pair(ex.length,1)); - dout(10) << "alloc_inc + " << ex << " 0 -> 1 (at end) .. r = " << r << endl; + dout(10) << "alloc_inc + " << ex << " 0 -> 1 (at end) .. r = " << r << dendl; //dump_freelist(); } return 0; @@ -380,7 +381,7 @@ int Allocator::alloc_inc(Extent ex) block_t l = MIN(ex.length, cursor.current().key - ex.start); fs->alloc_tab->insert(ex.start, pair(l,1)); - dout(10) << "alloc_inc + " << ex.start << "~" << l << " 0 -> 1" << endl; + dout(10) << "alloc_inc + " << ex.start << "~" << l << " 0 -> 1" << dendl; ex.start += l; ex.length -= l; if (ex.length == 0) break; @@ -402,7 +403,7 @@ int Allocator::alloc_inc(Extent ex) dout(10) << "alloc_inc " << ex.start << "~" << l << " " << ref << " -> " << ref+1 - << " (right split)" << endl; + << " (right split)" << dendl; ex.start += l; ex.length -= l; @@ -425,7 +426,7 @@ int Allocator::alloc_inc(Extent ex) dout(10) << "alloc_inc " << ex << " " << ref << " -> " << ref+1 << " (double split finish)" - << endl; + << dendl; break; } @@ -442,7 +443,7 @@ int Allocator::alloc_inc(Extent ex) dout(10) << "alloc_inc " << ex.start << "~" << cursor.current().value.first << " " << cursor.current().value.second-1 << " -> " << cursor.current().value.second - << " (left split)" << endl; + << " (left split)" << dendl; ex.start += v.first; ex.length -= v.first; if (ex.length == 0) break; @@ -463,7 +464,7 @@ int Allocator::alloc_inc(Extent ex) dout(10) << "alloc_inc " << ex << " " << ref << " -> " << ref+1 << " (left split finish)" - << endl; + << dendl; break; } @@ -476,7 +477,7 @@ int Allocator::alloc_inc(Extent ex) int Allocator::alloc_dec(Extent ex) { - dout(10) << "alloc_dec " << ex << endl; + dout(10) << "alloc_dec " << ex << dendl; assert(fs->alloc_tab->get_num_keys() >= 0); @@ -484,12 +485,12 @@ int Allocator::alloc_dec(Extent ex) // try to move to left (to check for overlap) int r = fs->alloc_tab->find( ex.start, cursor ); - dout(10) << "alloc_dec find r = " << r << endl; + dout(10) << "alloc_dec find r = " << r << dendl; if (r == Table >::Cursor::OOB || cursor.current().key > ex.start) { r = cursor.move_left(); - dout(10) << "alloc_dec move_left r = " << r << endl; + dout(10) << "alloc_dec move_left r = " << r << dendl; // too far left? if (cursor.current().key < ex.start && @@ -505,7 +506,7 @@ int Allocator::alloc_dec(Extent ex) << "~" << cursor.current().value.first << " " << cursor.current().value.second << ", ex is " << ex - << endl; + << dendl; assert(cursor.current().key <= ex.start); // no gap allowed. @@ -521,14 +522,14 @@ int Allocator::alloc_dec(Extent ex) int ref = v.second; dout(10) << "alloc_dec s " << cursor.current().key << "~" << cursor.current().value.first << " " << ref - << " shortened left bit of single" << endl; + << " shortened left bit of single" << dendl; block_t l = end - ex.start; if (ref > 1) { fs->alloc_tab->insert(ex.start, pair(l, ref-1)); dout(10) << "alloc_dec . " << ex.start << "~" << l << " " << ref << " -> " << ref-1 - << endl; + << dendl; } else { Extent r(ex.start, l); _release_into_limbo(r); @@ -548,14 +549,14 @@ int Allocator::alloc_dec(Extent ex) int ref = v.second; dout(10) << "alloc_dec s " << cursor.current().key << "~" << cursor.current().value.first << " " << ref - << " shorted left bit of double split" << endl; + << " shorted left bit of double split" << dendl; if (ref > 1) { fs->alloc_tab->insert(ex.start, pair(ex.length, ref-1)); dout(10) << "alloc_inc s " << ex << " " << ref << " -> " << ref-1 << " reinserted middle bit of double split" - << endl; + << dendl; } else { _release_into_limbo(ex); } @@ -564,7 +565,7 @@ int Allocator::alloc_dec(Extent ex) fs->alloc_tab->insert(ex.end(), pair(rl, ref)); dout(10) << "alloc_dec s " << ex.end() << "~" << rl << " " << ref - << " reinserted right bit of double split" << endl; + << " reinserted right bit of double split" << dendl; break; } } @@ -580,7 +581,7 @@ int Allocator::alloc_dec(Extent ex) v.second--; dout(10) << "alloc_dec s " << ex.start << "~" << cursor.current().value.first << " " << cursor.current().value.second+1 << " -> " << cursor.current().value.second - << endl; + << dendl; ex.start += v.first; ex.length -= v.first; if (ex.length == 0) break; @@ -609,7 +610,7 @@ int Allocator::alloc_dec(Extent ex) v.second--; dout(10) << "alloc_inc . " << ex << " " << ref << " -> " << ref-1 - << endl; + << dendl; } else { _release_into_limbo(ex); cursor.remove(); @@ -617,7 +618,7 @@ int Allocator::alloc_dec(Extent ex) dout(10) << "alloc_dec s " << ex.end() << "~" << l << " " << ref - << " reinserted right bit of single split" << endl; + << " reinserted right bit of single split" << dendl; fs->alloc_tab->insert(ex.end(), pair(l, ref)); break; } @@ -649,7 +650,7 @@ int Allocator::_release_loner(Extent& ex) */ int Allocator::_release_merge(Extent& orig) { - dout(15) << "_release_merge " << orig << endl; + dout(15) << "_release_merge " << orig << dendl; assert(orig.length > 0); Extent newex = orig; diff --git a/trunk/ceph/ebofs/Allocator.h b/trunk/ceph/ebofs/Allocator.h index a7d7aebf75d00..c1898784d50a7 100644 --- a/trunk/ceph/ebofs/Allocator.h +++ b/trunk/ceph/ebofs/Allocator.h @@ -73,11 +73,10 @@ protected: int alloc_dec(Extent ex); - /*int unallocate(Extent& ex) { // skip limbo + int unallocate(Extent& ex) { // skip limbo return _release_merge(ex); } - */ - + int commit_limbo(); // limbo -> fs->limbo_tab int release_limbo(); // fs->limbo_tab -> free_tabs diff --git a/trunk/ceph/ebofs/BlockDevice.cc b/trunk/ceph/ebofs/BlockDevice.cc index 83de7b69efd0e..9537ded0234b0 100644 --- a/trunk/ceph/ebofs/BlockDevice.cc +++ b/trunk/ceph/ebofs/BlockDevice.cc @@ -38,6 +38,8 @@ #endif #endif +#include "debug.h" + /******************************************* * biovec @@ -61,9 +63,8 @@ inline ostream& operator<<(ostream& out, BlockDevice::biovec &bio) * ElevatorQueue */ -#undef dout -#define dout(x) if (x <= g_conf.debug_bdev) cout << g_clock.now() << " bdev(" << dev << ").elevatorq." -#define derr(x) if (x <= g_conf.debug_bdev) cerr << g_clock.now() << " bdev(" << dev << ").elevatorq." +#define dout(x) if (x <= g_conf.debug_bdev) cout << dbeginl << g_clock.now() << " bdev(" << dev << ").elevatorq." +#define derr(x) if (x <= g_conf.debug_bdev) cerr << dbeginl << g_clock.now() << " bdev(" << dev << ").elevatorq." int BlockDevice::ElevatorQueue::dequeue_io(list& biols, @@ -73,12 +74,12 @@ int BlockDevice::ElevatorQueue::dequeue_io(list& biols, // queue empty? assert(!io_map.empty()); - dout(20) << "dequeue_io el_pos " << el_pos << " dir " << el_dir_forward << endl; + dout(20) << "dequeue_io el_pos " << el_pos << " dir " << el_dir_forward << dendl; // find our position: i >= pos map::iterator i; - int tries = g_conf.bdev_el_bidir + 1; + int tries = 2; while (tries > 0) { if (el_dir_forward) { i = io_map.lower_bound(el_pos); @@ -95,7 +96,7 @@ int BlockDevice::ElevatorQueue::dequeue_io(list& biols, // reverse (or initial startup)? if (g_conf.bdev_el_bidir || !el_dir_forward) { - // dout(20) << "restart reversing" << endl; + // dout(20) << "restart reversing" << dendl; el_dir_forward = !el_dir_forward; } @@ -107,9 +108,9 @@ int BlockDevice::ElevatorQueue::dequeue_io(list& biols, el_stop = g_clock.now(); utime_t max(0, 1000*g_conf.bdev_el_fw_max_ms); // (s,us), convert ms -> us! el_stop += max; - // dout(20) << "restart forward sweep for " << max << endl; + // dout(20) << "restart forward sweep for " << max << dendl; } else { - // dout(20) << "restart fowrard sweep" << endl; + // dout(20) << "restart fowrard sweep" << dendl; } } else { // reverse @@ -119,9 +120,9 @@ int BlockDevice::ElevatorQueue::dequeue_io(list& biols, el_stop = g_clock.now(); utime_t max(0, 1000*g_conf.bdev_el_bw_max_ms); // (s,us), convert ms -> us! el_stop += max; - // dout(20) << "restart reverse sweep for " << max << endl; + // dout(20) << "restart reverse sweep for " << max << dendl; } else { - // dout(20) << "restart reverse sweep" << endl; + // dout(20) << "restart reverse sweep" << dendl; } } @@ -133,7 +134,7 @@ int BlockDevice::ElevatorQueue::dequeue_io(list& biols, // get some biovecs int num_bio = 0; - dout(20) << "dequeue_io starting with " << i->first << " " << *i->second << endl; + dout(20) << "dequeue_io starting with " << i->first << " " << *i->second << dendl; // merge contiguous ops char type = i->second->type; // read or write @@ -156,7 +157,7 @@ int BlockDevice::ElevatorQueue::dequeue_io(list& biols, // allowed? (not already submitted to kernel?) if (block_lock.intersects(bio->start, bio->length)) { // dout(20) << "dequeue_io " << bio->start << "~" << bio->length - // << " intersects block_lock " << block_lock << endl; + // << " intersects block_lock " << block_lock << dendl; break; // stop, or go with what we've got so far } @@ -169,10 +170,10 @@ int BlockDevice::ElevatorQueue::dequeue_io(list& biols, length += bio->length; if (el_dir_forward) { - //dout(20) << "dequeue_io fw dequeue io at " << el_pos << " " << *i->second << endl; + //dout(20) << "dequeue_io fw dequeue io at " << el_pos << " " << *i->second << dendl; biols.push_back(bio); // add at back } else { - // dout(20) << "dequeue_io bw dequeue io at " << el_pos << " " << *i->second << endl; + // dout(20) << "dequeue_io bw dequeue io at " << el_pos << " " << *i->second << dendl; biols.push_front(bio); // add at front } num_bio++; @@ -211,17 +212,17 @@ int BlockDevice::ElevatorQueue::dequeue_io(list& biols, * BarrierQueue */ #undef dout -#define dout(x) if (x <= g_conf.debug_bdev) cout << g_clock.now() << " bdev(" << dev << ").barrierq." +#define dout(x) if (x <= g_conf.debug_bdev) cout << dbeginl << g_clock.now() << " bdev(" << dev << ").barrierq." void BlockDevice::BarrierQueue::barrier() { if (!qls.empty() && qls.front()->empty()) { assert(qls.size() == 1); - dout(10) << "barrier not adding new queue, front is empty" << endl; + dout(10) << "barrier not adding new queue, front is empty" << dendl; } else { qls.push_back(new ElevatorQueue(bdev, dev)); dout(10) << "barrier adding new elevator queue (now " << qls.size() << "), front queue has " - << qls.front()->size() << " ios left" << endl; + << qls.front()->size() << " ios left" << dendl; } } @@ -234,7 +235,7 @@ bool BlockDevice::BarrierQueue::bump() qls.front() != qls.back()) { delete qls.front(); qls.pop_front(); - dout(10) << "dequeue_io front empty, moving to next queue (" << qls.front()->size() << ")" << endl; + dout(10) << "dequeue_io front empty, moving to next queue (" << qls.front()->size() << ")" << dendl; return true; } @@ -259,7 +260,7 @@ int BlockDevice::BarrierQueue::dequeue_io(list& biols, */ #undef dout -#define dout(x) if (x <= g_conf.debug_bdev) cout << g_clock.now() << " bdev(" << dev << ")." +#define dout(x) if (x <= g_conf.debug_bdev) cout << dbeginl << g_clock.now() << " bdev(" << dev << ")." @@ -278,7 +279,7 @@ block_t BlockDevice::get_num_blocks() dout(10) << "get_num_blocks ioctl BLKGETSIZE64 reports " << num_blocks << " 4k blocks, " << bytes << " bytes" - << endl; + << dendl; #else // hrm, try the 32 bit ioctl? unsigned long sectors = 0; @@ -287,25 +288,25 @@ block_t BlockDevice::get_num_blocks() bytes = sectors*512ULL; if (r == 0) { dout(10) << "get_num_blocks ioctl BLKGETSIZE reports " << sectors << " sectors, " - << num_blocks << " 4k blocks, " << bytes << " bytes" << endl; + << num_blocks << " 4k blocks, " << bytes << " bytes" << dendl; #endif } else { // hmm, try stat! - dout(10) << "get_num_blocks ioctl(2) failed with " << errno << " " << strerror(errno) << ", using stat(2)" << endl; + dout(10) << "get_num_blocks ioctl(2) failed with " << errno << " " << strerror(errno) << ", using stat(2)" << dendl; struct stat st; fstat(fd, &st); uint64_t bytes = st.st_size; num_blocks = bytes / EBOFS_BLOCK_SIZE; - dout(10) << "get_num_blocks stat reports " << num_blocks << " 4k blocks, " << bytes << " bytes" << endl; + dout(10) << "get_num_blocks stat reports " << num_blocks << " 4k blocks, " << bytes << " bytes" << dendl; } if (g_conf.bdev_fake_mb) { num_blocks = g_conf.bdev_fake_mb * 256; - dout(0) << "faking dev size " << g_conf.bdev_fake_mb << " mb" << endl; + dout(0) << "faking dev size " << g_conf.bdev_fake_mb << " mb" << dendl; } if (g_conf.bdev_fake_max_mb && num_blocks > (block_t)g_conf.bdev_fake_max_mb * 256ULL) { - dout(0) << "faking dev size " << g_conf.bdev_fake_max_mb << " mb" << endl; + dout(0) << "faking dev size " << g_conf.bdev_fake_max_mb << " mb" << dendl; num_blocks = g_conf.bdev_fake_max_mb * 256; } @@ -325,7 +326,7 @@ void* BlockDevice::io_thread_entry() int whoami = io_threads_started++; io_threads_running++; assert(io_threads_running <= g_conf.bdev_iothreads); - dout(10) << "io_thread" << whoami << " start, " << io_threads_running << " now running" << endl; + dout(10) << "io_thread" << whoami << " start, " << io_threads_running << " now running" << dendl; // get my own fd (and file position pointer) int fd = open_fd(); @@ -339,7 +340,7 @@ void* BlockDevice::io_thread_entry() // sleep do_sleep = true; } else { - dout(20) << "io_thread" << whoami << " going" << endl; + dout(20) << "io_thread" << whoami << " going" << dendl; block_t start, length; list biols; @@ -347,7 +348,7 @@ void* BlockDevice::io_thread_entry() if (n == 0) { // failed to dequeue a do-able op, sleep for now - dout(20) << "io_thread" << whoami << " couldn't dequeue doable op, sleeping" << endl; + dout(20) << "io_thread" << whoami << " couldn't dequeue doable op, sleeping" << dendl; assert(io_threads_running > 1); // there must be someone else, if we couldn't dequeue something doable. do_sleep = true; } @@ -376,28 +377,40 @@ void* BlockDevice::io_thread_entry() // sleep io_threads_running--; - dout(20) << "io_thread" << whoami << " sleeping, " << io_threads_running << " threads now running," - << " queue has " << root_queue.size() << endl; + dout(20) << "io_thread" << whoami << " sleeping, " + << io_threads_running << " threads now running," + << " queue has " << root_queue.size() + << dendl; + // first wait for signal | timeout? if (g_conf.bdev_idle_kick_after_ms > 0 && - io_threads_running == 0 && + io_threads_running == 0 && // only the last thread asleep needs to kick. idle_kicker) { - // first wait for signal | timeout - io_wakeup.WaitInterval(lock, utime_t(0, g_conf.bdev_idle_kick_after_ms*1000)); - - // should we still be sleeping? (did we get woken up, or did timer expire? - if (root_queue.empty() && io_threads_running == 0) { - idle_kicker->kick(); // kick - io_wakeup.Wait(lock); // and wait - } + dout(20) << "io_thread" << whoami << " doing short wait, to see if i stay idle" << dendl; + if (io_wakeup.WaitInterval(lock, utime_t(0, g_conf.bdev_idle_kick_after_ms*1000)) == ETIMEDOUT) { + // timer expired. kick ebofs. + dout(20) << "io_thread" << whoami << " timeout expired, kicking ebofs" << dendl; + + lock.Unlock(); + idle_kicker->kick(); // kick + lock.Lock(); + } + + // should i _still_ be sleeping? + if (root_queue.empty() && io_threads_running == 0) { + dout(20) << "io_thread" << whoami << " did the kick, now waiting for signal" << dendl; + io_wakeup.Wait(lock); // and wait (if condition still holds) + } else { + dout(20) << "io_thread" << whoami << " not waiting, must not be idle anymore?" << dendl; + } } else { - // normal, just wait. - io_wakeup.Wait(lock); + // just sleep, like normal. + io_wakeup.Wait(lock); // and wait (if condition still holds) } io_threads_running++; assert(io_threads_running <= g_conf.bdev_iothreads); - dout(20) << "io_thread" << whoami << " woke up, " << io_threads_running << " threads now running" << endl; + dout(20) << "io_thread" << whoami << " woke up, " << io_threads_running << " threads now running" << dendl; } } @@ -407,7 +420,7 @@ void* BlockDevice::io_thread_entry() lock.Unlock(); - dout(10) << "io_thread" << whoami << " finish" << endl; + dout(10) << "io_thread" << whoami << " finish" << dendl; return 0; } @@ -440,14 +453,14 @@ void BlockDevice::do_io(int fd, list& biols) // do it dout(20) << "do_io start " << (type==biovec::IO_WRITE?"write":"read") << " " << start << "~" << length - << " " << numbio << " bits" << endl; + << " " << numbio << " bits" << dendl; if (type == biovec::IO_WRITE) { r = _write(fd, start, length, bl); } else if (type == biovec::IO_READ) { r = _read(fd, start, length, bl); } else assert(0); dout(20) << "do_io finish " << (type==biovec::IO_WRITE?"write":"read") - << " " << start << "~" << length << endl; + << " " << start << "~" << length << dendl; // set rval for (p = biols.begin(); p != biols.end(); p++) @@ -494,14 +507,14 @@ void BlockDevice::finish_io(biovec *bio) void* BlockDevice::complete_thread_entry() { complete_lock.Lock(); - dout(10) << "complete_thread start" << endl; + dout(10) << "complete_thread start" << dendl; while (!io_stop) { while (!complete_queue.empty()) { list ls; ls.swap(complete_queue); - dout(10) << "complete_thread grabbed " << complete_queue_len << " biovecs" << endl; + dout(10) << "complete_thread grabbed " << complete_queue_len << " biovecs" << dendl; complete_queue_len = 0; complete_lock.Unlock(); @@ -511,7 +524,7 @@ void* BlockDevice::complete_thread_entry() p != ls.end(); p++) { biovec *bio = *p; - dout(20) << "complete_thread finishing " << *bio << endl; + dout(20) << "complete_thread finishing " << *bio << dendl; finish_io(bio); } @@ -529,11 +542,11 @@ void* BlockDevice::complete_thread_entry() } */ - dout(25) << "complete_thread sleeping" << endl; + dout(25) << "complete_thread sleeping" << dendl; complete_wakeup.Wait(complete_lock); } - dout(10) << "complete_thread finish" << endl; + dout(10) << "complete_thread finish" << dendl; complete_lock.Unlock(); return 0; } @@ -546,7 +559,7 @@ void* BlockDevice::complete_thread_entry() void BlockDevice::_submit_io(biovec *b) { // NOTE: lock must be held - dout(15) << "_submit_io " << *b << endl; + dout(15) << "_submit_io " << *b << dendl; // wake up io_thread(s)? if ((int)root_queue.size() == io_threads_running) @@ -568,9 +581,9 @@ void BlockDevice::_submit_io(biovec *b) (p != io_queue.begin() && (p--, p->second->start + p->second->length > b->start))) { dout(1) << "_submit_io new io " << *b - << " overlaps with existing " << *p->second << endl; + << " overlaps with existing " << *p->second << dendl; cerr << "_submit_io new io " << *b - << " overlaps with existing " << *p->second << endl; + << " overlaps with existing " << *p->second << dendl; } } */ @@ -582,10 +595,10 @@ int BlockDevice::_cancel_io(biovec *bio) // NOTE: lock must be held if (bio->in_queue == 0) { - dout(15) << "_cancel_io " << *bio << " FAILED" << endl; + dout(15) << "_cancel_io " << *bio << " FAILED" << dendl; return -1; } else { - dout(15) << "_cancel_io " << *bio << endl; + dout(15) << "_cancel_io " << *bio << dendl; bio->in_queue->cancel_io(bio); if (root_queue.bump()) io_wakeup.SignalAll(); // something happened! @@ -599,7 +612,7 @@ int BlockDevice::_cancel_io(biovec *bio) int BlockDevice::_read(int fd, block_t bno, unsigned num, bufferlist& bl) { - dout(10) << "_read " << bno << "~" << num << endl; + dout(10) << "_read " << bno << "~" << num << dendl; assert(fd > 0); @@ -634,7 +647,7 @@ int BlockDevice::_read(int fd, block_t bno, unsigned num, bufferlist& bl) int BlockDevice::_write(int fd, unsigned bno, unsigned num, bufferlist& bl) { - dout(10) << "_write " << bno << "~" << num << endl; + dout(10) << "_write " << bno << "~" << num << dendl; assert(fd > 0); @@ -671,8 +684,8 @@ int BlockDevice::_write(int fd, unsigned bno, unsigned num, bufferlist& bl) if (r < 0) { dout(1) << "couldn't write bno " << bno << " num " << num << " (" << len << " bytes) in " << n << " iovs, r=" << r - << " errno " << errno << " " << strerror(errno) << endl; - dout(1) << "bl is " << bl << endl; + << " errno " << errno << " " << strerror(errno) << dendl; + dout(1) << "bl is " << bl << dendl; assert(0); } else { assert(r == (int)len); @@ -703,7 +716,7 @@ int BlockDevice::open(kicker *idle) // open? fd = open_fd(); if (fd < 0) { - dout(1) << "open failed, r = " << fd << " " << strerror(errno) << endl; + dout(1) << "open failed, r = " << fd << " " << strerror(errno) << dendl; fd = 0; return -1; } @@ -712,7 +725,7 @@ int BlockDevice::open(kicker *idle) if (g_conf.bdev_lock) { int r = ::flock(fd, LOCK_EX|LOCK_NB); if (r < 0) { - derr(1) << "open " << dev << " failed to get LOCK_EX" << endl; + derr(1) << "open " << dev << " failed to get LOCK_EX" << dendl; assert(0); return -1; } @@ -721,10 +734,10 @@ int BlockDevice::open(kicker *idle) // figure size block_t b = get_num_blocks(); if (!b) { - dout(0) << "open can't determine size of device" << endl; + dout(0) << "open can't determine size of device" << dendl; assert(0); } - dout(2) << "open " << b << " blocks, " << b*4096 << " bytes" << endl; + dout(2) << "open " << b << " blocks, " << b*4096 << " bytes" << dendl; // start thread io_threads_started = 0; @@ -749,7 +762,7 @@ int BlockDevice::close() idle_kicker = 0; // shut down io thread - dout(10) << "close stopping io+complete threads" << endl; + dout(10) << "close stopping io+complete threads" << dendl; lock.Lock(); complete_lock.Lock(); io_stop = true; @@ -769,7 +782,7 @@ int BlockDevice::close() io_stop = false; // in case we start again - dout(2) << "close " << endl; + dout(2) << "close " << dendl; if (g_conf.bdev_lock) ::flock(fd, LOCK_UN); diff --git a/trunk/ceph/ebofs/BufferCache.cc b/trunk/ceph/ebofs/BufferCache.cc index d110d066e6993..ee3cf52cd379a 100644 --- a/trunk/ceph/ebofs/BufferCache.cc +++ b/trunk/ceph/ebofs/BufferCache.cc @@ -17,12 +17,13 @@ #include "BufferCache.h" #include "Onode.h" +#include "debug.h" /*********** BufferHead **************/ #undef dout -#define dout(x) if (x <= g_conf.debug_ebofs) cout << g_clock.now() << " ebofs.bh." +#define dout(x) if (x <= g_conf.debug_ebofs) cout << dbeginl << g_clock.now() << " ebofs.bh." @@ -33,7 +34,7 @@ #undef dout -#define dout(x) if (x <= g_conf.debug_ebofs) cout << g_clock.now() << " ebofs.oc." +#define dout(x) if (x <= g_conf.debug_ebofs) cout << dbeginl << g_clock.now() << " ebofs.oc." @@ -41,12 +42,12 @@ void ObjectCache::rx_finish(ioh_t ioh, block_t start, block_t length, bufferlist { list waiters; - dout(10) << "rx_finish " << start << "~" << length << endl; + dout(10) << "rx_finish " << start << "~" << length << dendl; for (map::iterator p = data.lower_bound(start); p != data.end(); p++) { BufferHead *bh = p->second; - dout(10) << "rx_finish ?" << *bh << endl; + dout(10) << "rx_finish ?" << *bh << dendl; assert(p->first == bh->start()); // past? @@ -56,7 +57,7 @@ void ObjectCache::rx_finish(ioh_t ioh, block_t start, block_t length, bufferlist assert(p->first >= start); assert(bh->end() <= start+length); - dout(10) << "rx_finish !" << *bh << endl; + dout(10) << "rx_finish !" << *bh << dendl; if (bh->rx_ioh == ioh) bh->rx_ioh = 0; @@ -65,12 +66,12 @@ void ObjectCache::rx_finish(ioh_t ioh, block_t start, block_t length, bufferlist assert(bh->get_version() == 0); assert(bh->end() <= start+length); assert(bh->start() >= start); - dout(10) << "rx_finish rx -> clean on " << *bh << endl; + dout(10) << "rx_finish rx -> clean on " << *bh << dendl; bh->data.substr_of(bl, (bh->start()-start)*EBOFS_BLOCK_SIZE, bh->length()*EBOFS_BLOCK_SIZE); bc->mark_clean(bh); } else if (bh->is_partial()) { - dout(10) << "rx_finish partial -> tx on " << *bh << endl; + dout(10) << "rx_finish partial -> tx on " << *bh << dendl; if (1) { // double-check what block i am @@ -101,7 +102,7 @@ void ObjectCache::rx_finish(ioh_t ioh, block_t start, block_t length, bufferlist bh->partial.clear(); } else { - dout(10) << "rx_finish ignoring status on (dirty|tx|clean) " << *bh << endl; + dout(10) << "rx_finish ignoring status on (dirty|tx|clean) " << *bh << dendl; assert(bh->is_dirty() || // was overwritten bh->is_tx() || // was overwritten and queued bh->is_clean()); // was overwritten, queued, _and_ flushed to disk @@ -124,12 +125,12 @@ void ObjectCache::rx_finish(ioh_t ioh, block_t start, block_t length, bufferlist void ObjectCache::tx_finish(ioh_t ioh, block_t start, block_t length, version_t version, version_t epoch) { - dout(10) << "tx_finish " << start << "~" << length << " v" << version << endl; + dout(10) << "tx_finish " << start << "~" << length << " v" << version << dendl; for (map::iterator p = data.lower_bound(start); p != data.end(); p++) { BufferHead *bh = p->second; - dout(30) << "tx_finish ?bh " << *bh << endl; + dout(30) << "tx_finish ?bh " << *bh << dendl; assert(p->first == bh->start()); // past? @@ -139,19 +140,19 @@ void ObjectCache::tx_finish(ioh_t ioh, block_t start, block_t length, bh->tx_ioh = 0; if (!bh->is_tx()) { - dout(10) << "tx_finish bh not marked tx, skipping" << endl; + dout(10) << "tx_finish bh not marked tx, skipping" << dendl; continue; } assert(bh->is_tx()); if (version == bh->version) { - dout(10) << "tx_finish tx -> clean on " << *bh << endl; + dout(10) << "tx_finish tx -> clean on " << *bh << dendl; assert(bh->end() <= start+length); bh->set_last_flushed(version); bc->mark_clean(bh); } else { dout(10) << "tx_finish leaving tx, " << bh->version << " > " << version - << " on " << *bh << endl; + << " on " << *bh << dendl; assert(bh->version > version); } } @@ -252,18 +253,18 @@ int ObjectCache::try_map_read(block_t start, block_t len) if (e->is_clean() || e->is_dirty() || e->is_tx()) { - dout(20) << "try_map_read hit " << *e << endl; + dout(20) << "try_map_read hit " << *e << dendl; } else if (e->is_rx()) { - dout(20) << "try_map_read rx " << *e << endl; + dout(20) << "try_map_read rx " << *e << dendl; num_missing++; } else if (e->is_partial()) { - dout(-20) << "try_map_read partial " << *e << endl; + dout(-20) << "try_map_read partial " << *e << dendl; num_missing++; } else { - dout(0) << "try_map_read got unexpected " << *e << endl; + dout(0) << "try_map_read got unexpected " << *e << dendl; assert(0); } @@ -280,8 +281,8 @@ int ObjectCache::try_map_read(block_t start, block_t len) MIN(next-cur, left), // no prefetch exv); - dout(-20) << "try_map_read gap of " << p->first-cur << " blocks, " - << exv.size() << " extents" << endl; + dout(20) << "try_map_read gap of " << p->first-cur << " blocks, " + << exv.size() << " extents" << dendl; num_missing += exv.size(); left -= (p->first - cur); cur = p->first; @@ -340,7 +341,7 @@ int ObjectCache::map_read(block_t start, block_t len, n->set_length( exv[i].length ); bc->add_bh(n); missing[cur] = n; - dout(20) << "map_read miss " << left << " left, " << *n << endl; + dout(20) << "map_read miss " << left << " left, " << *n << dendl; cur += MIN(left,exv[i].length); left -= MIN(left,exv[i].length); } @@ -357,19 +358,19 @@ int ObjectCache::map_read(block_t start, block_t len, e->is_dirty() || e->is_tx()) { hits[cur] = e; // readable! - dout(20) << "map_read hit " << *e << endl; + dout(20) << "map_read hit " << *e << dendl; bc->touch(e); } else if (e->is_rx()) { rx[cur] = e; // missing, not readable. - dout(20) << "map_read rx " << *e << endl; + dout(20) << "map_read rx " << *e << dendl; } else if (e->is_partial()) { partial[cur] = e; - dout(20) << "map_read partial " << *e << endl; + dout(20) << "map_read partial " << *e << dendl; } else { - dout(0) << "map_read ??? got unexpected " << *e << endl; + dout(0) << "map_read ??? got unexpected " << *e << dendl; assert(0); } @@ -396,7 +397,7 @@ int ObjectCache::map_read(block_t start, block_t len, missing[cur] = n; cur += MIN(left, n->length()); left -= MIN(left, n->length()); - dout(20) << "map_read gap " << *n << endl; + dout(20) << "map_read gap " << *n << dendl; } continue; // more? } @@ -427,7 +428,7 @@ int ObjectCache::map_write(block_t start, block_t len, { map::iterator p = data.lower_bound(start); - dout(10) << "map_write " << *on << " " << start << "~" << len << " ... alloc " << alloc << endl; + dout(10) << "map_write " << *on << " " << start << "~" << len << " ... alloc " << alloc << dendl; // p->first >= start block_t cur = start; @@ -468,9 +469,9 @@ int ObjectCache::map_write(block_t start, block_t len, max = exv[0].length; if (newalloc) { - dout(10) << "map_write " << cur << "~" << max << " is new alloc on disk" << endl; + dout(10) << "map_write " << cur << "~" << max << " is new alloc on disk" << dendl; } else { - dout(10) << "map_write " << cur << "~" << max << " keeps old alloc on disk" << endl; + dout(10) << "map_write " << cur << "~" << max << " keeps old alloc on disk" << dendl; } // at end? @@ -485,12 +486,12 @@ int ObjectCache::map_write(block_t start, block_t len, continue; } - dout(10) << "p is " << *p->second << endl; + dout(10) << "p is " << *p->second << dendl; if (p->first <= cur) { BufferHead *bh = p->second; - dout(10) << "map_write bh " << *bh << " intersected" << endl; + dout(10) << "map_write bh " << *bh << " intersected" << dendl; if (p->first < cur) { if (cur+max >= p->first+p->second->length()) { @@ -567,7 +568,7 @@ int ObjectCache::map_write(block_t start, block_t len, // gap! block_t next = p->first; block_t glen = MIN(next-cur, max); - dout(10) << "map_write gap " << cur << "~" << glen << endl; + dout(10) << "map_write gap " << cur << "~" << glen << dendl; BufferHead *n = new BufferHead(this); n->set_start( cur ); n->set_length( glen ); @@ -627,7 +628,7 @@ void ObjectCache::touch_bottom(block_t bstart, block_t blast) if (bh->start() < bstart) continue; if (bh->end() > blast) break; - dout(12) << "moving " << *bh << " to bottom of lru" << endl; + dout(12) << "moving " << *bh << " to bottom of lru" << dendl; bc->touch_bottom(bh); // move to bottom of lru list } } @@ -637,7 +638,7 @@ void ObjectCache::truncate(block_t blocks, version_t super_epoch) { dout(7) << "truncate " << object_id << " " << blocks << " blocks" - << endl; + << dendl; while (!data.empty()) { block_t bhoff = data.rbegin()->first; @@ -648,7 +649,7 @@ void ObjectCache::truncate(block_t blocks, version_t super_epoch) bool uncom = on->uncommitted.contains(bh->start(), bh->length()); dout(10) << "truncate " << *bh << " uncom " << uncom << " of " << on->uncommitted - << endl; + << dendl; if (bhoff < blocks) { // we want right bit (one splice) @@ -672,7 +673,7 @@ void ObjectCache::truncate(block_t blocks, version_t super_epoch) if (bh->is_tx() && uncom) bc->bh_cancel_write(bh, super_epoch); if (bh->shadow_of) { - dout(10) << "truncate " << *bh << " unshadowing " << *bh->shadow_of << endl; + dout(10) << "truncate " << *bh << " unshadowing " << *bh->shadow_of << dendl; // shadow bh->shadow_of->remove_shadow(bh); if (bh->is_partial()) @@ -704,7 +705,7 @@ void ObjectCache::clone_to(Onode *other) p != data.end(); p++) { BufferHead *bh = p->second; - dout(10) << "clone_to ? " << *bh << endl; + dout(10) << "clone_to ? " << *bh << dendl; if (bh->is_dirty() || bh->is_tx() || bh->is_partial()) { // dup dirty or tx bh's if (!ton) @@ -716,7 +717,7 @@ void ObjectCache::clone_to(Onode *other) bc->add_bh(nbh); if (bh->is_partial()) { - dout(0) << "clone_to PARTIAL FIXME NOT FULLY IMPLEMENTED ******" << endl; + dout(0) << "clone_to PARTIAL FIXME NOT FULLY IMPLEMENTED ******" << dendl; nbh->partial = bh->partial; bc->mark_partial(nbh); // register as shadow_partial @@ -727,7 +728,7 @@ void ObjectCache::clone_to(Onode *other) bc->mark_clean(nbh); } - dout(10) << "clone_to dup " << *bh << " -> " << *nbh << endl; + dout(10) << "clone_to dup " << *bh << " -> " << *nbh << dendl; } } } @@ -737,13 +738,13 @@ void ObjectCache::clone_to(Onode *other) /************** BufferCache ***************/ #undef dout -#define dout(x) if (x <= g_conf.debug_ebofs) cout << g_clock.now() << " ebofs.bc." +#define dout(x) if (x <= g_conf.debug_ebofs) cout << dbeginl << g_clock.now() << " ebofs.bc." BufferHead *BufferCache::split(BufferHead *orig, block_t after) { - dout(20) << "split " << *orig << " at " << after << endl; + dout(20) << "split " << *orig << " at " << after << dendl; // split off right BufferHead *right = new BufferHead(orig->get_oc()); @@ -793,7 +794,7 @@ BufferHead *BufferCache::split(BufferHead *orig, block_t after) p--; while (p != orig->waitfor_read.begin()) { if (p->first < right->start()) break; - dout(0) << "split moving waiters at block " << p->first << " to right bh" << endl; + dout(0) << "split moving waiters at block " << p->first << " to right bh" << dendl; right->waitfor_read[p->first].swap( p->second ); o = p; p--; @@ -801,15 +802,15 @@ BufferHead *BufferCache::split(BufferHead *orig, block_t after) } } - dout(20) << "split left is " << *orig << endl; - dout(20) << "split right is " << *right << endl; + dout(20) << "split left is " << *orig << dendl; + dout(20) << "split right is " << *right << dendl; return right; } void BufferCache::bh_read(Onode *on, BufferHead *bh, block_t from) { - dout(10) << "bh_read " << *on << " on " << *bh << endl; + dout(10) << "bh_read " << *on << " on " << *bh << dendl; if (bh->is_missing()) { mark_rx(bh); @@ -824,7 +825,7 @@ void BufferCache::bh_read(Onode *on, BufferHead *bh, block_t from) Extent ex = exv[0]; if (from) { // force behavior, used for reading partials - dout(10) << "bh_read forcing read from block " << from << " (for a partial)" << endl; + dout(10) << "bh_read forcing read from block " << from << " (for a partial)" << dendl; ex.start = from; ex.length = 1; } @@ -832,7 +833,7 @@ void BufferCache::bh_read(Onode *on, BufferHead *bh, block_t from) // this should be empty!! assert(bh->rx_ioh == 0); - dout(20) << "bh_read " << *on << " " << *bh << " from " << ex << endl; + dout(20) << "bh_read " << *on << " " << *bh << " from " << ex << dendl; C_OC_RxFinish *fin = new C_OC_RxFinish(ebofs_lock, on->oc, bh->start(), bh->length(), @@ -851,7 +852,7 @@ void BufferCache::bh_read(Onode *on, BufferHead *bh, block_t from) bool BufferCache::bh_cancel_read(BufferHead *bh) { if (bh->rx_ioh && dev.cancel_io(bh->rx_ioh) >= 0) { - dout(10) << "bh_cancel_read on " << *bh << endl; + dout(10) << "bh_cancel_read on " << *bh << dendl; bh->rx_ioh = 0; mark_missing(bh); int l = bh->oc->put(); @@ -863,7 +864,7 @@ bool BufferCache::bh_cancel_read(BufferHead *bh) void BufferCache::bh_write(Onode *on, BufferHead *bh, block_t shouldbe) { - dout(10) << "bh_write " << *on << " on " << *bh << " in epoch " << bh->epoch_modified << endl; + dout(10) << "bh_write " << *on << " on " << *bh << " in epoch " << bh->epoch_modified << dendl; assert(bh->get_version() > 0); assert(bh->is_dirty()); @@ -878,7 +879,7 @@ void BufferCache::bh_write(Onode *on, BufferHead *bh, block_t shouldbe) if (shouldbe) assert(ex.length == 1 && ex.start == shouldbe); - dout(20) << "bh_write " << *on << " " << *bh << " to " << ex << endl; + dout(20) << "bh_write " << *on << " " << *bh << " to " << ex << dendl; //assert(bh->tx_ioh == 0); @@ -899,7 +900,7 @@ void BufferCache::bh_write(Onode *on, BufferHead *bh, block_t shouldbe) // assert: no partials on the same block // hose any partial on the same block if (bh->partial_write.count(ex.start)) { - dout(10) << "bh_write hosing parital write on same block " << ex.start << " " << *bh << endl; + dout(10) << "bh_write hosing parital write on same block " << ex.start << " " << *bh << dendl; dec_unflushed( bh->partial_write[ex.start].epoch ); bh->partial_write.erase(ex.start); } @@ -910,7 +911,7 @@ void BufferCache::bh_write(Onode *on, BufferHead *bh, block_t shouldbe) bool BufferCache::bh_cancel_write(BufferHead *bh, version_t cur_epoch) { if (bh->tx_ioh && dev.cancel_io(bh->tx_ioh) >= 0) { - dout(10) << "bh_cancel_write on " << *bh << endl; + dout(10) << "bh_cancel_write on " << *bh << dendl; bh->tx_ioh = 0; mark_dirty(bh); @@ -951,7 +952,7 @@ void BufferCache::rx_finish(ObjectCache *oc, { ebofs_lock.Lock(); dout(10) << "rx_finish ioh " << ioh << " on " << start << "~" << length - << ", at device block " << diskstart << endl; + << ", at device block " << diskstart << dendl; // oc if (oc->put() == 0) @@ -981,7 +982,7 @@ void BufferCache::rx_finish(ObjectCache *oc, dout(10) << "rx_finish partial from " << pblock << " -> " << p->first << " for epoch " << p->second.epoch //<< " (bh.epoch_modified is now " << bh->epoch_modified << ")" - << endl; + << dendl; // this had better be a past epoch //assert(p->epoch == epoch_modified - 1); // ?? @@ -1020,7 +1021,7 @@ void BufferCache::rx_finish(ObjectCache *oc, ++p) { BufferHead *bh = *p; dout(10) << "rx_finish applying shadow_partial for " << pblock - << " to " << *bh << endl; + << " to " << *bh << dendl; bufferptr bp = buffer::create_page_aligned(EBOFS_BLOCK_SIZE); bh->data.clear(); bh->data.push_back( bp ); @@ -1053,7 +1054,7 @@ void BufferCache::partial_tx_finish(version_t epoch) { ebofs_lock.Lock(); - dout(10) << "partial_tx_finish in epoch " << epoch << endl; + dout(10) << "partial_tx_finish in epoch " << epoch << dendl; // update unflushed counter assert(get_unflushed(EBOFS_BC_FLUSH_PARTIAL, epoch) > 0); @@ -1081,7 +1082,7 @@ void BufferCache::bh_queue_partial_write(Onode *on, BufferHead *bh) bh->partial_tx_to = exv[0].start; bh->partial_tx_epoch = bh->epoch_modified; - dout(10) << "bh_queue_partial_write " << *on << " on " << *bh << " block " << b << " epoch " << bh->epoch_modified << endl; + dout(10) << "bh_queue_partial_write " << *on << " on " << *bh << " block " << b << " epoch " << bh->epoch_modified << dendl; // copy map state, queue for this block @@ -1103,7 +1104,7 @@ void BufferCache::queue_partial(block_t from, block_t to, { dout(10) << "queue_partial " << from << " -> " << to << " in epoch " << epoch - << endl; + << dendl; if (partial_write[from].count(to)) { // this should be in the same epoch. @@ -1125,7 +1126,7 @@ void BufferCache::cancel_partial(block_t from, block_t to, version_t epoch) dout(10) << "cancel_partial " << from << " -> " << to << " (was epoch " << partial_write[from][to].epoch << ")" - << endl; + << dendl; partial_write[from].erase(to); if (partial_write[from].empty()) @@ -1137,12 +1138,12 @@ void BufferCache::cancel_partial(block_t from, block_t to, version_t epoch) void BufferCache::add_shadow_partial(block_t from, BufferHead *bh) { - dout(10) << "add_shadow_partial from " << from << " " << *bh << endl; + dout(10) << "add_shadow_partial from " << from << " " << *bh << dendl; shadow_partials[from].insert(bh); } void BufferCache::cancel_shadow_partial(block_t from, BufferHead *bh) { - dout(10) << "cancel_shadow_partial from " << from << " " << *bh << endl; + dout(10) << "cancel_shadow_partial from " << from << " " << *bh << dendl; shadow_partials[from].erase(bh); } diff --git a/trunk/ceph/ebofs/Ebofs.cc b/trunk/ceph/ebofs/Ebofs.cc index 1f9ca12b46ee3..3050ce8d0e3b6 100644 --- a/trunk/ceph/ebofs/Ebofs.cc +++ b/trunk/ceph/ebofs/Ebofs.cc @@ -29,9 +29,9 @@ // ******************* -#undef dout -#define dout(x) if (x <= g_conf.debug_ebofs) cout << g_clock.now() << " ebofs(" << dev.get_device_name() << ")." -#define derr(x) if (x <= g_conf.debug_ebofs) cerr << g_clock.now() << " ebofs(" << dev.get_device_name() << ")." +#include "debug.h" +#define dout(x) if (x <= g_conf.debug_ebofs) cout << dbeginl << g_clock.now() << " ebofs(" << dev.get_device_name() << ")." +#define derr(x) if (x <= g_conf.debug_ebofs) cerr << dbeginl << g_clock.now() << " ebofs(" << dev.get_device_name() << ")." char *nice_blocks(block_t b) @@ -59,7 +59,7 @@ int Ebofs::mount() return r; } - dout(2) << "mounting " << dev.get_device_name() << " " << dev.get_num_blocks() << " blocks, " << nice_blocks(dev.get_num_blocks()) << endl; + dout(2) << "mounting " << dev.get_device_name() << " " << dev.get_num_blocks() << " blocks, " << nice_blocks(dev.get_num_blocks()) << dendl; // read super bufferptr bp1 = buffer::create_page_aligned(EBOFS_BLOCK_SIZE); @@ -73,17 +73,17 @@ int Ebofs::mount() // valid superblocks? if (sb1->s_magic != EBOFS_MAGIC || sb2->s_magic != EBOFS_MAGIC) { - derr(0) << "mount bad magic, not a valid EBOFS file system" << endl; + derr(0) << "mount bad magic, not a valid EBOFS file system" << dendl; return -EINVAL; } if (sb1->num_blocks > dev.get_num_blocks() || sb2->num_blocks > dev.get_num_blocks()) { - derr(0) << "mount superblock size exceeds actual device size" << endl; + derr(0) << "mount superblock size exceeds actual device size" << dendl; return -EINVAL; } - dout(3) << "mount super @0 epoch " << sb1->epoch << endl; - dout(3) << "mount super @1 epoch " << sb2->epoch << endl; + dout(3) << "mount super @0 epoch " << sb1->epoch << dendl; + dout(3) << "mount super @1 epoch " << sb2->epoch << dendl; // pick newest super struct ebofs_super *sb = 0; @@ -92,7 +92,7 @@ int Ebofs::mount() else sb = sb2; super_epoch = sb->epoch; - dout(3) << "mount epoch " << super_epoch << endl; + dout(3) << "mount epoch " << super_epoch << dendl; assert(super_epoch == sb->epoch); super_fsid = sb->fsid; @@ -101,13 +101,13 @@ int Ebofs::mount() limbo_blocks = sb->limbo_blocks; // init node pools - dout(3) << "mount nodepool" << endl; + dout(3) << "mount nodepool" << dendl; nodepool.init( &sb->nodepool ); nodepool.read_usemap( dev, super_epoch ); nodepool.read_clean_nodes( dev ); // open tables - dout(3) << "mount opening tables" << endl; + dout(3) << "mount opening tables" << dendl; object_tab = new Table( nodepool, sb->object_tab ); for (int i=0; i( nodepool, sb->free_tab[i] ); @@ -124,31 +124,31 @@ int Ebofs::mount() if (journalfn) { journal = new FileJournal(this, journalfn); if (journal->open() < 0) { - dout(3) << "mount journal " << journalfn << " open failed" << endl; + dout(3) << "mount journal " << journalfn << " open failed" << dendl; delete journal; journal = 0; } else { - dout(3) << "mount journal " << journalfn << " opened, replaying" << endl; + dout(3) << "mount journal " << journalfn << " opened, replaying" << dendl; while (1) { bufferlist bl; epoch_t e; if (!journal->read_entry(bl, e)) { - dout(3) << "mount replay: end of journal, done." << endl; + dout(3) << "mount replay: end of journal, done." << dendl; break; } if (e < super_epoch) { - dout(3) << "mount replay: skipping old entry in epoch " << e << " < " << super_epoch << endl; + dout(3) << "mount replay: skipping old entry in epoch " << e << " < " << super_epoch << dendl; continue; } if (e == super_epoch+1) { super_epoch++; - dout(3) << "mount replay: jumped to next epoch " << super_epoch << endl; + dout(3) << "mount replay: jumped to next epoch " << super_epoch << dendl; } assert(e == super_epoch); - dout(3) << "mount replay: applying transaction in epoch " << e << endl; + dout(3) << "mount replay: applying transaction in epoch " << e << dendl; Transaction t; int off = 0; t._decode(bl, off); @@ -160,13 +160,13 @@ int Ebofs::mount() } } - dout(3) << "mount starting commit+finisher threads" << endl; + dout(3) << "mount starting commit+finisher threads" << dendl; commit_thread.create(); finisher_thread.create(); dout(1) << "mounted " << dev.get_device_name() << " " << dev.get_num_blocks() << " blocks, " << nice_blocks(dev.get_num_blocks()) << (journal ? ", with journal":", no journal") - << endl; + << dendl; mounted = true; @@ -204,7 +204,7 @@ int Ebofs::mkfs() nr.length = 20+ (num_blocks / 1000); if (nr.length < 10) nr.length = 10; nodepool.add_region(nr); - dout(10) << "mkfs: first node region at " << nr << endl; + dout(10) << "mkfs: first node region at " << nr << dendl; // allocate two usemaps block_t usemap_len = nodepool.get_usemap_len(); @@ -212,8 +212,8 @@ int Ebofs::mkfs() nodepool.usemap_even.length = usemap_len; nodepool.usemap_odd.start = nodepool.usemap_even.end(); nodepool.usemap_odd.length = usemap_len; - dout(10) << "mkfs: even usemap at " << nodepool.usemap_even << endl; - dout(10) << "mkfs: odd usemap at " << nodepool.usemap_odd << endl; + dout(10) << "mkfs: even usemap at " << nodepool.usemap_even << dendl; + dout(10) << "mkfs: odd usemap at " << nodepool.usemap_odd << dendl; // init tables struct ebofs_table empty; @@ -235,7 +235,7 @@ int Ebofs::mkfs() Extent left; left.start = nodepool.usemap_odd.end(); left.length = num_blocks - left.start; - dout(10) << "mkfs: free data blocks at " << left << endl; + dout(10) << "mkfs: free data blocks at " << left << dendl; allocator._release_into_limbo( left ); if (g_conf.ebofs_cloneable) { allocator.alloc_inc(nr); @@ -246,7 +246,7 @@ int Ebofs::mkfs() allocator.release_limbo(); // -> free_tab // write nodes, super, 2x - dout(10) << "mkfs: flushing nodepool and superblocks (2x)" << endl; + dout(10) << "mkfs: flushing nodepool and superblocks (2x)" << dendl; nodepool.commit_start( dev, 0 ); nodepool.commit_wait(); @@ -261,7 +261,7 @@ int Ebofs::mkfs() write_super(1, superbp1); // free memory - dout(10) << "mkfs: cleaning up" << endl; + dout(10) << "mkfs: cleaning up" << dendl; close_tables(); dev.close(); @@ -271,14 +271,14 @@ int Ebofs::mkfs() if (journalfn) { Journal *journal = new FileJournal(this, journalfn); if (journal->create() < 0) { - dout(3) << "mount journal " << journalfn << " created failed" << endl; + dout(3) << "mount journal " << journalfn << " created failed" << dendl; } else { - dout(3) << "mount journal " << journalfn << " created" << endl; + dout(3) << "mount journal " << journalfn << " created" << dendl; } delete journal; } - dout(2) << "mkfs: " << dev.get_device_name() << " " << dev.get_num_blocks() << " blocks, " << nice_blocks(dev.get_num_blocks()) << endl; + dout(2) << "mkfs: " << dev.get_device_name() << " " << dev.get_num_blocks() << " blocks, " << nice_blocks(dev.get_num_blocks()) << dendl; ebofs_lock.Unlock(); return 0; } @@ -302,19 +302,19 @@ int Ebofs::umount() ebofs_lock.Lock(); // mark unmounting - dout(1) << "umount start" << endl; + dout(1) << "umount start" << dendl; readonly = true; unmounting = true; // kick commit thread - dout(5) << "umount stopping commit thread" << endl; + dout(5) << "umount stopping commit thread" << dendl; commit_cond.Signal(); ebofs_lock.Unlock(); commit_thread.join(); ebofs_lock.Lock(); // kick finisher thread - dout(5) << "umount stopping finisher thread" << endl; + dout(5) << "umount stopping finisher thread" << dendl; finisher_lock.Lock(); finisher_stop = true; finisher_cond.Signal(); @@ -328,16 +328,16 @@ int Ebofs::umount() for (hash_map::iterator i = onode_map.begin(); i != onode_map.end(); i++) { - dout(0) << "umount *** leftover: " << i->first << " " << *(i->second) << endl; + dout(0) << "umount *** leftover: " << i->first << " " << *(i->second) << dendl; } // free memory - dout(5) << "umount cleaning up" << endl; + dout(5) << "umount cleaning up" << dendl; close_tables(); dev.close(); readonly = unmounting = mounted = false; - dout(1) << "umount done on " << dev.get_device_name() << endl; + dout(1) << "umount done on " << dev.get_device_name() << dendl; ebofs_lock.Unlock(); return 0; } @@ -348,7 +348,7 @@ void Ebofs::prepare_super(version_t epoch, bufferptr& bp) { struct ebofs_super sb; - dout(10) << "prepare_super v" << epoch << endl; + dout(10) << "prepare_super v" << epoch << dendl; // fill in super memset(&sb, 0, sizeof(sb)); @@ -404,7 +404,7 @@ void Ebofs::write_super(version_t epoch, bufferptr& bp) { block_t bno = epoch & 1; - dout(10) << "write_super v" << epoch << " to b" << bno << endl; + dout(10) << "write_super v" << epoch << " to b" << bno << dendl; dev.write(bno, 1, bp, "write_super"); } @@ -412,7 +412,7 @@ void Ebofs::write_super(version_t epoch, bufferptr& bp) int Ebofs::commit_thread_entry() { ebofs_lock.Lock(); - dout(10) << "commit_thread start" << endl; + dout(10) << "commit_thread start" << dendl; assert(!commit_thread_started); // there can be only one commit_thread_started = true; @@ -423,39 +423,48 @@ int Ebofs::commit_thread_entry() // wait for kick, or timeout if (g_conf.ebofs_commit_ms) { if (g_conf.ebofs_idle_commit_ms > 0) { + // *** this is an ugly ugly hack **** + // do not use // periodically check for idle block device + utime_t idle_wait(0, g_conf.ebofs_idle_commit_ms*1000); dout(20) << "commit_thread sleeping (up to) " << g_conf.ebofs_commit_ms << " ms, " - << g_conf.ebofs_idle_commit_ms << " ms if idle" << endl; - long left = g_conf.ebofs_commit_ms; - while (left > 0) { - long next = MIN(left, g_conf.ebofs_idle_commit_ms); - if (commit_cond.WaitInterval(ebofs_lock, utime_t(0, next*1000)) != ETIMEDOUT) + << idle_wait << " ms if idle" << dendl; + utime_t now = g_clock.now(); + utime_t stop = now; + stop += (double)g_conf.ebofs_commit_ms / 1000.0; + do { + utime_t wait = MIN(stop - now, idle_wait); + if (commit_cond.WaitInterval(ebofs_lock, wait) != ETIMEDOUT) { + dout(20) << "commit_thread i got kicked" << dendl; break; // we got kicked + } if (dev.is_idle()) { - dout(20) << "commit_thread bdev is idle, early commit" << endl; + dout(20) << "commit_thread bdev is idle, early commit" << dendl; break; // dev is idle } - left -= next; - dout(20) << "commit_thread " << left << " ms left" << endl; + now = g_clock.now(); + dout(20) << "commit_thread now=" << now << ", stop at " << stop << dendl; // hack hack //if (!left) g_conf.debug_ebofs = 10; // /hack hack - } + } while (now < stop); + dout(20) << "commit_thread done with idle loop" << dendl; + } else { // normal wait+timeout - dout(20) << "commit_thread sleeping (up to) " << g_conf.ebofs_commit_ms << " ms" << endl; + dout(20) << "commit_thread sleeping (up to) " << g_conf.ebofs_commit_ms << " ms" << dendl; commit_cond.WaitInterval(ebofs_lock, utime_t(0, g_conf.ebofs_commit_ms*1000)); } } else { // DEBUG.. wait until kicked - dout(10) << "commit_thread no commit_ms, waiting until kicked" << endl; + dout(10) << "commit_thread no commit_ms, waiting until kicked" << dendl; commit_cond.Wait(ebofs_lock); } if (unmounting) { - dout(10) << "commit_thread unmounting: final commit pass" << endl; + dout(10) << "commit_thread unmounting: final commit pass" << dendl; assert(readonly); unmounting = false; mounted = false; @@ -463,25 +472,26 @@ int Ebofs::commit_thread_entry() } if (!dirty && !limbo_blocks) { - dout(10) << "commit_thread not dirty" << endl; + dout(10) << "commit_thread not dirty" << dendl; } else { super_epoch++; dirty = false; - dout(10) << "commit_thread commit start, new epoch " << super_epoch << endl; + derr(10) << "commit_thread commit start, new epoch " << super_epoch << dendl; + dout(10) << "commit_thread commit start, new epoch " << super_epoch << dendl; dout(2) << "commit_thread data: " << 100*(dev.get_num_blocks()-get_free_blocks())/dev.get_num_blocks() << "% used, " << get_free_blocks() << " (" << 100*get_free_blocks()/dev.get_num_blocks() << "%) free in " << get_free_extents() << ", " << get_limbo_blocks() << " (" << 100*get_limbo_blocks()/dev.get_num_blocks() << "%) limbo in " << get_limbo_extents() - << endl; + << dendl; dout(2) << "commit_thread nodes: " << 100*nodepool.num_used()/nodepool.num_total() << "% used, " << nodepool.num_free() << " (" << 100*nodepool.num_free()/nodepool.num_total() << "%) free, " << nodepool.num_limbo() << " (" << 100*nodepool.num_limbo()/nodepool.num_total() << "%) limbo, " - << nodepool.num_total() << " total." << endl; + << nodepool.num_total() << " total." << dendl; dout(2) << "commit_thread bc: " << "size " << bc.get_size() << ", trimmable " << bc.get_trimmable() @@ -489,7 +499,7 @@ int Ebofs::commit_thread_entry() << "; dirty " << bc.get_stat_dirty() << ", tx " << bc.get_stat_tx() << ", max dirty " << g_conf.ebofs_bc_max_dirty - << endl; + << dendl; if (journal) journal->commit_epoch_start(); @@ -502,7 +512,7 @@ int Ebofs::commit_thread_entry() nodepool.commit_start( dev, super_epoch ); // blockdev barrier (prioritize our writes!) - dout(30) << "commit_thread barrier. flushing inodes " << inodes_flushing << endl; + dout(30) << "commit_thread barrier. flushing inodes " << inodes_flushing << dendl; dev.barrier(); // prepare super (before any changes get made!) @@ -511,19 +521,19 @@ int Ebofs::commit_thread_entry() // wait for it all to flush (drops global lock) commit_bc_wait(super_epoch-1); - dout(30) << "commit_thread bc flushed" << endl; + dout(30) << "commit_thread bc flushed" << dendl; commit_inodes_wait(); - dout(30) << "commit_thread inodes flushed" << endl; + dout(30) << "commit_thread inodes flushed" << dendl; nodepool.commit_wait(); - dout(30) << "commit_thread btree nodes flushed" << endl; + dout(30) << "commit_thread btree nodes flushed" << dendl; // ok, now (synchronously) write the prior super! - dout(10) << "commit_thread commit flushed, writing super for prior epoch" << endl; + dout(10) << "commit_thread commit flushed, writing super for prior epoch" << dendl; ebofs_lock.Unlock(); write_super(super_epoch, superbp); ebofs_lock.Lock(); - dout(10) << "commit_thread wrote super" << endl; + dout(10) << "commit_thread wrote super" << dendl; // free limbo space now // (since we're done allocating things, @@ -532,7 +542,7 @@ int Ebofs::commit_thread_entry() // do we need more node space? if (nodepool.num_free() < nodepool.num_total() / 3) { - dout(2) << "commit_thread running low on node space, allocating more." << endl; + dout(2) << "commit_thread running low on node space, allocating more." << dendl; alloc_more_node_space(); } @@ -540,14 +550,14 @@ int Ebofs::commit_thread_entry() if (journal) journal->commit_epoch_finish(); // kick waiters - dout(10) << "commit_thread queueing commit + kicking sync waiters" << endl; + dout(10) << "commit_thread queueing commit + kicking sync waiters" << dendl; queue_finishers(commit_waiters[super_epoch-1]); commit_waiters.erase(super_epoch-1); sync_cond.Signal(); - dout(10) << "commit_thread commit finish" << endl; + dout(10) << "commit_thread commit finish" << dendl; } // trim bc? @@ -556,7 +566,7 @@ int Ebofs::commit_thread_entry() } - dout(10) << "commit_thread finish" << endl; + dout(10) << "commit_thread finish" << dendl; commit_thread_started = false; ebofs_lock.Unlock(); return 0; @@ -565,37 +575,37 @@ int Ebofs::commit_thread_entry() void Ebofs::alloc_more_node_space() { - dout(1) << "alloc_more_node_space free " << nodepool.num_free() << "/" << nodepool.num_total() << endl; + dout(1) << "alloc_more_node_space free " << nodepool.num_free() << "/" << nodepool.num_total() << dendl; if (nodepool.num_regions() < EBOFS_MAX_NODE_REGIONS) { int want = nodepool.num_total(); Extent ex; allocator.allocate(ex, want, 2); - dout(1) << "alloc_more_node_space wants " << want << " more, got " << ex << endl; + dout(1) << "alloc_more_node_space wants " << want << " more, got " << ex << dendl; Extent even, odd; unsigned ulen = nodepool.get_usemap_len(nodepool.num_total() + ex.length); allocator.allocate(even, ulen, 2); allocator.allocate(odd, ulen, 2); - dout(1) << "alloc_more_node_space maps need " << ulen << " x2, got " << even << " " << odd << endl; + dout(1) << "alloc_more_node_space maps need " << ulen << " x2, got " << even << " " << odd << dendl; if (even.length == ulen && odd.length == ulen) { - dout(1) << "alloc_more_node_space got " << ex << ", new usemaps at even " << even << " odd " << odd << endl; + dout(1) << "alloc_more_node_space got " << ex << ", new usemaps at even " << even << " odd " << odd << dendl; allocator.release(nodepool.usemap_even); allocator.release(nodepool.usemap_odd); nodepool.add_region(ex); nodepool.usemap_even = even; nodepool.usemap_odd = odd; } else { - dout (1) << "alloc_more_node_space failed to get space for new usemaps" << endl; + dout (1) << "alloc_more_node_space failed to get space for new usemaps" << dendl; allocator.release(ex); allocator.release(even); allocator.release(odd); //assert(0); } } else { - dout(1) << "alloc_more_node_space already have max node regions!" << endl; + dout(1) << "alloc_more_node_space already have max node regions!" << dendl; assert(0); } } @@ -604,7 +614,7 @@ void Ebofs::alloc_more_node_space() void *Ebofs::finisher_thread_entry() { finisher_lock.Lock(); - dout(10) << "finisher_thread start" << endl; + dout(10) << "finisher_thread start" << dendl; while (!finisher_stop) { while (!finisher_queue.empty()) { @@ -621,11 +631,11 @@ void *Ebofs::finisher_thread_entry() } if (finisher_stop) break; - dout(30) << "finisher_thread sleeping" << endl; + dout(30) << "finisher_thread sleeping" << dendl; finisher_cond.Wait(finisher_lock); } - dout(10) << "finisher_thread start" << endl; + dout(10) << "finisher_thread start" << dendl; finisher_lock.Unlock(); return 0; } @@ -650,7 +660,7 @@ Onode* Ebofs::new_onode(object_t oid) dirty_onode(on); - dout(7) << "new_onode " << *on << endl; + dout(7) << "new_onode " << *on << dendl; return on; } @@ -663,14 +673,14 @@ Onode* Ebofs::get_onode(object_t oid) // yay Onode *on = onode_map[oid]; on->get(); - //cout << "get_onode " << *on << endl; + //cout << "get_onode " << *on << dendl; return on; } // on disk? Extent onode_loc; if (object_tab->lookup(oid, onode_loc) < 0) { - dout(10) << "onode lookup failed on " << oid << endl; + dout(10) << "onode lookup failed on " << oid << dendl; // object dne. return 0; } @@ -680,12 +690,12 @@ Onode* Ebofs::get_onode(object_t oid) // yep, just wait. Cond c; waitfor_onode[oid].push_back(&c); - dout(10) << "get_onode " << oid << " already loading, waiting" << endl; + dout(10) << "get_onode " << oid << " already loading, waiting" << dendl; c.Wait(ebofs_lock); continue; } - dout(10) << "get_onode reading " << oid << " from " << onode_loc << endl; + dout(10) << "get_onode reading " << oid << " from " << onode_loc << dendl; assert(waitfor_onode.count(oid) == 0); waitfor_onode[oid].clear(); // this should be empty initially. @@ -706,13 +716,13 @@ Onode* Ebofs::get_onode(object_t oid) // parse data block struct ebofs_onode *eo = (struct ebofs_onode*)bl.c_str(); if (eo->object_id != oid) { - cerr << " wrong oid in onode block: " << eo->object_id << " != " << oid << endl; - cerr << " onode_loc is " << eo->onode_loc << endl; - cerr << " object_size " << eo->object_size << endl; - cerr << " object_blocks " << eo->object_blocks << endl; + cerr << " wrong oid in onode block: " << eo->object_id << " != " << oid << dendl; + cerr << " onode_loc is " << eo->onode_loc << dendl; + cerr << " object_size " << eo->object_size << dendl; + cerr << " object_blocks " << eo->object_blocks << dendl; cerr << " " << eo->num_collections << " coll + " << eo->num_attr << " attr + " - << eo->num_extents << " extents" << endl; + << eo->num_extents << " extents" << dendl; assert(eo->object_id == oid); } on->readonly = eo->readonly; @@ -738,7 +748,7 @@ Onode* Ebofs::get_onode(object_t oid) p += sizeof(len); on->attr[key] = buffer::copy(p, len); p += len; - dout(15) << "get_onode " << *on << " attr " << key << " len " << len << endl; + dout(15) << "get_onode " << *on << " attr " << key << " len " << len << dendl; } // parse extents @@ -747,7 +757,7 @@ Onode* Ebofs::get_onode(object_t oid) for (int i=0; inum_extents; i++) { Extent ex = *((Extent*)p); on->extent_map[n] = ex; - dout(15) << "get_onode " << *on << " ex " << i << ": " << ex << endl; + dout(15) << "get_onode " << *on << " ex " << i << ": " << ex << dendl; n += ex.length; p += sizeof(Extent); } @@ -761,7 +771,7 @@ Onode* Ebofs::get_onode(object_t oid) waitfor_onode.erase(oid); // remove Cond list on->get(); - //cout << "get_onode " << *on << " (loaded)" << endl; + //cout << "get_onode " << *on << " (loaded)" << dendl; return on; } } @@ -811,7 +821,7 @@ void Ebofs::encode_onode(Onode *on, bufferlist& bl, unsigned& off) off += sizeof(int); bl.copy_in(off, l, i->second.c_str()); off += l; - dout(15) << "write_onode " << *on << " attr " << i->first << " len " << l << endl; + dout(15) << "write_onode " << *on << " attr " << i->first << " len " << l << dendl; } // extents @@ -820,7 +830,7 @@ void Ebofs::encode_onode(Onode *on, bufferlist& bl, unsigned& off) i++) { bl.copy_in(off, sizeof(Extent), (char*)&(i->second)); off += sizeof(Extent); - dout(15) << "write_onode " << *on << " ex " << i->first << ": " << i->second << endl; + dout(15) << "write_onode " << *on << " ex " << i->first << ": " << i->second << dendl; } } @@ -848,7 +858,7 @@ void Ebofs::write_onode(Onode *on) //object_tab->verify(); } - dout(10) << "write_onode " << *on << " to " << on->onode_loc << endl; + dout(10) << "write_onode " << *on << " to " << on->onode_loc << dendl; unsigned off = 0; encode_onode(on, bl, off); @@ -861,7 +871,7 @@ void Ebofs::write_onode(Onode *on) void Ebofs::remove_onode(Onode *on) { - dout(8) << "remove_onode " << *on << endl; + dout(8) << "remove_onode " << *on << dendl; assert(on->get_ref_count() >= 1); // caller @@ -878,7 +888,7 @@ void Ebofs::remove_onode(Onode *on) on->dangling = true; // remove from object table - //dout(0) << "remove_onode on " << *on << endl; + //dout(0) << "remove_onode on " << *on << dendl; object_tab->remove(on->object_id); // free onode space @@ -906,7 +916,7 @@ void Ebofs::remove_onode(Onode *on) dirty_onodes.erase(on); } - if (on->get_ref_count() > 1) dout(10) << "remove_onode **** will survive " << *on << endl; + if (on->get_ref_count() > 1) dout(10) << "remove_onode **** will survive " << *on << dendl; put_onode(on); dirty = true; @@ -915,10 +925,10 @@ void Ebofs::remove_onode(Onode *on) void Ebofs::put_onode(Onode *on) { on->put(); - //cout << "put_onode " << *on << endl; + //cout << "put_onode " << *on << dendl; if (on->get_ref_count() == 0 && on->dangling) { - //cout << " *** hosing on " << *on << endl; + //cout << " *** hosing on " << *on << dendl; delete on; } } @@ -938,7 +948,7 @@ void Ebofs::trim_inodes(int max) unsigned cmax = cnode_lru.lru_get_max(); if (max >= 0) omax = cmax = max; dout(10) << "trim_inodes start " << onode_lru.lru_get_size() << " / " << omax << " onodes, " - << cnode_lru.lru_get_size() << " / " << cmax << " cnodes" << endl; + << cnode_lru.lru_get_size() << " / " << cmax << " cnodes" << dendl; // onodes while (onode_lru.lru_get_size() > omax) { @@ -947,7 +957,7 @@ void Ebofs::trim_inodes(int max) if (on == 0) break; // nothing to expire // expire - dout(20) << "trim_inodes removing onode " << *on << endl; + dout(20) << "trim_inodes removing onode " << *on << dendl; onode_map.erase(on->object_id); on->dangling = true; @@ -955,7 +965,7 @@ void Ebofs::trim_inodes(int max) assert(on->oc == 0); // an open oc pins the onode! delete on; } else { - dout(-20) << "trim_inodes still active: " << *on << endl; + dout(-20) << "trim_inodes still active: " << *on << dendl; assert(0); // huh? } } @@ -968,7 +978,7 @@ void Ebofs::trim_inodes(int max) if (cn == 0) break; // nothing to expire // expire - dout(20) << "trim_inodes removing cnode " << *cn << endl; + dout(20) << "trim_inodes removing cnode " << *cn << dendl; cnode_map.erase(cn->coll_id); delete cn; @@ -976,7 +986,7 @@ void Ebofs::trim_inodes(int max) dout(10) << "trim_inodes finish " << onode_lru.lru_get_size() << " / " << omax << " onodes, " - << cnode_lru.lru_get_size() << " / " << cmax << " cnodes" << endl; + << cnode_lru.lru_get_size() << " / " << cmax << " cnodes" << dendl; } @@ -1026,12 +1036,12 @@ Cnode* Ebofs::get_cnode(coll_t cid) // yep, just wait. Cond c; waitfor_cnode[cid].push_back(&c); - dout(10) << "get_cnode " << cid << " already loading, waiting" << endl; + dout(10) << "get_cnode " << cid << " already loading, waiting" << dendl; c.Wait(ebofs_lock); continue; } - dout(10) << "get_cnode reading " << cid << " from " << cnode_loc << endl; + dout(10) << "get_cnode reading " << cid << " from " << cnode_loc << dendl; assert(waitfor_cnode.count(cid) == 0); waitfor_cnode[cid].clear(); // this should be empty initially. @@ -1063,7 +1073,7 @@ Cnode* Ebofs::get_cnode(coll_t cid) p += sizeof(len); cn->attr[key] = buffer::copy(p, len); p += len; - dout(15) << "get_cnode " << *cn << " attr " << key << " len " << len << endl; + dout(15) << "get_cnode " << *cn << " attr " << key << " len " << len << dendl; } // wake up other waiters @@ -1100,7 +1110,7 @@ void Ebofs::encode_cnode(Cnode *cn, bufferlist& bl, unsigned& off) bl.copy_in(off, len, i->second.c_str()); off += len; - dout(15) << "write_cnode " << *cn << " attr " << i->first << " len " << len << endl; + dout(15) << "write_cnode " << *cn << " attr " << i->first << " len " << len << dendl; } } @@ -1124,7 +1134,7 @@ void Ebofs::write_cnode(Cnode *cn) collection_tab->insert( cn->coll_id, cn->cnode_loc ); } - dout(10) << "write_cnode " << *cn << " to " << cn->cnode_loc << endl; + dout(10) << "write_cnode " << *cn << " to " << cn->cnode_loc << dendl; unsigned off = 0; encode_cnode(cn, bl, off); @@ -1137,7 +1147,7 @@ void Ebofs::write_cnode(Cnode *cn) void Ebofs::remove_cnode(Cnode *cn) { - dout(10) << "remove_cnode " << *cn << endl; + dout(10) << "remove_cnode " << *cn << dendl; // remove from table collection_tab->remove(cn->coll_id); @@ -1189,7 +1199,7 @@ void Ebofs::flush_inode_finish() { inodes_flushing--; if (inodes_flushing < 1000) - dout(20) << "flush_inode_finish, " << inodes_flushing << " left" << endl; + dout(20) << "flush_inode_finish, " << inodes_flushing << " left" << dendl; if (inodes_flushing == 0) inode_commit_cond.Signal(); } @@ -1198,7 +1208,7 @@ void Ebofs::flush_inode_finish() void Ebofs::commit_inodes_start() { - dout(10) << "commit_inodes_start" << endl; + dout(10) << "commit_inodes_start" << dendl; assert(inodes_flushing == 0); @@ -1226,17 +1236,17 @@ void Ebofs::commit_inodes_start() } dirty_cnodes.clear(); - dout(10) << "commit_inodes_start writing " << inodes_flushing << " onodes+cnodes" << endl; + dout(10) << "commit_inodes_start writing " << inodes_flushing << " onodes+cnodes" << dendl; } void Ebofs::commit_inodes_wait() { // caller must hold ebofs_lock while (inodes_flushing > 0) { - dout(10) << "commit_inodes_wait waiting for " << inodes_flushing << " onodes+cnodes to flush" << endl; + dout(10) << "commit_inodes_wait waiting for " << inodes_flushing << " onodes+cnodes to flush" << dendl; inode_commit_cond.Wait(ebofs_lock); } - dout(10) << "commit_inodes_wait all flushed" << endl; + dout(10) << "commit_inodes_wait all flushed" << dendl; } @@ -1258,14 +1268,14 @@ void Ebofs::trim_bc(off_t max) { if (max < 0) max = g_conf.ebofs_bc_size; - dout(10) << "trim_bc start: size " << bc.get_size() << ", trimmable " << bc.get_trimmable() << ", max " << max << endl; + dout(10) << "trim_bc start: size " << bc.get_size() << ", trimmable " << bc.get_trimmable() << ", max " << max << dendl; while (bc.get_size() > max && bc.get_trimmable()) { BufferHead *bh = (BufferHead*) bc.lru_rest.lru_expire(); if (!bh) break; - dout(25) << "trim_bc trimming " << *bh << endl; + dout(25) << "trim_bc trimming " << *bh << dendl; assert(bh->is_clean()); ObjectCache *oc = bh->oc; @@ -1274,30 +1284,28 @@ void Ebofs::trim_bc(off_t max) if (oc->is_empty()) { Onode *on = oc->on; - dout(10) << "trim_bc closing oc on " << *on << endl; + dout(10) << "trim_bc closing oc on " << *on << dendl; on->close_oc(); } } - dout(10) << "trim_bc finish: size " << bc.get_size() << ", trimmable " << bc.get_trimmable() << ", max " << max << endl; + dout(10) << "trim_bc finish: size " << bc.get_size() << ", trimmable " << bc.get_trimmable() << ", max " << max << dendl; } void Ebofs::kick_idle() { - dout(10) << "kick_idle" << endl; - commit_cond.Signal(); + dout(10) << "kick_idle" << dendl; + //commit_cond.Signal(); - /* ebofs_lock.Lock(); if (mounted && !unmounting && dirty) { - dout(0) << "kick_idle dirty, doing commit" << endl; + dout(10) << "kick_idle dirty, doing commit" << dendl; commit_cond.Signal(); } else { - dout(0) << "kick_idle !dirty or !mounted or unmounting, doing nothing" << endl; + dout(10) << "kick_idle !dirty or !mounted or unmounting, doing nothing" << dendl; } ebofs_lock.Unlock(); - */ } void Ebofs::sync(Context *onsafe) @@ -1325,17 +1333,17 @@ void Ebofs::sync() { ebofs_lock.Lock(); if (!dirty) { - dout(7) << "sync in " << super_epoch << ", not dirty" << endl; + dout(7) << "sync in " << super_epoch << ", not dirty" << dendl; } else { epoch_t start = super_epoch; - dout(7) << "sync start in " << start << endl; + dout(7) << "sync start in " << start << dendl; while (super_epoch == start) { - dout(7) << "sync kicking commit in " << super_epoch << endl; + dout(7) << "sync kicking commit in " << super_epoch << dendl; dirty = true; commit_cond.Signal(); sync_cond.Wait(ebofs_lock); } - dout(10) << "sync finish in " << super_epoch << endl; + dout(10) << "sync finish in " << super_epoch << dendl; } ebofs_lock.Unlock(); } @@ -1344,15 +1352,15 @@ void Ebofs::sync() void Ebofs::commit_bc_wait(version_t epoch) { - dout(10) << "commit_bc_wait on epoch " << epoch << endl; + dout(10) << "commit_bc_wait on epoch " << epoch << dendl; while (bc.get_unflushed(EBOFS_BC_FLUSH_BHWRITE,epoch) > 0 || bc.get_unflushed(EBOFS_BC_FLUSH_PARTIAL,epoch) > 0) { - //dout(10) << "commit_bc_wait " << bc.get_unflushed(epoch) << " unflushed in epoch " << epoch << endl; + //dout(10) << "commit_bc_wait " << bc.get_unflushed(epoch) << " unflushed in epoch " << epoch << dendl; dout(10) << "commit_bc_wait epoch " << epoch << ", unflushed bhwrite " << bc.get_unflushed(EBOFS_BC_FLUSH_BHWRITE) << ", unflushed partial " << bc.get_unflushed(EBOFS_BC_FLUSH_PARTIAL) - << endl; + << dendl; bc.waitfor_flush(); } @@ -1362,14 +1370,14 @@ void Ebofs::commit_bc_wait(version_t epoch) dout(10) << "commit_bc_wait all flushed for epoch " << epoch << "; " << bc.get_unflushed(EBOFS_BC_FLUSH_BHWRITE) << " " << bc.get_unflushed(EBOFS_BC_FLUSH_PARTIAL) - << endl; + << dendl; } int Ebofs::statfs(struct statfs *buf) { - dout(7) << "statfs" << endl; + dout(7) << "statfs" << dendl; buf->f_type = EBOFS_MAGIC; /* type of filesystem */ buf->f_bsize = 4096; /* optimal transfer block size */ @@ -1410,7 +1418,7 @@ void Ebofs::alloc_write(Onode *on, // subtract those off, so we're left with the committed bits (that must be reallocated). alloc.subtract(already_uncom); - dout(10) << "alloc_write must (re)alloc " << alloc << " on " << *on << endl; + dout(10) << "alloc_write must (re)alloc " << alloc << " on " << *on << dendl; // release it (into limbo) for (map::iterator i = alloc.m.begin(); @@ -1427,11 +1435,11 @@ void Ebofs::alloc_write(Onode *on, if (!old.empty()) { if (i->first == start) { old_bfirst = old[0].start; - dout(20) << "alloc_write old_bfirst " << old_bfirst << " of " << old[0] << endl; + dout(20) << "alloc_write old_bfirst " << old_bfirst << " of " << old[0] << dendl; } if (i->first+i->second == start+len) { old_blast = old[old.size()-1].last(); - dout(20) << "alloc_write old_blast " << old_blast << " of " << old[old.size()-1] << endl; + dout(20) << "alloc_write old_blast " << old_blast << " of " << old[old.size()-1] << dendl; } } } @@ -1451,7 +1459,7 @@ void Ebofs::alloc_write(Onode *on, // cancelable/moveable? if (alloc.contains(bh->start(), bh->length())) { - dout(10) << "alloc_write " << *bh << " already in " << alloc << endl; + dout(10) << "alloc_write " << *bh << " already in " << alloc << dendl; continue; } @@ -1463,14 +1471,14 @@ void Ebofs::alloc_write(Onode *on, assert(bh->epoch_modified == super_epoch); if (bc.bh_cancel_write(bh, super_epoch)) { if (bh->length() == 1) - dout(10) << "alloc_write unallocated tx " << old[0] << ", canceled " << *bh << endl; + dout(10) << "alloc_write unallocated tx " << old[0] << ", canceled " << *bh << dendl; // no, this isn't compatible with clone() and extent reference counting. //allocator.unallocate(old[0]); // release (into free) - allocator.release(old[0]); + allocator.release(old[0]); // **FIXME** no cloning yet, my friend! alloc.insert(bh->start(), bh->length()); } else { if (bh->length() == 1) - dout(10) << "alloc_write released tx " << old[0] << ", couldn't cancel " << *bh << endl; + dout(10) << "alloc_write released tx " << old[0] << ", couldn't cancel " << *bh << dendl; allocator.release(old[0]); // release (into limbo) alloc.insert(bh->start(), bh->length()); } @@ -1478,33 +1486,33 @@ void Ebofs::alloc_write(Onode *on, if (bh->length() == 1) dout(10) << "alloc_write skipped tx " << old[0] << ", not entirely within " << start << "~" << len - << " bh " << *bh << endl; + << " bh " << *bh << dendl; } } - dout(10) << "alloc_write will (re)alloc " << alloc << " on " << *on << endl; + dout(10) << "alloc_write will (re)alloc " << alloc << " on " << *on << dendl; } if (alloc.empty()) return; // no need to dirty the onode below! // merge alloc into onode uncommitted map - //dout(10) << " union of " << on->uncommitted << " and " << alloc << endl; + //dout(10) << " union of " << on->uncommitted << " and " << alloc << dendl; interval_set old = on->uncommitted; on->uncommitted.union_of(alloc); - dout(10) << "alloc_write onode.uncommitted is now " << on->uncommitted << endl; + dout(10) << "alloc_write onode.uncommitted is now " << on->uncommitted << dendl; if (0) { // verify interval_set ta; ta.intersection_of(on->uncommitted, alloc); - cout << " ta " << ta << endl; + cout << " ta " << ta << dendl; assert(alloc == ta); interval_set tb; tb.intersection_of(on->uncommitted, old); - cout << " tb " << tb << endl; + cout << " tb " << tb << dendl; assert(old == tb); } @@ -1514,7 +1522,7 @@ void Ebofs::alloc_write(Onode *on, for (map::iterator i = alloc.m.begin(); i != alloc.m.end(); i++) { - dout(15) << "alloc_write alloc " << i->first << "~" << i->second << " (of " << start << "~" << len << ")" << endl; + dout(15) << "alloc_write alloc " << i->first << "~" << i->second << " (of " << start << "~" << len << ")" << dendl; // allocate new space block_t left = i->second; @@ -1522,7 +1530,7 @@ void Ebofs::alloc_write(Onode *on, while (left > 0) { Extent ex; allocator.allocate(ex, left, Allocator::NEAR_LAST_FWD); - dout(10) << "alloc_write got " << ex << " for object offset " << cur << endl; + dout(10) << "alloc_write got " << ex << " for object offset " << cur << dendl; on->set_extent(cur, ex); // map object to new region left -= ex.length; cur += ex.length; @@ -1551,7 +1559,7 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) } if (off+(off_t)len > on->object_size) { dout(10) << "apply_write extending size on " << *on << ": " << on->object_size - << " -> " << off+len << endl; + << " -> " << off+len << dendl; on->object_size = off+len; dirty_onode(on); } @@ -1563,7 +1571,7 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) } if (zleft) dout(10) << "apply_write zeroing " << zleft << " bytes before " << off << "~" << len - << " in " << *on << endl; + << " in " << *on << dendl; block_t blast = (len+off-1) / EBOFS_BLOCK_SIZE; block_t blen = blast-bstart+1; @@ -1573,7 +1581,7 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) block_t old_bfirst = 0; // zero means not defined here (since we ultimately pass to bh_read) block_t old_blast = 0; alloc_write(on, bstart, blen, alloc, old_bfirst, old_blast); - dout(20) << "apply_write old_bfirst " << old_bfirst << ", old_blast " << old_blast << endl; + dout(20) << "apply_write old_bfirst " << old_bfirst << ", old_blast " << old_blast << dendl; if (fake_writes) { on->uncommitted.clear(); // worst case! @@ -1603,7 +1611,7 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) // old write in progress? if (bh->is_tx()) { // copy the buffer to avoid munging up in-flight write - dout(10) << "apply_write tx pending, copying buffer on " << *bh << endl; + dout(10) << "apply_write tx pending, copying buffer on " << *bh << dendl; bufferlist temp; temp.claim(bh->data); //bc.bufferpool.alloc(EBOFS_BLOCK_SIZE*bh->length(), bh->data); @@ -1616,13 +1624,13 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) if ((bh->start() == bstart && opos % EBOFS_BLOCK_SIZE != 0)) { BufferHead *right = bc.split(bh, bh->start()+1); hits[right->start()] = right; - dout(10) << "apply_write split off left block for partial write; rest is " << *right << endl; + dout(10) << "apply_write split off left block for partial write; rest is " << *right << dendl; } if ((bh->last() == blast && (len+off) % EBOFS_BLOCK_SIZE != 0) && ((off_t)len+off < on->object_size)) { BufferHead *right = bc.split(bh, bh->last()); hits[right->start()] = right; - dout(10) << "apply_write split off right block for upcoming partial write; rest is " << *right << endl; + dout(10) << "apply_write split off right block for upcoming partial write; rest is " << *right << dendl; } } @@ -1643,7 +1651,7 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) dout(10) << "apply_write writing into partial " << *bh << ":" << " off_in_bh " << off_in_bh << " len_in_bh " << len_in_bh - << endl; + << dendl; unsigned z = MIN( zleft, len_in_bh ); if (z) { bufferptr zp(z); @@ -1665,7 +1673,7 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) opos += len_in_bh-z; if (bh->partial_is_complete(on->object_size - bh->start()*EBOFS_BLOCK_SIZE)) { - dout(10) << "apply_write completed partial " << *bh << endl; + dout(10) << "apply_write completed partial " << *bh << dendl; //bc.bufferpool.alloc(EBOFS_BLOCK_SIZE*bh->length(), bh->data); // new buffers! bh->data.clear(); bh->data.push_back( buffer::create_page_aligned(EBOFS_BLOCK_SIZE*bh->length()) ); @@ -1675,13 +1683,13 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) bc.bh_write(on, bh); } else if (bh->is_rx()) { - dout(10) << "apply_write rx -> partial " << *bh << endl; + dout(10) << "apply_write rx -> partial " << *bh << dendl; assert(bh->length() == 1); bc.mark_partial(bh); bc.bh_queue_partial_write(on, bh); // queue the eventual write } else if (bh->is_missing()) { - dout(10) << "apply_write missing -> partial " << *bh << endl; + dout(10) << "apply_write missing -> partial " << *bh << dendl; assert(bh->length() == 1); bc.mark_partial(bh); @@ -1695,7 +1703,7 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) bc.bh_queue_partial_write(on, bh); // queue the eventual write } else if (bh->is_partial()) { - dout(10) << "apply_write already partial, no need to submit rx on " << *bh << endl; + dout(10) << "apply_write already partial, no need to submit rx on " << *bh << dendl; if (bh->partial_tx_epoch == super_epoch) bc.bh_cancel_partial_write(bh); bc.bh_queue_partial_write(on, bh); // queue the eventual write @@ -1709,7 +1717,7 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) dout(10) << "apply_write writing leading/tailing partial into " << *bh << ":" << " off_in_bh " << off_in_bh << " len_in_bh " << len_in_bh - << endl; + << dendl; // copy data into new buffers first (copy on write!) // FIXME: only do the modified pages? this might be a big bh! @@ -1761,7 +1769,7 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) dout(10) << "apply_write writing into " << *bh << ":" << " len_in_bh " << len_in_bh - << endl; + << dendl; unsigned z = MIN(len_in_bh, zleft); if (z) { @@ -1807,7 +1815,7 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) bool Ebofs::attempt_read(Onode *on, off_t off, size_t len, bufferlist& bl, Cond *will_wait_on, bool *will_wait_on_bool) { - dout(10) << "attempt_read " << *on << " " << off << "~" << len << endl; + dout(10) << "attempt_read " << *on << " " << off << "~" << len << dendl; ObjectCache *oc = on->get_oc(&bc); // map @@ -1826,7 +1834,7 @@ bool Ebofs::attempt_read(Onode *on, off_t off, size_t len, bufferlist& bl, for (map::iterator i = missing.begin(); i != missing.end(); i++) { - dout(10) << "attempt_read missing buffer " << *(i->second) << endl; + dout(10) << "attempt_read missing buffer " << *(i->second) << dendl; bc.bh_read(on, i->second); } BufferHead *wait_on = missing.begin()->second; @@ -1850,7 +1858,7 @@ bool Ebofs::attempt_read(Onode *on, off_t off, size_t len, bufferlist& bl, if (partials_ok) { // wait on this one Context *c = new C_Cond(will_wait_on, will_wait_on_bool); - dout(10) << "attempt_read insufficient partial buffer " << *(i->second) << " c " << c << endl; + dout(10) << "attempt_read insufficient partial buffer " << *(i->second) << " c " << c << dendl; i->second->waitfor_read[i->second->start()].push_back(c); } partials_ok = false; @@ -1862,7 +1870,7 @@ bool Ebofs::attempt_read(Onode *on, off_t off, size_t len, bufferlist& bl, if (!rx.empty()) { BufferHead *wait_on = rx.begin()->second; Context *c = new C_Cond(will_wait_on, will_wait_on_bool); - dout(20) << "attempt_read waiting for read to finish on " << *wait_on << " c " << c << endl; + dout(20) << "attempt_read waiting for read to finish on " << *wait_on << " c " << c << dendl; block_t b = MAX(wait_on->start(), bstart); wait_on->waitfor_read[b].push_back(c); return false; @@ -1904,7 +1912,7 @@ bool Ebofs::attempt_read(Onode *on, off_t off, size_t len, bufferlist& bl, pos += bh->data.length(); } else { bufferlist frag; - dout(10) << "substr " << (start-bhstart) << "~" << (end-start) << " of " << bh->data.length() << " in " << *bh << endl; + dout(10) << "substr " << (start-bhstart) << "~" << (end-start) << " of " << bh->data.length() << " in " << *bh << dendl; frag.substr_of(bh->data, start-bhstart, end-start); pos += frag.length(); bl.claim_append( frag ); @@ -1940,14 +1948,14 @@ int Ebofs::is_cached(object_t oid, off_t off, size_t len) int Ebofs::_is_cached(object_t oid, off_t off, size_t len) { if (!have_onode(oid)) { - dout(7) << "_is_cached " << oid << " " << off << "~" << len << " ... onode " << endl; + dout(7) << "_is_cached " << oid << " " << off << "~" << len << " ... onode " << dendl; return -1; // object dne? } Onode *on = get_onode(oid); if (!on->have_oc()) { // nothing is cached. return # of extents in file. - dout(10) << "_is_cached have onode but no object cache, returning extent count" << endl; + dout(10) << "_is_cached have onode but no object cache, returning extent count" << dendl; return on->extent_map.size(); } @@ -1962,7 +1970,7 @@ int Ebofs::_is_cached(object_t oid, off_t off, size_t len) map partials; // ?? int num_missing = on->get_oc(&bc)->try_map_read(bstart, blen); - dout(7) << "_is_cached try_map_read reports " << num_missing << " missing extents" << endl; + dout(7) << "_is_cached try_map_read reports " << num_missing << " missing extents" << dendl; return num_missing; // FIXME: actually, we should calculate if these extents are contiguous. @@ -1991,7 +1999,7 @@ void Ebofs::_trim_from_cache(object_t oid, off_t off, size_t len) { // be careful not to load it if we don't have it if (!have_onode(oid)) { - dout(7) << "_trim_from_cache " << oid << " " << off << "~" << len << " ... onode not in cache " << endl; + dout(7) << "_trim_from_cache " << oid << " " << off << "~" << len << " ... onode not in cache " << dendl; return; } @@ -2024,11 +2032,11 @@ int Ebofs::read(object_t oid, int Ebofs::_read(object_t oid, off_t off, size_t len, bufferlist& bl) { - dout(7) << "_read " << oid << " " << off << "~" << len << endl; + dout(7) << "_read " << oid << " " << off << "~" << len << dendl; Onode *on = get_onode(oid); if (!on) { - dout(7) << "_read " << oid << " " << off << "~" << len << " ... dne " << endl; + dout(7) << "_read " << oid << " " << off << "~" << len << " ... dne " << dendl; return -ENOENT; // object dne? } @@ -2039,7 +2047,7 @@ int Ebofs::_read(object_t oid, off_t off, size_t len, bufferlist& bl) while (1) { // check size bound if (off >= on->object_size) { - dout(7) << "_read " << oid << " " << off << "~" << len << " ... off past eof " << on->object_size << endl; + dout(7) << "_read " << oid << " " << off << "~" << len << " ... off past eof " << on->object_size << dendl; r = -ESPIPE; // FIXME better errno? break; } @@ -2056,7 +2064,7 @@ int Ebofs::_read(object_t oid, off_t off, size_t len, bufferlist& bl) cond.Wait(ebofs_lock); if (on->deleted) { - dout(7) << "_read " << oid << " " << off << "~" << len << " ... object deleted" << endl; + dout(7) << "_read " << oid << " " << off << "~" << len << " ... object deleted" << dendl; r = -ENOENT; break; } @@ -2067,7 +2075,7 @@ int Ebofs::_read(object_t oid, off_t off, size_t len, bufferlist& bl) trim_bc(); if (r < 0) return r; // return error, - dout(7) << "_read " << oid << " " << off << "~" << len << " ... got " << bl.length() << endl; + dout(7) << "_read " << oid << " " << off << "~" << len << " ... got " << bl.length() << dendl; return bl.length(); // or bytes read. } @@ -2089,7 +2097,7 @@ bool Ebofs::write_will_block() unsigned Ebofs::apply_transaction(Transaction& t, Context *onsafe) { ebofs_lock.Lock(); - dout(7) << "apply_transaction start (" << t.get_num_ops() << " ops)" << endl; + dout(7) << "apply_transaction start (" << t.get_num_ops() << " ops)" << dendl; unsigned r = _apply_transaction(t); @@ -2130,7 +2138,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) bufferlist *pbl; t.get_pbl(pbl); if (_read(oid, offset, len, *pbl) < 0) { - dout(7) << "apply_transaction fail on _read" << endl; + dout(7) << "apply_transaction fail on _read" << dendl; r &= bit; } } @@ -2143,7 +2151,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) struct stat *st; t.get_pstat(st); if (_stat(oid, st) < 0) { - dout(7) << "apply_transaction fail on _stat" << endl; + dout(7) << "apply_transaction fail on _stat" << dendl; r &= bit; } } @@ -2158,7 +2166,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) pair pattrval; t.get_pattrval(pattrval); if ((*(pattrval.second) = _getattr(oid, attrname, pattrval.first, *(pattrval.second))) < 0) { - dout(7) << "apply_transaction fail on _getattr" << endl; + dout(7) << "apply_transaction fail on _getattr" << dendl; r &= bit; } } @@ -2171,7 +2179,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) map *pset; t.get_pattrset(pset); if (_getattrs(oid, *pset) < 0) { - dout(7) << "apply_transaction fail on _getattrs" << endl; + dout(7) << "apply_transaction fail on _getattrs" << dendl; r &= bit; } } @@ -2188,7 +2196,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) bufferlist bl; t.get_bl(bl); if (_write(oid, offset, len, bl) < 0) { - dout(7) << "apply_transaction fail on _write" << endl; + dout(7) << "apply_transaction fail on _write" << dendl; r &= bit; } } @@ -2212,7 +2220,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) off_t len; t.get_length(len); if (_truncate(oid, len) < 0) { - dout(7) << "apply_transaction fail on _truncate" << endl; + dout(7) << "apply_transaction fail on _truncate" << dendl; r &= bit; } } @@ -2223,7 +2231,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) object_t oid; t.get_oid(oid); if (_remove(oid) < 0) { - dout(7) << "apply_transaction fail on _remove" << endl; + dout(7) << "apply_transaction fail on _remove" << dendl; r &= bit; } } @@ -2238,7 +2246,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) bufferlist bl; t.get_bl(bl); if (_setattr(oid, attrname, bl.c_str(), bl.length()) < 0) { - dout(7) << "apply_transaction fail on _setattr" << endl; + dout(7) << "apply_transaction fail on _setattr" << dendl; r &= bit; } } @@ -2251,7 +2259,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) map *pattrset; t.get_pattrset(pattrset); if (_setattrs(oid, *pattrset) < 0) { - dout(7) << "apply_transaction fail on _setattrs" << endl; + dout(7) << "apply_transaction fail on _setattrs" << dendl; r &= bit; } } @@ -2264,7 +2272,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) const char *attrname; t.get_attrname(attrname); if (_rmattr(oid, attrname) < 0) { - dout(7) << "apply_transaction fail on _rmattr" << endl; + dout(7) << "apply_transaction fail on _rmattr" << dendl; r &= bit; } } @@ -2277,7 +2285,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) object_t noid; t.get_oid(noid); if (_clone(oid, noid) < 0) { - dout(7) << "apply_transaction fail on _clone" << endl; + dout(7) << "apply_transaction fail on _clone" << dendl; r &= bit; } } @@ -2288,7 +2296,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) coll_t cid; t.get_cid(cid); if (_create_collection(cid) < 0) { - dout(7) << "apply_transaction fail on _create_collection" << endl; + dout(7) << "apply_transaction fail on _create_collection" << dendl; r &= bit; } } @@ -2299,7 +2307,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) coll_t cid; t.get_cid(cid); if (_destroy_collection(cid) < 0) { - dout(7) << "apply_transaction fail on _destroy_collection" << endl; + dout(7) << "apply_transaction fail on _destroy_collection" << dendl; r &= bit; } } @@ -2312,7 +2320,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) object_t oid; t.get_oid(oid); if (_collection_add(cid, oid) < 0) { - //dout(7) << "apply_transaction fail on _collection_add" << endl; + //dout(7) << "apply_transaction fail on _collection_add" << dendl; //r &= bit; } } @@ -2325,7 +2333,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) object_t oid; t.get_oid(oid); if (_collection_remove(cid, oid) < 0) { - dout(7) << "apply_transaction fail on _collection_remove" << endl; + dout(7) << "apply_transaction fail on _collection_remove" << dendl; r &= bit; } } @@ -2341,7 +2349,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) t.get_bl(bl); if (_collection_setattr(cid, attrname, bl.c_str(), bl.length()) < 0) { //if (_collection_setattr(cid, attrname, attrval.first, attrval.second) < 0) { - dout(7) << "apply_transaction fail on _collection_setattr" << endl; + dout(7) << "apply_transaction fail on _collection_setattr" << dendl; r &= bit; } } @@ -2354,21 +2362,21 @@ unsigned Ebofs::_apply_transaction(Transaction& t) const char *attrname; t.get_attrname(attrname); if (_collection_rmattr(cid, attrname) < 0) { - dout(7) << "apply_transaction fail on _collection_rmattr" << endl; + dout(7) << "apply_transaction fail on _collection_rmattr" << dendl; r &= bit; } } break; default: - cerr << "bad op " << op << endl; + cerr << "bad op " << op << dendl; assert(0); } bit = bit << 1; } - dout(7) << "_apply_transaction finish (r = " << r << ")" << endl; + dout(7) << "_apply_transaction finish (r = " << r << ")" << dendl; return r; } @@ -2376,7 +2384,7 @@ unsigned Ebofs::_apply_transaction(Transaction& t) int Ebofs::_write(object_t oid, off_t offset, size_t length, const bufferlist& bl) { - dout(7) << "_write " << oid << " " << offset << "~" << length << endl; + dout(7) << "_write " << oid << " " << offset << "~" << length << dendl; assert(bl.length() == length); // too much unflushed dirty data? (if so, block!) @@ -2390,7 +2398,7 @@ int Ebofs::_write(object_t oid, off_t offset, size_t length, const bufferlist& b << "; dirty " << bc.get_stat_dirty() << ", tx " << bc.get_stat_tx() << ", max dirty " << g_conf.ebofs_bc_max_dirty - << endl; + << dendl; while (_write_will_block()) bc.waitfor_stat(); // waits on ebofs_lock @@ -2404,14 +2412,14 @@ int Ebofs::_write(object_t oid, off_t offset, size_t length, const bufferlist& b << "; dirty " << bc.get_stat_dirty() << ", tx " << bc.get_stat_tx() << ", max dirty " << g_conf.ebofs_bc_max_dirty - << endl; + << dendl; } // out of space? unsigned max = (length+offset) / EBOFS_BLOCK_SIZE + 10; // very conservative; assumes we have to rewrite max += dirty_onodes.size() + dirty_cnodes.size(); if (max >= free_blocks) { - dout(1) << "write failing, only " << free_blocks << " blocks free, may need up to " << max << endl; + dout(1) << "write failing, only " << free_blocks << " blocks free, may need up to " << max << dendl; return -ENOSPC; } @@ -2472,7 +2480,7 @@ int Ebofs::write(object_t oid, int Ebofs::_remove(object_t oid) { - dout(7) << "_remove " << oid << endl; + dout(7) << "_remove " << oid << dendl; // get inode Onode *on = get_onode(oid); @@ -2515,7 +2523,7 @@ int Ebofs::remove(object_t oid, Context *onsafe) int Ebofs::_truncate(object_t oid, off_t size) { - dout(7) << "_truncate " << oid << " size " << size << endl; + dout(7) << "_truncate " << oid << " size " << size << dendl; Onode *on = get_onode(oid); if (!on) @@ -2558,7 +2566,7 @@ int Ebofs::_truncate(object_t oid, off_t size) left.insert(0, nblocks); uncom.intersection_of(left, on->uncommitted); } - dout(10) << "uncommitted was " << on->uncommitted << " now " << uncom << endl; + dout(10) << "uncommitted was " << on->uncommitted << " now " << uncom << dendl; on->uncommitted = uncom; } @@ -2629,7 +2637,7 @@ int Ebofs::clone(object_t from, object_t to, Context *onsafe) int Ebofs::_clone(object_t from, object_t to) { - dout(7) << "_clone " << from << " -> " << to << endl; + dout(7) << "_clone " << from << " -> " << to << dendl; if (!g_conf.ebofs_cloneable) return -1; // no! @@ -2725,7 +2733,7 @@ int Ebofs::pick_object_revision_lt(object_t& oid) } dout(8) << "find_object_revision " << orig << " -> " << oid - << " r=" << r << endl; + << " r=" << r << dendl; } ebofs_lock.Unlock(); return r; @@ -2737,7 +2745,7 @@ int Ebofs::pick_object_revision_lt(object_t& oid) bool Ebofs::exists(object_t oid) { ebofs_lock.Lock(); - dout(8) << "exists " << oid << endl; + dout(8) << "exists " << oid << dendl; bool e = (object_tab->lookup(oid) == 0); ebofs_lock.Unlock(); return e; @@ -2753,7 +2761,7 @@ int Ebofs::stat(object_t oid, struct stat *st) int Ebofs::_stat(object_t oid, struct stat *st) { - dout(7) << "_stat " << oid << endl; + dout(7) << "_stat " << oid << dendl; Onode *on = get_onode(oid); if (!on) return -ENOENT; @@ -2768,7 +2776,7 @@ int Ebofs::_stat(object_t oid, struct stat *st) int Ebofs::_setattr(object_t oid, const char *name, const void *value, size_t size) { - dout(8) << "setattr " << oid << " '" << name << "' len " << size << endl; + dout(8) << "setattr " << oid << " '" << name << "' len " << size << dendl; Onode *on = get_onode(oid); if (!on) return -ENOENT; @@ -2782,7 +2790,7 @@ int Ebofs::_setattr(object_t oid, const char *name, const void *value, size_t si dirty_onode(on); put_onode(on); - dout(8) << "setattr " << oid << " '" << name << "' len " << size << " success" << endl; + dout(8) << "setattr " << oid << " '" << name << "' len " << size << " success" << dendl; return 0; } @@ -2815,7 +2823,7 @@ int Ebofs::setattr(object_t oid, const char *name, const void *value, size_t siz int Ebofs::_setattrs(object_t oid, map& attrset) { - dout(8) << "setattrs " << oid << endl; + dout(8) << "setattrs " << oid << dendl; Onode *on = get_onode(oid); if (!on) return -ENOENT; @@ -2866,7 +2874,7 @@ int Ebofs::getattr(object_t oid, const char *name, void *value, size_t size) int Ebofs::_getattr(object_t oid, const char *name, void *value, size_t size) { - dout(8) << "_getattr " << oid << " '" << name << "' maxlen " << size << endl; + dout(8) << "_getattr " << oid << " '" << name << "' maxlen " << size << dendl; Onode *on = get_onode(oid); if (!on) return -ENOENT; @@ -2874,11 +2882,11 @@ int Ebofs::_getattr(object_t oid, const char *name, void *value, size_t size) string n(name); int r = 0; if (on->attr.count(n) == 0) { - dout(10) << "_getattr " << oid << " '" << name << "' dne" << endl; + dout(10) << "_getattr " << oid << " '" << name << "' dne" << dendl; r = -1; } else { r = MIN( on->attr[n].length(), size ); - dout(10) << "_getattr " << oid << " '" << name << "' got len " << r << endl; + dout(10) << "_getattr " << oid << " '" << name << "' got len " << r << dendl; memcpy(value, on->attr[n].c_str(), r ); } put_onode(on); @@ -2895,7 +2903,7 @@ int Ebofs::getattrs(object_t oid, map &aset) int Ebofs::_getattrs(object_t oid, map &aset) { - dout(8) << "_getattrs " << oid << endl; + dout(8) << "_getattrs " << oid << dendl; Onode *on = get_onode(oid); if (!on) return -ENOENT; @@ -2908,7 +2916,7 @@ int Ebofs::_getattrs(object_t oid, map &aset) int Ebofs::_rmattr(object_t oid, const char *name) { - dout(8) << "_rmattr " << oid << " '" << name << "'" << endl; + dout(8) << "_rmattr " << oid << " '" << name << "'" << dendl; Onode *on = get_onode(oid); if (!on) return -ENOENT; @@ -2954,7 +2962,7 @@ int Ebofs::rmattr(object_t oid, const char *name, Context *onsafe) int Ebofs::listattr(object_t oid, vector& attrs) { ebofs_lock.Lock(); - dout(8) << "listattr " << oid << endl; + dout(8) << "listattr " << oid << dendl; Onode *on = get_onode(oid); if (!on) { @@ -2981,7 +2989,7 @@ int Ebofs::listattr(object_t oid, vector& attrs) int Ebofs::list_collections(list& ls) { ebofs_lock.Lock(); - dout(9) << "list_collections " << endl; + dout(9) << "list_collections " << dendl; Table::Cursor cursor(collection_tab); @@ -3000,7 +3008,7 @@ int Ebofs::list_collections(list& ls) int Ebofs::_create_collection(coll_t cid) { - dout(9) << "_create_collection " << hex << cid << dec << endl; + dout(9) << "_create_collection " << hex << cid << dec << dendl; if (_collection_exists(cid)) return -EEXIST; @@ -3040,7 +3048,7 @@ int Ebofs::create_collection(coll_t cid, Context *onsafe) int Ebofs::_destroy_collection(coll_t cid) { - dout(9) << "_destroy_collection " << hex << cid << dec << endl; + dout(9) << "_destroy_collection " << hex << cid << dec << dendl; if (!_collection_exists(cid)) return -ENOENT; @@ -3098,7 +3106,7 @@ int Ebofs::destroy_collection(coll_t cid, Context *onsafe) bool Ebofs::collection_exists(coll_t cid) { ebofs_lock.Lock(); - dout(10) << "collection_exists " << hex << cid << dec << endl; + dout(10) << "collection_exists " << hex << cid << dec << dendl; bool r = _collection_exists(cid); ebofs_lock.Unlock(); return r; @@ -3110,7 +3118,7 @@ bool Ebofs::_collection_exists(coll_t cid) int Ebofs::_collection_add(coll_t cid, object_t oid) { - dout(9) << "_collection_add " << hex << cid << " object " << oid << dec << endl; + dout(9) << "_collection_add " << hex << cid << " object " << oid << dec << dendl; if (!_collection_exists(cid)) return -ENOENT; @@ -3161,7 +3169,7 @@ int Ebofs::collection_add(coll_t cid, object_t oid, Context *onsafe) int Ebofs::_collection_remove(coll_t cid, object_t oid) { - dout(9) << "_collection_remove " << hex << cid << " object " << oid << dec << endl; + dout(9) << "_collection_remove " << hex << cid << " object " << oid << dec << dendl; if (!_collection_exists(cid)) return -ENOENT; @@ -3213,7 +3221,7 @@ int Ebofs::collection_remove(coll_t cid, object_t oid, Context *onsafe) int Ebofs::collection_list(coll_t cid, list& ls) { ebofs_lock.Lock(); - dout(9) << "collection_list " << hex << cid << dec << endl; + dout(9) << "collection_list " << hex << cid << dec << dendl; if (!_collection_exists(cid)) { ebofs_lock.Unlock(); @@ -3228,7 +3236,7 @@ int Ebofs::collection_list(coll_t cid, list& ls) const coll_t c = cursor.current().key.first; const object_t o = cursor.current().key.second; if (c != cid) break; // end! - dout(10) << "collection_list " << hex << cid << " includes " << o << dec << endl; + dout(10) << "collection_list " << hex << cid << " includes " << o << dec << dendl; ls.push_back(o); num++; if (cursor.move_right() < 0) break; @@ -3242,7 +3250,7 @@ int Ebofs::collection_list(coll_t cid, list& ls) int Ebofs::_collection_setattr(coll_t cid, const char *name, const void *value, size_t size) { - dout(10) << "_collection_setattr " << hex << cid << dec << " '" << name << "' len " << size << endl; + dout(10) << "_collection_setattr " << hex << cid << dec << " '" << name << "' len " << size << dendl; Cnode *cn = get_cnode(cid); if (!cn) return -ENOENT; @@ -3258,7 +3266,7 @@ int Ebofs::_collection_setattr(coll_t cid, const char *name, const void *value, int Ebofs::collection_setattr(coll_t cid, const char *name, const void *value, size_t size, Context *onsafe) { ebofs_lock.Lock(); - dout(10) << "collection_setattr " << hex << cid << dec << " '" << name << "' len " << size << endl; + dout(10) << "collection_setattr " << hex << cid << dec << " '" << name << "' len " << size << dendl; int r = _collection_setattr(cid, name, value, size); @@ -3286,7 +3294,7 @@ int Ebofs::collection_setattr(coll_t cid, const char *name, const void *value, s int Ebofs::collection_getattr(coll_t cid, const char *name, void *value, size_t size) { ebofs_lock.Lock(); - dout(10) << "collection_setattr " << hex << cid << dec << " '" << name << "' maxlen " << size << endl; + dout(10) << "collection_setattr " << hex << cid << dec << " '" << name << "' maxlen " << size << dendl; Cnode *cn = get_cnode(cid); if (!cn) { @@ -3310,7 +3318,7 @@ int Ebofs::collection_getattr(coll_t cid, const char *name, void *value, size_t int Ebofs::_collection_rmattr(coll_t cid, const char *name) { - dout(10) << "_collection_rmattr " << hex << cid << dec << " '" << name << "'" << endl; + dout(10) << "_collection_rmattr " << hex << cid << dec << " '" << name << "'" << dendl; Cnode *cn = get_cnode(cid); if (!cn) return -ENOENT; @@ -3354,7 +3362,7 @@ int Ebofs::collection_rmattr(coll_t cid, const char *name, Context *onsafe) int Ebofs::collection_listattr(coll_t cid, vector& attrs) { ebofs_lock.Lock(); - dout(10) << "collection_listattr " << hex << cid << dec << endl; + dout(10) << "collection_listattr " << hex << cid << dec << dendl; Cnode *cn = get_cnode(cid); if (!cn) { @@ -3393,7 +3401,7 @@ void Ebofs::_export_freelist(bufferlist& bl) assert(cursor.current().value > 0); Extent ex(cursor.current().key, cursor.current().value); - dout(10) << "_export_freelist " << ex << endl; + dout(10) << "_export_freelist " << ex << dendl; bl.append((char*)&ex, sizeof(ex)); if (cursor.move_right() <= 0) break; } @@ -3412,7 +3420,7 @@ void Ebofs::_import_freelist(bufferlist& bl) int num = bl.length() / sizeof(Extent); Extent *p = (Extent*)bl.c_str(); for (int i=0; i *tab; if (b < EBOFS_NUM_FREE_BUCKETS) { tab = free_tab[b]; - dout(30) << "dump bucket " << b << " " << tab->get_num_keys() << endl; + dout(30) << "dump bucket " << b << " " << tab->get_num_keys() << dendl; } else { tab = limbo_tab; - dout(30) << "dump limbo " << tab->get_num_keys() << endl;; + dout(30) << "dump limbo " << tab->get_num_keys() << dendl;; } if (tab->get_num_keys() > 0) { diff --git a/trunk/ceph/ebofs/FileJournal.cc b/trunk/ceph/ebofs/FileJournal.cc index b49ee82bb42b4..1050724459d74 100644 --- a/trunk/ceph/ebofs/FileJournal.cc +++ b/trunk/ceph/ebofs/FileJournal.cc @@ -22,19 +22,21 @@ #include "config.h" +#include "debug.h" + #undef dout -#define dout(x) if (x <= g_conf.debug_ebofs) cout << g_clock.now() << " ebofs(" << ebofs->dev.get_device_name() << ").journal " -#define derr(x) if (x <= g_conf.debug_ebofs) cerr << g_clock.now() << " ebofs(" << ebofs->dev.get_device_name() << ").journal " +#define dout(x) if (x <= g_conf.debug_ebofs) cout << dbeginl << g_clock.now() << " ebofs(" << ebofs->dev.get_device_name() << ").journal " +#define derr(x) if (x <= g_conf.debug_ebofs) cerr << dbeginl << g_clock.now() << " ebofs(" << ebofs->dev.get_device_name() << ").journal " int FileJournal::create() { - dout(2) << "create " << fn << endl; + dout(2) << "create " << fn << dendl; // open/create fd = ::open(fn.c_str(), O_RDWR|O_SYNC); if (fd < 0) { - dout(2) << "create failed " << errno << " " << strerror(errno) << endl; + dout(2) << "create failed " << errno << " " << strerror(errno) << dendl; return -errno; } assert(fd > 0); @@ -45,7 +47,7 @@ int FileJournal::create() // get size struct stat st; ::fstat(fd, &st); - dout(2) << "create " << fn << " " << st.st_size << " bytes" << endl; + dout(2) << "create " << fn << " " << st.st_size << " bytes" << dendl; // write empty header memset(&header, 0, sizeof(header)); @@ -65,13 +67,13 @@ int FileJournal::create() int FileJournal::open() { - //dout(1) << "open " << fn << endl; + //dout(1) << "open " << fn << dendl; // open and file assert(fd == 0); fd = ::open(fn.c_str(), O_RDWR|O_SYNC); if (fd < 0) { - dout(2) << "open failed " << errno << " " << strerror(errno) << endl; + dout(2) << "open failed " << errno << " " << strerror(errno) << dendl; return -errno; } assert(fd > 0); @@ -83,7 +85,7 @@ int FileJournal::open() // read header? read_header(); if (header.fsid != ebofs->get_fsid()) { - dout(2) << "open journal fsid doesn't match, invalid (someone else's?) journal" << endl; + dout(2) << "open journal fsid doesn't match, invalid (someone else's?) journal" << dendl; } else if (header.num > 0) { // valid header, pick an offset @@ -91,7 +93,7 @@ int FileJournal::open() if (header.epoch[i] == ebofs->get_super_epoch()) { dout(2) << "using read_pos header pointer " << header.epoch[i] << " at " << header.offset[i] - << endl; + << dendl; read_pos = header.offset[i]; write_pos = queue_pos = 0; break; @@ -99,12 +101,12 @@ int FileJournal::open() else if (header.epoch[i] < ebofs->get_super_epoch()) { dout(2) << "super_epoch is " << ebofs->get_super_epoch() << ", skipping old " << header.epoch[i] << " at " << header.offset[i] - << endl; + << dendl; } else if (header.epoch[i] > ebofs->get_super_epoch()) { dout(2) << "super_epoch is " << ebofs->get_super_epoch() << ", but wtf, journal is later " << header.epoch[i] << " at " << header.offset[i] - << endl; + << dendl; break; } } @@ -117,7 +119,7 @@ int FileJournal::open() void FileJournal::close() { - dout(1) << "close " << fn << endl; + dout(1) << "close " << fn << dendl; // stop writer thread stop_writer(); @@ -153,46 +155,46 @@ void FileJournal::print_header() for (int i=0; i= header.offset[0]) { dout(10) << "submit_entry JOURNAL FULL (and wrapped), " << queue_pos << "+" << size << " >= " << header.offset[0] - << endl; + << dendl; full = true; print_header(); return false; @@ -286,7 +288,7 @@ bool FileJournal::submit_entry(bufferlist& e, Context *oncommit) // is there room if we wrap? if ((off_t)sizeof(header_t) + size < header.offset[0]) { // yes! - dout(10) << "submit_entry wrapped from " << queue_pos << " to " << sizeof(header_t) << endl; + dout(10) << "submit_entry wrapped from " << queue_pos << " to " << sizeof(header_t) << dendl; header.wrap = queue_pos; queue_pos = sizeof(header_t); header.push(ebofs->get_super_epoch(), queue_pos); @@ -294,7 +296,7 @@ bool FileJournal::submit_entry(bufferlist& e, Context *oncommit) // no room. dout(10) << "submit_entry JOURNAL FULL (and can't wrap), " << queue_pos << "+" << size << " >= " << header.max_size - << endl; + << dendl; full = true; return false; } @@ -303,7 +305,7 @@ bool FileJournal::submit_entry(bufferlist& e, Context *oncommit) dout(10) << "submit_entry " << queue_pos << " : " << e.length() << " epoch " << ebofs->get_super_epoch() - << " " << oncommit << endl; + << " " << oncommit << dendl; // dump on queue writeq.push_back(pair(ebofs->get_super_epoch(), e)); @@ -322,18 +324,18 @@ void FileJournal::commit_epoch_start() { dout(10) << "commit_epoch_start on " << ebofs->get_super_epoch()-1 << " -- new epoch " << ebofs->get_super_epoch() - << endl; + << dendl; Mutex::Locker locker(write_lock); // was full -> empty -> now usable? if (full) { if (header.num != 0) { - dout(1) << " journal FULL, ignoring this epoch" << endl; + dout(1) << " journal FULL, ignoring this epoch" << dendl; return; } - dout(1) << " clearing FULL flag, journal now usable" << endl; + dout(1) << " clearing FULL flag, journal now usable" << dendl; full = false; } @@ -344,13 +346,13 @@ void FileJournal::commit_epoch_start() void FileJournal::commit_epoch_finish() { - dout(10) << "commit_epoch_finish committed " << ebofs->get_super_epoch()-1 << endl; + dout(10) << "commit_epoch_finish committed " << ebofs->get_super_epoch()-1 << dendl; write_lock.Lock(); { if (full) { // full journal damage control. - dout(15) << " journal was FULL, contents now committed, clearing header. journal still not usable until next epoch." << endl; + dout(15) << " journal was FULL, contents now committed, clearing header. journal still not usable until next epoch." << dendl; header.clear(); write_pos = queue_pos = sizeof(header_t); } else { @@ -367,7 +369,7 @@ void FileJournal::commit_epoch_finish() dout(15) << " dropping unwritten and committed " << write_pos << " : " << writeq.front().second.length() << " epoch " << writeq.front().first - << endl; + << dendl; // finisher? Context *oncommit = commitq.front(); if (oncommit) callbacks.push_back(oncommit); @@ -400,7 +402,7 @@ void FileJournal::make_writeable() bool FileJournal::read_entry(bufferlist& bl, epoch_t& epoch) { if (!read_pos) { - dout(2) << "read_entry -- not readable" << endl; + dout(2) << "read_entry -- not readable" << dendl; return false; } @@ -414,7 +416,7 @@ bool FileJournal::read_entry(bufferlist& bl, epoch_t& epoch) } } assert(read_pos != header.wrap); - dout(10) << "read_entry wrapped from " << header.wrap << " to " << read_pos << endl; + dout(10) << "read_entry wrapped from " << header.wrap << " to " << read_pos << dendl; } // header @@ -422,7 +424,7 @@ bool FileJournal::read_entry(bufferlist& bl, epoch_t& epoch) ::lseek(fd, read_pos, SEEK_SET); ::read(fd, &h, sizeof(h)); if (!h.check_magic(read_pos, header.fsid)) { - dout(2) << "read_entry " << read_pos << " : bad header magic, end of journal" << endl; + dout(2) << "read_entry " << read_pos << " : bad header magic, end of journal" << dendl; return false; } @@ -436,7 +438,7 @@ bool FileJournal::read_entry(bufferlist& bl, epoch_t& epoch) if (!f.check_magic(read_pos, header.fsid) || h.epoch != f.epoch || h.len != f.len) { - dout(2) << "read_entry " << read_pos << " : bad footer magic, partially entry, end of journal" << endl; + dout(2) << "read_entry " << read_pos << " : bad footer magic, partially entry, end of journal" << dendl; return false; } @@ -445,7 +447,7 @@ bool FileJournal::read_entry(bufferlist& bl, epoch_t& epoch) dout(1) << "read_entry " << read_pos << " : " << " " << h.len << " bytes" << " epoch " << h.epoch - << endl; + << dendl; bl.push_back(bp); epoch = h.epoch; diff --git a/trunk/ceph/ebofs/mkfs.ebofs.cc b/trunk/ceph/ebofs/mkfs.ebofs.cc index 1b432dd12da66..8a10eed44cbad 100644 --- a/trunk/ceph/ebofs/mkfs.ebofs.cc +++ b/trunk/ceph/ebofs/mkfs.ebofs.cc @@ -41,6 +41,31 @@ int main(int argc, char **argv) Ebofs fs(filename); fs.mount(); + if (1) { + char crap[1024*1024]; + memset(crap, 0, 1024*1024); + + + object_t oid(1,2); + off_t pos = 0; + off_t sz = 16; + + bufferlist bl; + bl.append(crap, sz); + + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = 1000*1000*40; // ms -> nsec + + while (1) { + cout << g_clock.now() << " writing " << pos << "~" << sz << endl; + fs.write(oid, pos, sz, bl, (Context*)0); + pos += sz; + nanosleep(&ts, 0); + } + + } + /* if (1) { // partial write tests diff --git a/trunk/ceph/include/Context.h b/trunk/ceph/include/Context.h index 3d4d45b1167aa..edc5f410491df 100644 --- a/trunk/ceph/include/Context.h +++ b/trunk/ceph/include/Context.h @@ -93,7 +93,7 @@ public: class C_Gather : public Context { public: bool sub_finish(int r) { - //cout << "C_Gather sub_finish " << this << endl; + //cout << "C_Gather sub_finish " << this << " got " << r << " of " << waitfor << endl; assert(waitfor.count(r)); waitfor.erase(r); if (!waitfor.empty()) diff --git a/trunk/ceph/mds/journal.cc b/trunk/ceph/mds/journal.cc index 066ca724c8129..a533f62e450e2 100644 --- a/trunk/ceph/mds/journal.cc +++ b/trunk/ceph/mds/journal.cc @@ -106,14 +106,24 @@ bool EMetaBlob::has_expired(MDS *mds) << " for " << *dir << endl; continue; // not our problem } - // FIXME HACK: this makes logger stats more accurage, for journal stats, - // but is not perfectly safe. for benchmarking ONLY! - if (dir->get_committed_version() >= lp->second.dirv || - //if (dir->get_committing_version() >= lp->second.dirv || - dir->get_committed_version_equivalent() >= lp->second.dirv) { - dout(10) << "EMetaBlob.has_expired have dirv " << lp->second.dirv - << " for " << *dir << endl; - continue; // yay + + if (g_conf.mds_hack_log_expire_for_better_stats) { + // FIXME HACK: this makes logger stats more accurage, for journal stats, + // but is not perfectly safe. for benchmarking ONLY! + if (dir->get_committing_version() >= lp->second.dirv || // committING, not committED. + dir->get_committed_version_equivalent() >= lp->second.dirv) { + dout(10) << "EMetaBlob.has_expired have dirv " << lp->second.dirv + << " for " << *dir << endl; + continue; // yay + } + } else { + // this is the proper (safe) way + if (dir->get_committed_version() >= lp->second.dirv || + dir->get_committed_version_equivalent() >= lp->second.dirv) { + dout(10) << "EMetaBlob.has_expired have dirv " << lp->second.dirv + << " for " << *dir << endl; + continue; // yay + } } if (dir->is_ambiguous_dir_auth()) { -- 2.39.5