From: Kosie van der Merwe Date: Fri, 1 Feb 2013 21:10:15 +0000 (-0800) Subject: Merged in master X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f9d153877a1956a7a074f174593140d027b62d22;p=rocksdb.git Merged in master --- diff --git a/AUTHORS b/AUTHORS deleted file mode 100644 index 27a9407e5..000000000 --- a/AUTHORS +++ /dev/null @@ -1,8 +0,0 @@ -# Names should be added to this file like so: -# Name or Organization - -Google Inc. - -# Initial version authors: -Jeffrey Dean -Sanjay Ghemawat diff --git a/Makefile b/Makefile index 13062d8e7..8c31a8c8e 100644 --- a/Makefile +++ b/Makefile @@ -9,9 +9,8 @@ INSTALL_PATH ?= $(CURDIR) # Uncomment exactly one of the lines labelled (A), (B), and (C) below # to switch between compilation modes. -# OPT ?= -O2 -DNDEBUG # (A) Production use (optimized mode) -# OPT ?= -g2 # (B) Debug mode, w/ full line-level debugging symbols -OPT ?= -O2 -g2 -DNDEBUG -Wall # (C) Profiling mode: opt, but w/debugging symbols +# OPT ?= -DNDEBUG # (A) Production use (optimized mode) +OPT += -O3 -fno-omit-frame-pointer -momit-leaf-frame-pointer #----------------------------------------------- # detect what platform we're building on @@ -19,8 +18,9 @@ $(shell ./build_detect_platform build_config.mk) # this file is generated by the previous line to set build flags and sources include build_config.mk -CFLAGS += -Werror -I. -I./include $(PLATFORM_CCFLAGS) $(OPT) -CXXFLAGS += -Werror -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT) +WARNING_FLAGS = -Wall -Werror -Wno-unused-parameter -Wno-sign-compare +CFLAGS += -g $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CCFLAGS) $(OPT) +CXXFLAGS += -g $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT) LDFLAGS += $(PLATFORM_LDFLAGS) @@ -38,6 +38,7 @@ TESTS = \ c_test \ cache_test \ coding_test \ + histogram_test \ corruption_test \ crc32c_test \ db_test \ @@ -65,7 +66,6 @@ TOOLS = \ PROGRAMS = db_bench $(TESTS) $(TOOLS) BENCHMARKS = db_bench_sqlite3 db_bench_tree_db -VERSIONFILE=util/build_version.cc LIBRARY = libleveldb.a MEMENVLIBRARY = libmemenv.a @@ -99,13 +99,20 @@ $(SHARED3): endif # PLATFORM_SHARED_EXT -all: $(VERSIONFILE) $(SHARED) $(LIBRARY) $(TOOLS) +all: $(SHARED) $(LIBRARY) $(PROGRAMS) -check: all $(PROGRAMS) $(TESTS) $(TOOLS) +release: + make clean + OPT=-DNDEBUG make -j32 + +check: all $(PROGRAMS) $(TESTS) $(TOOLS) ldb_tests for t in $(TESTS); do echo "***** Running $$t"; ./$$t || exit 1; done +ldb_tests: all $(PROGRAMS) $(TOOLS) + python tools/ldb_test.py + clean: - -rm -f $(PROGRAMS) $(BENCHMARKS) $(LIBRARY) $(SHARED) $(MEMENVLIBRARY) $(THRIFTSERVER) */*.o */*/*.o ios-x86/*/*.o ios-arm/*/*.o build_config.mk $(VERSIONFILE) */*.d + -rm -f $(PROGRAMS) $(BENCHMARKS) $(LIBRARY) $(SHARED) $(MEMENVLIBRARY) $(THRIFTSERVER) */*.o */*/*.o ios-x86/*/*.o ios-arm/*/*.o build_config.mk -rm -rf ios-x86/* ios-arm/* $(LIBRARY): $(LIBOBJECTS) @@ -143,6 +150,9 @@ cache_test: util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS) +histogram_test: util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS) + $(CXX) util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS) + corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS) diff --git a/NEWS b/NEWS deleted file mode 100644 index 3fd99242d..000000000 --- a/NEWS +++ /dev/null @@ -1,17 +0,0 @@ -Release 1.2 2011-05-16 ----------------------- - -Fixes for larger databases (tested up to one billion 100-byte entries, -i.e., ~100GB). - -(1) Place hard limit on number of level-0 files. This fixes errors -of the form "too many open files". - -(2) Fixed memtable management. Before the fix, a heavy write burst -could cause unbounded memory usage. - -A fix for a logging bug where the reader would incorrectly complain -about corruption. - -Allow public access to WriteBatch contents so that users can easily -wrap a DB. diff --git a/TODO b/TODO deleted file mode 100644 index 9130b6a9f..000000000 --- a/TODO +++ /dev/null @@ -1,13 +0,0 @@ -ss -- Stats - -db -- Maybe implement DB::BulkDeleteForRange(start_key, end_key) - that would blow away files whose ranges are entirely contained - within [start_key..end_key]? For Chrome, deletion of obsolete - object stores, etc. can be done in the background anyway, so - probably not that important. - -After a range is completely deleted, what gets rid of the -corresponding files if we do no future changes to that range. Make -the conditions for triggering compactions fire in more situations? diff --git a/build_detect_version b/build_detect_version index db2fca8ef..73dbf62af 100755 --- a/build_detect_version +++ b/build_detect_version @@ -8,7 +8,8 @@ # # create git version file -VFILE=util/build_version.cc +VFILE=$(mktemp) +trap "rm $VFILE" EXIT # check to see if git is in the path which git > /dev/null @@ -19,6 +20,10 @@ else echo "git not found"| awk ' BEGIN {print "#include \"build_version.h\""} {print "const char * leveldb_build_git_sha = \"leveldb_build_git_sha:git not found\";"} END {}' > ${VFILE} fi -date | awk 'BEGIN {} {print "const char * leveldb_build_git_datetime = \"leveldb_build_git_datetime:"$0"\";"} END {} ' >> ${VFILE} echo "const char * leveldb_build_compile_date = __DATE__;" >> ${VFILE} echo "const char * leveldb_build_compile_time = __TIME__;" >> ${VFILE} + +OUTFILE=util/build_version.cc +if [ ! -e $OUTFILE ] || ! cmp -s $VFILE $OUTFILE; then + cp $VFILE $OUTFILE +fi diff --git a/db/builder.cc b/db/builder.cc index 1fc200930..6c4fe337d 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -26,13 +26,13 @@ Status BuildTable(const std::string& dbname, std::string fname = TableFileName(dbname, meta->number); if (iter->Valid()) { - WritableFile* file; + unique_ptr file; s = env->NewWritableFile(fname, &file); if (!s.ok()) { return s; } - TableBuilder* builder = new TableBuilder(options, file, 0); + TableBuilder* builder = new TableBuilder(options, file.get(), 0); meta->smallest.DecodeFrom(iter->key()); for (; iter->Valid(); iter->Next()) { Slice key = iter->key(); @@ -63,8 +63,6 @@ Status BuildTable(const std::string& dbname, if (s.ok()) { s = file->Close(); } - delete file; - file = NULL; if (s.ok()) { // Verify that the table is usable diff --git a/db/c.cc b/db/c.cc index eb28c4bdc..7614e7edf 100644 --- a/db/c.cc +++ b/db/c.cc @@ -39,6 +39,8 @@ using leveldb::WritableFile; using leveldb::WriteBatch; using leveldb::WriteOptions; +using std::shared_ptr; + extern "C" { struct leveldb_t { DB* rep; }; @@ -48,12 +50,12 @@ struct leveldb_snapshot_t { const Snapshot* rep; }; struct leveldb_readoptions_t { ReadOptions rep; }; struct leveldb_writeoptions_t { WriteOptions rep; }; struct leveldb_options_t { Options rep; }; -struct leveldb_cache_t { Cache* rep; }; struct leveldb_seqfile_t { SequentialFile* rep; }; struct leveldb_randomfile_t { RandomAccessFile* rep; }; struct leveldb_writablefile_t { WritableFile* rep; }; -struct leveldb_logger_t { Logger* rep; }; struct leveldb_filelock_t { FileLock* rep; }; +struct leveldb_logger_t { shared_ptr rep; }; +struct leveldb_cache_t { shared_ptr rep; }; struct leveldb_comparator_t : public Comparator { void* state_; @@ -421,7 +423,9 @@ void leveldb_options_set_env(leveldb_options_t* opt, leveldb_env_t* env) { } void leveldb_options_set_info_log(leveldb_options_t* opt, leveldb_logger_t* l) { - opt->rep.info_log = (l ? l->rep : NULL); + if (l) { + opt->rep.info_log = l->rep; + } } void leveldb_options_set_write_buffer_size(leveldb_options_t* opt, size_t s) { @@ -433,7 +437,9 @@ void leveldb_options_set_max_open_files(leveldb_options_t* opt, int n) { } void leveldb_options_set_cache(leveldb_options_t* opt, leveldb_cache_t* c) { - opt->rep.block_cache = c->rep; + if (c) { + opt->rep.block_cache = c->rep; + } } void leveldb_options_set_block_size(leveldb_options_t* opt, size_t s) { @@ -502,6 +508,16 @@ void leveldb_options_set_compression(leveldb_options_t* opt, int t) { opt->rep.compression = static_cast(t); } +void leveldb_options_set_compression_per_level(leveldb_options_t* opt, + int* level_values, + size_t num_levels) { + opt->rep.compression_per_level.resize(num_levels); + for (size_t i = 0; i < num_levels; ++i) { + opt->rep.compression_per_level[i] = + static_cast(level_values[i]); + } +} + void leveldb_options_set_compression_options( leveldb_options_t* opt, int w_bits, int level, int strategy) { opt->rep.compression_opts.window_bits = w_bits; @@ -647,7 +663,6 @@ leveldb_cache_t* leveldb_cache_create_lru(size_t capacity) { } void leveldb_cache_destroy(leveldb_cache_t* cache) { - delete cache->rep; delete cache; } diff --git a/db/c_test.c b/db/c_test.c index 2c36972b8..1d71b6508 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -188,6 +188,9 @@ int main(int argc, char** argv) { leveldb_options_set_block_restart_interval(options, 8); leveldb_options_set_compression(options, leveldb_no_compression); leveldb_options_set_compression_options(options, -14, -1, 0); + int compression_levels[] = {leveldb_no_compression, leveldb_no_compression, + leveldb_no_compression, leveldb_no_compression}; + leveldb_options_set_compression_per_level(options, compression_levels, 4); roptions = leveldb_readoptions_create(); leveldb_readoptions_set_verify_checksums(roptions, 1); diff --git a/db/corruption_test.cc b/db/corruption_test.cc index 4b1fce741..63360a76a 100644 --- a/db/corruption_test.cc +++ b/db/corruption_test.cc @@ -28,7 +28,7 @@ class CorruptionTest { public: test::ErrorEnv env_; std::string dbname_; - Cache* tiny_cache_; + shared_ptr tiny_cache_; Options options_; DB* db_; @@ -47,7 +47,6 @@ class CorruptionTest { ~CorruptionTest() { delete db_; DestroyDB(dbname_, Options()); - delete tiny_cache_; } Status TryReopen(Options* options = NULL) { diff --git a/db/db_bench.cc b/db/db_bench.cc index de0a25609..8469d6c3a 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -472,7 +472,7 @@ struct ThreadState { class Benchmark { private: - Cache* cache_; + shared_ptr cache_; const FilterPolicy* filter_policy_; DB* db_; long num_; @@ -658,7 +658,6 @@ class Benchmark { ~Benchmark() { delete db_; - delete cache_; delete filter_policy_; } @@ -978,7 +977,7 @@ class Benchmark { options.WAL_ttl_seconds = FLAGS_WAL_ttl_seconds; if (FLAGS_min_level_to_compress >= 0) { assert(FLAGS_min_level_to_compress <= FLAGS_num_levels); - options.compression_per_level = new CompressionType[FLAGS_num_levels]; + options.compression_per_level.resize(FLAGS_num_levels); for (int i = 0; i < FLAGS_min_level_to_compress; i++) { options.compression_per_level[i] = kNoCompression; } @@ -1009,7 +1008,7 @@ class Benchmark { exit(1); } if (FLAGS_min_level_to_compress >= 0) { - delete options.compression_per_level; + options.compression_per_level.clear(); } } @@ -1281,14 +1280,13 @@ class Benchmark { void HeapProfile() { char fname[100]; snprintf(fname, sizeof(fname), "%s/heap-%04d", FLAGS_db, ++heap_counter_); - WritableFile* file; + unique_ptr file; Status s = FLAGS_env->NewWritableFile(fname, &file); if (!s.ok()) { fprintf(stderr, "%s\n", s.ToString().c_str()); return; } - bool ok = port::GetHeapProfile(WriteToFile, file); - delete file; + bool ok = port::GetHeapProfile(WriteToFile, file.get()); if (!ok) { fprintf(stderr, "heap profiling not supported\n"); FLAGS_env->DeleteFile(fname); diff --git a/db/db_impl.cc b/db/db_impl.cc index 2ce5750f1..8b0c3484c 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -49,18 +49,17 @@ static Status NewLogger(const std::string& dbname, const std::string& db_log_dir, Env* env, size_t max_log_file_size, - Logger** logger) { + shared_ptr* logger) { std::string db_absolute_path; env->GetAbsolutePath(dbname, &db_absolute_path); if (max_log_file_size > 0) { // need to auto split the log file? - AutoSplitLogger* auto_split_logger = + auto logger_ptr = new AutoSplitLogger(env, dbname, db_log_dir, max_log_file_size); - Status s = auto_split_logger->GetStatus(); + logger->reset(logger_ptr); + Status s = logger_ptr->GetStatus(); if (!s.ok()) { - delete auto_split_logger; - } else { - *logger = auto_split_logger; + logger->reset(); } return s; } else { @@ -108,8 +107,8 @@ struct DBImpl::CompactionState { // We have potentially have more than one outfile due to hot-cold separation // needing both a hot file and a cold file to output to. size_t num_outfiles; - WritableFile** outfiles; - TableBuilder** builders; + std::vector> outfiles; + std::vector> builders; uint64_t total_bytes; @@ -121,8 +120,6 @@ struct DBImpl::CompactionState { explicit CompactionState(Compaction* c) : compaction(c), num_outfiles(0), - outfiles(NULL), - builders(NULL), total_bytes(0) { } }; @@ -170,12 +167,7 @@ Options SanitizeOptions(const std::string& dbname, if (result.block_cache == NULL && !result.no_block_cache) { result.block_cache = NewLRUCache(8 << 20); } - if (src.compression_per_level != NULL) { - result.compression_per_level = new CompressionType[src.num_levels]; - for (int i = 0; i < src.num_levels; i++) { - result.compression_per_level[i] = src.compression_per_level[i]; - } - } + result.compression_per_level = src.compression_per_level; return result; } @@ -188,16 +180,13 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname, dbname, &internal_comparator_, &internal_filter_policy_, options)), internal_filter_policy_(options.filter_policy), owns_info_log_(options_.info_log != options.info_log), - owns_cache_(options_.block_cache != options.block_cache), is_hotcold_(metrics_db != NULL), db_lock_(NULL), metrics_db_(metrics_db), shutting_down_(NULL), bg_cv_(&mutex_), mem_(new MemTable(internal_comparator_, NumberLevels())), - logfile_(NULL), logfile_number_(0), - log_(NULL), tmp_batch_(new WriteBatch), bg_compaction_scheduled_(0), bg_logstats_scheduled_(false), @@ -219,13 +208,13 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname, stats_ = new CompactionStats[options.num_levels]; // Reserve ten files or so for other uses and give the rest to TableCache. const int table_cache_size = options_.max_open_files - 10; - table_cache_ = new TableCache(dbname_, &options_, table_cache_size); + table_cache_.reset(new TableCache(dbname_, &options_, table_cache_size)); - versions_ = new VersionSet(dbname_, &options_, table_cache_, - &internal_comparator_); + versions_.reset(new VersionSet(dbname_, &options_, table_cache_.get(), + &internal_comparator_)); - dumpLeveldbBuildVersion(options_.info_log); - options_.Dump(options_.info_log); + dumpLeveldbBuildVersion(options_.info_log.get()); + options_.Dump(options_.info_log.get()); #ifdef USE_SCRIBE logger_ = new ScribeLogger("localhost", 1456); @@ -269,26 +258,12 @@ DBImpl::~DBImpl() { env_->UnlockFile(db_lock_); } - delete versions_; if (mem_ != NULL) mem_->Unref(); imm_.UnrefAll(); delete tmp_batch_; - delete log_; - delete logfile_; delete metrics_db_; - delete table_cache_; delete[] stats_; - if (owns_info_log_) { - delete options_.info_log; - } - if (owns_cache_) { - delete options_.block_cache; - } - if (options_.compression_per_level != NULL) { - delete[] options_.compression_per_level; - } - delete logger_; } @@ -313,6 +288,10 @@ void DBImpl::TEST_Destroy_DBImpl() { if (db_lock_ != NULL) { env_->UnlockFile(db_lock_); } + + log_.reset(); + versions_.reset(); + table_cache_.reset(); } uint64_t DBImpl::TEST_Current_Manifest_FileNo() { @@ -327,21 +306,18 @@ Status DBImpl::NewDB() { new_db.SetLastSequence(0); const std::string manifest = DescriptorFileName(dbname_, 1); - WritableFile* file; + unique_ptr file; Status s = env_->NewWritableFile(manifest, &file); if (!s.ok()) { return s; } + file->SetPreallocationBlockSize(options_.manifest_preallocation_size); { - log::Writer log(file); + log::Writer log(std::move(file)); std::string record; new_db.EncodeTo(&record); s = log.AddRecord(record); - if (s.ok()) { - s = file->Close(); - } } - delete file; if (s.ok()) { // Make "CURRENT" file that points to the new manifest file. s = SetCurrentFile(env_, dbname_, 1); @@ -653,7 +629,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, // Open the log file std::string fname = LogFileName(dbname_, log_number); - SequentialFile* file; + unique_ptr file; Status status = env_->NewSequentialFile(fname, &file); if (!status.ok()) { MaybeIgnoreError(&status); @@ -663,14 +639,14 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, // Create the log reader. LogReporter reporter; reporter.env = env_; - reporter.info_log = options_.info_log; + reporter.info_log = options_.info_log.get(); reporter.fname = fname.c_str(); reporter.status = (options_.paranoid_checks ? &status : NULL); // We intentially make log::Reader do checksumming even if // paranoid_checks==false so that corruptions cause entire commits // to be skipped instead of propagating bad information (like overly // large sequence numbers). - log::Reader reader(file, &reporter, true/*checksum*/, + log::Reader reader(std::move(file), &reporter, true/*checksum*/, 0/*initial_offset*/); Log(options_.info_log, "Recovering log #%llu", (unsigned long long) log_number); @@ -728,7 +704,6 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, } if (mem != NULL && !external_table) mem->Unref(); - delete file; return status; } @@ -745,7 +720,7 @@ Status DBImpl::WriteLevel0TableForRecovery(MemTable* mem, VersionEdit* edit) { Status s; { mutex_.Unlock(); - s = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta); + s = BuildTable(dbname_, env_, options_, table_cache_.get(), iter, &meta); mutex_.Lock(); } @@ -791,7 +766,7 @@ Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit, Status s; { mutex_.Unlock(); - s = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta); + s = BuildTable(dbname_, env_, options_, table_cache_.get(), iter, &meta); mutex_.Lock(); } base->Unref(); @@ -870,8 +845,9 @@ Status DBImpl::CompactMemTable(bool* madeProgress) { } // Replace immutable memtable with the generated Table - s = imm_.InstallMemtableFlushResults(m, versions_, s, &mutex_, - options_.info_log, file_number, pending_outputs_); + s = imm_.InstallMemtableFlushResults( + m, versions_.get(), s, &mutex_, options_.info_log.get(), + file_number, pending_outputs_); if (s.ok()) { if (madeProgress) { @@ -924,7 +900,7 @@ SequenceNumber DBImpl::GetLatestSequenceNumber() { } Status DBImpl::GetUpdatesSince(SequenceNumber seq, - TransactionLogIterator** iter) { + unique_ptr* iter) { // Get All Log Files. // Sort Files @@ -956,9 +932,8 @@ Status DBImpl::GetUpdatesSince(SequenceNumber seq, if (!s.ok()) { return s; } - TransactionLogIteratorImpl* impl = - new TransactionLogIteratorImpl(dbname_, &options_, seq, probableWALFiles); - *iter = impl; + iter->reset( + new TransactionLogIteratorImpl(dbname_, &options_, seq, probableWALFiles)); return Status::OK(); } @@ -1034,7 +1009,7 @@ Status DBImpl::ReadFirstLine(const std::string& fname, } }; - SequentialFile* file; + unique_ptr file; Status status = env_->NewSequentialFile(fname, &file); if (!status.ok()) { @@ -1044,10 +1019,10 @@ Status DBImpl::ReadFirstLine(const std::string& fname, LogReporter reporter; reporter.env = env_; - reporter.info_log = options_.info_log; + reporter.info_log = options_.info_log.get(); reporter.fname = fname.c_str(); reporter.status = (options_.paranoid_checks ? &status : NULL); - log::Reader reader(file, &reporter, true/*checksum*/, + log::Reader reader(std::move(file), &reporter, true/*checksum*/, 0/*initial_offset*/); std::string scratch; Slice record; @@ -1284,7 +1259,7 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, } } - Compaction* c = NULL; + unique_ptr c; bool is_manual = (manual_compaction_ != NULL) && (manual_compaction_->in_progress == false); InternalKey manual_end; @@ -1292,10 +1267,11 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, ManualCompaction* m = manual_compaction_; assert(!m->in_progress); m->in_progress = true; // another thread cannot pick up the same work - c = versions_->CompactRange(m->level, m->begin, m->end); - m->done = (c == NULL); - if (c != NULL) { + c.reset(versions_->CompactRange(m->level, m->begin, m->end)); + if (c) { manual_end = c->input(0, c->num_input_files(0) - 1)->largest; + } else { + m->done = true; } Log(options_.info_log, "Manual compaction at level-%d from %s .. %s; will stop at %s\n", @@ -1304,11 +1280,11 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, (m->end ? m->end->DebugString().c_str() : "(end)"), (m->done ? "(end)" : manual_end.DebugString().c_str())); } else if (!options_.disable_auto_compactions) { - c = versions_->PickCompaction(); + c.reset(versions_->PickCompaction()); } Status status; - if (c == NULL) { + if (!c) { // Nothing to do Log(options_.info_log, "Compaction nothing to do"); } else if (!is_manual && c->IsTrivialMove()) { @@ -1326,18 +1302,18 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, static_cast(f->file_size), status.ToString().c_str(), versions_->LevelSummary(&tmp)); - versions_->ReleaseCompactionFiles(c, status); + versions_->ReleaseCompactionFiles(c.get(), status); *madeProgress = true; } else { - CompactionState* compact = new CompactionState(c); + CompactionState* compact = new CompactionState(c.get()); status = DoCompactionWork(compact); CleanupCompaction(compact); - versions_->ReleaseCompactionFiles(c, status); + versions_->ReleaseCompactionFiles(c.get(), status); c->ReleaseInputs(); FindObsoleteFiles(deletion_state); *madeProgress = true; } - delete c; + c.reset(); if (status.ok()) { // Done @@ -1370,22 +1346,18 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, void DBImpl::CleanupCompaction(CompactionState* compact) { mutex_.AssertHeld(); - if (compact->builders != NULL) { + if (!compact->builders.empty()) { // May happen if we get a shutdown call in the middle of compaction for (size_t i = 0; i < compact->num_outfiles; ++i) { compact->builders[i]->Abandon(); - delete compact->builders[i]; - delete compact->outfiles[i]; } - delete[] compact->builders; - delete[] compact->outfiles; compact->num_outfiles = 0; - compact->builders = NULL; - compact->outfiles = NULL; + compact->builders.clear(); + compact->outfiles.clear(); } else { - assert(compact->outfiles == NULL); + assert(compact->outfiles.empty()); } for (size_t i = 0; i < compact->outputs.size(); i++) { const CompactionState::Output& out = compact->outputs[i]; @@ -1400,7 +1372,7 @@ void DBImpl::CleanupCompaction(CompactionState* compact) { void DBImpl::AllocateCompactionOutputFileNumbers(CompactionState* compact) { mutex_.AssertHeld(); assert(compact != NULL); - assert(compact->builders == NULL); + assert(compact->builders.empty()); int filesNeeded = compact->compaction->num_input_files(1); for (int i = 0; i < filesNeeded; i++) { uint64_t file_number = versions_->NewFileNumber(); @@ -1423,11 +1395,11 @@ void DBImpl::ReleaseCompactionUnusedFileNumbers(CompactionState* compact) { Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) { assert(compact != NULL); - assert(compact->builders == NULL); + assert(compact->builders.empty()); compact->num_outfiles = is_hotcold_?2:1; - compact->builders = new TableBuilder*[compact->num_outfiles]; - compact->outfiles = new WritableFile*[compact->num_outfiles]; + compact->builders.resize(compact->num_outfiles); + compact->outfiles.resize(compact->num_outfiles); Status s; for (size_t i = 0; i < compact->num_outfiles; ++i) { @@ -1453,22 +1425,25 @@ Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) { // Make the output file std::string fname = TableFileName(dbname_, file_number); s = env_->NewWritableFile(fname, &compact->outfiles[i]); + + // Over-estimate slightly so we don't end up just barely crossing + // the threshold. + compact->outfiles[i]->SetPreallocationBlockSize( + 1.1 * versions_->MaxFileSizeForLevel(compact->compaction->level() + 1)); + if (s.ok()) { - compact->builders[i] = new TableBuilder(options_, compact->outfiles[i], - compact->compaction->level() + 1); + compact->builders[i].reset( + new TableBuilder(options_, compact->outfiles[i].get(), + compact->compaction->level()+1)); } else { // Clean up already constructed builders and outfiles for (size_t j = 0; j < i; ++j) { compact->builders[j]->Abandon(); - delete compact->builders[j]; - delete compact->outfiles[j]; } - delete[] compact->builders; - delete[] compact->outfiles; compact->num_outfiles = 0; - compact->builders = NULL; - compact->outfiles = NULL; + compact->builders.clear(); + compact->outfiles.clear(); // And stop looping break; @@ -1480,8 +1455,8 @@ Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) { Status DBImpl::FinishCompactionOutputFile(CompactionState* compact, Iterator* input) { assert(compact != NULL); - assert(compact->outfiles != NULL); - assert(compact->builders != NULL); + assert(!compact->outfiles.empty()); + assert(!compact->builders.empty()); Status s; @@ -1503,8 +1478,7 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact, compact->current_output(i)->file_size = current_bytes; compact->current_output(i)->skip = current_entries == 0; compact->total_bytes += current_bytes; - delete compact->builders[i]; - compact->builders[i] = nullptr; + compact->builders[i].reset(); // Finish and check for file errors if (s.ok() && !options_.disableDataSync && current_entries > 0) { @@ -1517,8 +1491,7 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact, if (s.ok() && current_entries > 0) { s = compact->outfiles[i]->Close(); } - delete compact->outfiles[i]; - compact->outfiles[i] = nullptr; + compact->outfiles[i].reset(); if (s.ok() && current_entries > 0) { // Verify that the table is usable @@ -1541,17 +1514,12 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact, if (!s.ok()) { for (; i < compact->num_outfiles; ++i) { compact->builders[i]->Abandon(); - delete compact->builders[i]; - delete compact->outfiles[i]; } } - delete[] compact->builders; - delete[] compact->outfiles; - compact->num_outfiles = 0; - compact->builders = NULL; - compact->outfiles = NULL; + compact->builders.clear(); + compact->outfiles.clear(); return s; } @@ -1774,8 +1742,8 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { Log(options_.info_log, "Compaction start summary: %s\n", scratch); assert(versions_->NumLevelFiles(compact->compaction->level()) > 0); - assert(compact->builders == NULL); - assert(compact->outfiles == NULL); + assert(compact->builders.empty()); + assert(compact->outfiles.empty()); SequenceNumber visible_at_tip = 0; SequenceNumber earliest_snapshot; @@ -1800,7 +1768,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { BlockMetrics* block_metrics_store = NULL; const uint64_t start_micros = env_->NowMicros(); - Iterator* input = versions_->MakeInputIterator(compact->compaction); + unique_ptr input(versions_->MakeInputIterator(compact->compaction)); input->SeekToFirst(); Status status; ParsedInternalKey ikey; @@ -1826,8 +1794,8 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { Slice value = input->value(); Slice* compaction_filter_value = NULL; if (compact->compaction->ShouldStopBefore(key) && - compact->builders != NULL) { - status = FinishCompactionOutputFile(compact, input); + !compact->builders.empty()) { + status = FinishCompactionOutputFile(compact, input.get()); if (!status.ok()) { break; } @@ -1915,7 +1883,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { if (!drop) { // Open output file if necessary - if (compact->builders == NULL) { + if (compact->builders.empty()) { status = OpenCompactionOutputFile(compact); if (!status.ok()) { break; @@ -1924,7 +1892,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { // Select output file size_t outfile_idx = 0; - if (is_hotcold_ && IsRecordHot(input, metrics_db_, ReadOptions(), + if (is_hotcold_ && IsRecordHot(input.get(), metrics_db_, ReadOptions(), &block_metrics_store)) { outfile_idx = 1; } @@ -1944,7 +1912,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { // TODO: consider adding the size of all the builders together. if (compact->builders[outfile_idx]->FileSize() >= compact->compaction->MaxOutputFileSize()) { - status = FinishCompactionOutputFile(compact, input); + status = FinishCompactionOutputFile(compact, input.get()); if (!status.ok()) { break; } @@ -1959,14 +1927,13 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { if (status.ok() && shutting_down_.Acquire_Load()) { status = Status::IOError("Deleting DB during compaction"); } - if (status.ok() && compact->builders != NULL) { - status = FinishCompactionOutputFile(compact, input); + if (status.ok() && !compact->builders.empty()) { + status = FinishCompactionOutputFile(compact, input.get()); } if (status.ok()) { status = input->status(); } - delete input; - input = NULL; + input.reset(); CompactionStats stats; stats.micros = env_->NowMicros() - start_micros - imm_micros; @@ -1975,7 +1942,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { stats.files_in_levelnp1 = compact->compaction->num_input_files(1); int num_output_files = compact->outputs.size(); - if (compact->builders != NULL) { + if (!compact->builders.empty()) { // An error occured so ignore the last output. assert(num_output_files > 0); num_output_files -= compact->num_outfiles; @@ -2214,9 +2181,9 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { status = log_->AddRecord(WriteBatchInternal::Contents(updates)); if (status.ok() && options.sync) { if (options_.use_fsync) { - status = logfile_->Fsync(); + status = log_->file()->Fsync(); } else { - status = logfile_->Sync(); + status = log_->file()->Sync(); } } } @@ -2381,18 +2348,18 @@ Status DBImpl::MakeRoomForWrite(bool force) { DelayLoggingAndReset(); assert(versions_->PrevLogNumber() == 0); uint64_t new_log_number = versions_->NewFileNumber(); - WritableFile* lfile = NULL; + unique_ptr lfile; s = env_->NewWritableFile(LogFileName(dbname_, new_log_number), &lfile); if (!s.ok()) { // Avoid chewing through file number space in a tight loop. - versions_->ReuseFileNumber(new_log_number); + versions_->ReuseFileNumber(new_log_number); break; } - delete log_; - delete logfile_; - logfile_ = lfile; + // Our final size should be less than write_buffer_size + // (compression, etc) but err on the side of caution. + lfile->SetPreallocationBlockSize(1.1 * options_.write_buffer_size); logfile_number_ = new_log_number; - log_ = new log::Writer(lfile); + log_.reset(new log::Writer(std::move(lfile))); imm_.Add(mem_); mem_ = new MemTable(internal_comparator_, NumberLevels()); mem_->Ref(); @@ -2598,14 +2565,14 @@ Status DB::InternalOpen(const Options& options, const std::string& dbname, s = impl->Recover(&edit); // Handles create_if_missing, error_if_exists if (s.ok()) { uint64_t new_log_number = impl->versions_->NewFileNumber(); - WritableFile* lfile; + unique_ptr lfile; s = options.env->NewWritableFile(LogFileName(dbname, new_log_number), &lfile); if (s.ok()) { + lfile->SetPreallocationBlockSize(1.1 * options.write_buffer_size); edit.SetLogNumber(new_log_number); - impl->logfile_ = lfile; impl->logfile_number_ = new_log_number; - impl->log_ = new log::Writer(lfile); + impl->log_.reset(new log::Writer(std::move(lfile))); s = impl->versions_->LogAndApply(&edit, &impl->mutex_); } if (s.ok()) { @@ -2695,8 +2662,8 @@ Status DestroyDB(const std::string& dbname, const Options& options) { // A global method that can dump out the build version void dumpLeveldbBuildVersion(Logger * log) { Log(log, "Git sha %s", leveldb_build_git_sha); - Log(log, "Git datetime %s", leveldb_build_git_datetime); - Log(log, "Compile time %s %s", leveldb_build_compile_time, leveldb_build_compile_date); + Log(log, "Compile time %s %s", + leveldb_build_compile_time, leveldb_build_compile_date); } } // namespace leveldb diff --git a/db/db_impl.h b/db/db_impl.h index 40120a649..651406824 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -61,7 +61,7 @@ class DBImpl : public DB { uint64_t* manifest_file_size); virtual SequenceNumber GetLatestSequenceNumber(); virtual Status GetUpdatesSince(SequenceNumber seq_number, - TransactionLogIterator ** iter); + unique_ptr* iter); // Extra methods (for testing) that are not in the public DB interface @@ -110,7 +110,7 @@ class DBImpl : public DB { protected: Env* const env_; const std::string dbname_; - VersionSet* versions_; + unique_ptr versions_; const InternalKeyComparator internal_comparator_; const Options options_; // options_.comparator == &internal_comparator_ @@ -229,11 +229,10 @@ class DBImpl : public DB { // Constant after construction const InternalFilterPolicy internal_filter_policy_; bool owns_info_log_; - bool owns_cache_; bool is_hotcold_; // table_cache_ provides its own synchronization - TableCache* table_cache_; + unique_ptr table_cache_; // Lock over the persistent DB state. Non-NULL iff successfully acquired. FileLock* db_lock_; @@ -247,9 +246,8 @@ class DBImpl : public DB { port::CondVar bg_cv_; // Signalled when background work finishes MemTable* mem_; MemTableList imm_; // Memtable that are not changing - WritableFile* logfile_; uint64_t logfile_number_; - log::Writer* log_; + unique_ptr log_; // Metrics that have been received from the cache, but have not yet been // flushed to metrics_db_. std::vector*> unflushed_metrics_; diff --git a/db/db_test.cc b/db/db_test.cc index 5a90b893e..1fdcd26b6 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -101,18 +101,17 @@ class SpecialEnv : public EnvWrapper { manifest_write_error_.Release_Store(NULL); } - Status NewWritableFile(const std::string& f, WritableFile** r) { + Status NewWritableFile(const std::string& f, unique_ptr* r) { class SSTableFile : public WritableFile { private: SpecialEnv* env_; - WritableFile* base_; + unique_ptr base_; public: - SSTableFile(SpecialEnv* env, WritableFile* base) + SSTableFile(SpecialEnv* env, unique_ptr&& base) : env_(env), - base_(base) { + base_(std::move(base)) { } - ~SSTableFile() { delete base_; } Status Append(const Slice& data) { if (env_->no_space_.Acquire_Load() != NULL) { // Drop writes on the floor @@ -133,10 +132,10 @@ class SpecialEnv : public EnvWrapper { class ManifestFile : public WritableFile { private: SpecialEnv* env_; - WritableFile* base_; + unique_ptr base_; public: - ManifestFile(SpecialEnv* env, WritableFile* b) : env_(env), base_(b) { } - ~ManifestFile() { delete base_; } + ManifestFile(SpecialEnv* env, unique_ptr&& b) + : env_(env), base_(std::move(b)) { } Status Append(const Slice& data) { if (env_->manifest_write_error_.Acquire_Load() != NULL) { return Status::IOError("simulated writer error"); @@ -162,24 +161,25 @@ class SpecialEnv : public EnvWrapper { Status s = target()->NewWritableFile(f, r); if (s.ok()) { if (strstr(f.c_str(), ".sst") != NULL) { - *r = new SSTableFile(this, *r); + r->reset(new SSTableFile(this, std::move(*r))); } else if (strstr(f.c_str(), "MANIFEST") != NULL) { - *r = new ManifestFile(this, *r); + r->reset(new ManifestFile(this, std::move(*r))); } } return s; } - Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) { + Status NewRandomAccessFile(const std::string& f, + unique_ptr* r) { class CountingFile : public RandomAccessFile { private: - RandomAccessFile* target_; + unique_ptr target_; anon::AtomicCounter* counter_; public: - CountingFile(RandomAccessFile* target, anon::AtomicCounter* counter) - : target_(target), counter_(counter) { + CountingFile(unique_ptr&& target, + anon::AtomicCounter* counter) + : target_(std::move(target)), counter_(counter) { } - virtual ~CountingFile() { delete target_; } virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { counter_->Increment(); @@ -189,7 +189,7 @@ class SpecialEnv : public EnvWrapper { Status s = target()->NewRandomAccessFile(f, r); if (s.ok() && count_random_reads_) { - *r = new CountingFile(*r, &random_read_counter_); + r->reset(new CountingFile(std::move(*r), &random_read_counter_)); } return s; } @@ -551,6 +551,62 @@ TEST(DBTest, ReadWrite) { } while (ChangeOptions()); } +static std::string Key(int i) { + char buf[100]; + snprintf(buf, sizeof(buf), "key%06d", i); + return std::string(buf); +} + +TEST(DBTest, LevelLimitReopen) { + Options options = CurrentOptions(); + Reopen(&options); + + const std::string value(1024 * 1024, ' '); + int i = 0; + while (NumTableFilesAtLevel(2) == 0) { + ASSERT_OK(Put(Key(i++), value)); + } + + options.num_levels = 1; + Status s = TryReopen(&options); + ASSERT_EQ(s.IsCorruption(), true); + ASSERT_EQ(s.ToString(), + "Corruption: VersionEdit: db already has " + "more levels than options.num_levels"); + + options.num_levels = 10; + ASSERT_OK(TryReopen(&options)); +} + +TEST(DBTest, Preallocation) { + const std::string src = dbname_ + "/alloc_test"; + unique_ptr srcfile; + ASSERT_OK(env_->NewWritableFile(src, &srcfile)); + srcfile->SetPreallocationBlockSize(1024 * 1024); + + // No writes should mean no preallocation + size_t block_size, last_allocated_block; + srcfile->GetPreallocationStatus(&block_size, &last_allocated_block); + ASSERT_EQ(last_allocated_block, 0UL); + + // Small write should preallocate one block + srcfile->Append("test"); + srcfile->GetPreallocationStatus(&block_size, &last_allocated_block); + ASSERT_EQ(last_allocated_block, 1UL); + + // Write an entire preallocation block, make sure we increased by two. + std::string buf(block_size, ' '); + srcfile->Append(buf); + srcfile->GetPreallocationStatus(&block_size, &last_allocated_block); + ASSERT_EQ(last_allocated_block, 2UL); + + // Write five more blocks at once, ensure we're where we need to be. + buf = std::string(block_size * 5, ' '); + srcfile->Append(buf); + srcfile->GetPreallocationStatus(&block_size, &last_allocated_block); + ASSERT_EQ(last_allocated_block, 7UL); +} + TEST(DBTest, PutDeleteGet) { do { ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1")); @@ -1024,12 +1080,6 @@ TEST(DBTest, RecoverDuringMemtableCompaction) { } while (ChangeOptions()); } -static std::string Key(int i) { - char buf[100]; - snprintf(buf, sizeof(buf), "key%06d", i); - return std::string(buf); -} - TEST(DBTest, MinorCompactionsHappen) { Options options = CurrentOptions(); options.write_buffer_size = 10000; @@ -1219,7 +1269,7 @@ bool MinLevelToCompress(CompressionType& type, Options& options, int wbits, fprintf(stderr, "skipping test, compression disabled\n"); return false; } - options.compression_per_level = new CompressionType[options.num_levels]; + options.compression_per_level.resize(options.num_levels); // do not compress L0 for (int i = 0; i < 1; i++) { @@ -2226,7 +2276,6 @@ TEST(DBTest, BloomFilter) { env_->delay_sstable_sync_.Release_Store(NULL); Close(); - delete options.block_cache; delete options.filter_policy; } @@ -2284,9 +2333,9 @@ TEST(DBTest, SnapshotFiles) { } } } - SequentialFile* srcfile; + unique_ptr srcfile; ASSERT_OK(env_->NewSequentialFile(src, &srcfile)); - WritableFile* destfile; + unique_ptr destfile; ASSERT_OK(env_->NewWritableFile(dest, &destfile)); char buffer[4096]; @@ -2298,8 +2347,6 @@ TEST(DBTest, SnapshotFiles) { size -= slice.size(); } ASSERT_OK(destfile->Close()); - delete destfile; - delete srcfile; } // release file snapshot @@ -2440,7 +2487,7 @@ TEST(DBTest, TransactionLogIterator) { Put("key2", value); ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3U); { - TransactionLogIterator* iter; + unique_ptr iter; Status status = dbfull()->GetUpdatesSince(0, &iter); ASSERT_TRUE(status.ok()); ASSERT_TRUE(!iter->Valid()); @@ -2466,7 +2513,7 @@ TEST(DBTest, TransactionLogIterator) { Put("key6", value); } { - TransactionLogIterator* iter; + unique_ptr iter; Status status = dbfull()->GetUpdatesSince(0, &iter); ASSERT_TRUE(status.ok()); ASSERT_TRUE(!iter->Valid()); @@ -2546,7 +2593,6 @@ TEST(DBTest, ReadCompaction) { ASSERT_TRUE(NumTableFilesAtLevel(0) < l1 || NumTableFilesAtLevel(1) < l2 || NumTableFilesAtLevel(2) < l3); - delete options.block_cache; } } @@ -2660,7 +2706,6 @@ class ModelDB: public DB { }; explicit ModelDB(const Options& options): options_(options) { } - ~ModelDB() { } virtual Status Put(const WriteOptions& o, const Slice& k, const Slice& v) { return DB::Put(o, k, v); } @@ -2753,7 +2798,7 @@ class ModelDB: public DB { return 0; } virtual Status GetUpdatesSince(leveldb::SequenceNumber, - leveldb::TransactionLogIterator**) { + unique_ptr*) { return Status::NotSupported("Not supported in Model DB"); } diff --git a/db/log_reader.cc b/db/log_reader.cc index ddd620246..ce63d80b6 100644 --- a/db/log_reader.cc +++ b/db/log_reader.cc @@ -15,9 +15,9 @@ namespace log { Reader::Reporter::~Reporter() { } -Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum, - uint64_t initial_offset) - : file_(file), +Reader::Reader(unique_ptr&& file, Reporter* reporter, + bool checksum, uint64_t initial_offset) + : file_(std::move(file)), reporter_(reporter), checksum_(checksum), backing_store_(new char[kBlockSize]), diff --git a/db/log_reader.h b/db/log_reader.h index 82d4bee68..1f1d78860 100644 --- a/db/log_reader.h +++ b/db/log_reader.h @@ -5,6 +5,7 @@ #ifndef STORAGE_LEVELDB_DB_LOG_READER_H_ #define STORAGE_LEVELDB_DB_LOG_READER_H_ +#include #include #include "db/log_format.h" @@ -14,6 +15,7 @@ namespace leveldb { class SequentialFile; +using std::unique_ptr; namespace log { @@ -40,8 +42,8 @@ class Reader { // // The Reader will start reading at the first record located at physical // position >= initial_offset within the file. - Reader(SequentialFile* file, Reporter* reporter, bool checksum, - uint64_t initial_offset); + Reader(unique_ptr&& file, Reporter* reporter, + bool checksum, uint64_t initial_offset); ~Reader(); @@ -57,8 +59,10 @@ class Reader { // Undefined before the first call to ReadRecord. uint64_t LastRecordOffset(); + SequentialFile* file() { return file_.get(); } + private: - SequentialFile* const file_; + const unique_ptr file_; Reporter* const reporter_; bool const checksum_; char* const backing_store_; diff --git a/db/log_test.cc b/db/log_test.cc index b6a733682..1840090b3 100644 --- a/db/log_test.cc +++ b/db/log_test.cc @@ -100,8 +100,26 @@ class LogTest { } }; - StringDest dest_; - StringSource source_; + std::string& dest_contents() { + auto dest = dynamic_cast(writer_.file()); + assert(dest); + return dest->contents_; + } + + const std::string& dest_contents() const { + auto dest = dynamic_cast(writer_.file()); + assert(dest); + return dest->contents_; + } + + void reset_source_contents() { + auto src = dynamic_cast(reader_.file()); + assert(src); + src->contents_ = dest_contents(); + } + + unique_ptr dest_holder_; + unique_ptr source_holder_; ReportCollector report_; bool reading_; Writer writer_; @@ -112,9 +130,11 @@ class LogTest { static uint64_t initial_offset_last_record_offsets_[]; public: - LogTest() : reading_(false), - writer_(&dest_), - reader_(&source_, &report_, true/*checksum*/, + LogTest() : dest_holder_(new StringDest), + source_holder_(new StringSource), + reading_(false), + writer_(std::move(dest_holder_)), + reader_(std::move(source_holder_), &report_, true/*checksum*/, 0/*initial_offset*/) { } @@ -124,13 +144,13 @@ class LogTest { } size_t WrittenBytes() const { - return dest_.contents_.size(); + return dest_contents().size(); } std::string Read() { if (!reading_) { reading_ = true; - source_.contents_ = Slice(dest_.contents_); + reset_source_contents(); } std::string scratch; Slice record; @@ -142,26 +162,27 @@ class LogTest { } void IncrementByte(int offset, int delta) { - dest_.contents_[offset] += delta; + dest_contents()[offset] += delta; } void SetByte(int offset, char new_byte) { - dest_.contents_[offset] = new_byte; + dest_contents()[offset] = new_byte; } void ShrinkSize(int bytes) { - dest_.contents_.resize(dest_.contents_.size() - bytes); + dest_contents().resize(dest_contents().size() - bytes); } void FixChecksum(int header_offset, int len) { // Compute crc of type/len/data - uint32_t crc = crc32c::Value(&dest_.contents_[header_offset+6], 1 + len); + uint32_t crc = crc32c::Value(&dest_contents()[header_offset+6], 1 + len); crc = crc32c::Mask(crc); - EncodeFixed32(&dest_.contents_[header_offset], crc); + EncodeFixed32(&dest_contents()[header_offset], crc); } void ForceError() { - source_.force_error_ = true; + auto src = dynamic_cast(reader_.file()); + src->force_error_ = true; } size_t DroppedBytes() const { @@ -192,22 +213,25 @@ class LogTest { void CheckOffsetPastEndReturnsNoRecords(uint64_t offset_past_end) { WriteInitialOffsetLog(); reading_ = true; - source_.contents_ = Slice(dest_.contents_); - Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/, - WrittenBytes() + offset_past_end); + unique_ptr source(new StringSource); + source->contents_ = dest_contents(); + unique_ptr offset_reader( + new Reader(std::move(source), &report_, true/*checksum*/, + WrittenBytes() + offset_past_end)); Slice record; std::string scratch; ASSERT_TRUE(!offset_reader->ReadRecord(&record, &scratch)); - delete offset_reader; } void CheckInitialOffsetRecord(uint64_t initial_offset, int expected_record_offset) { WriteInitialOffsetLog(); reading_ = true; - source_.contents_ = Slice(dest_.contents_); - Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/, - initial_offset); + unique_ptr source(new StringSource); + source->contents_ = dest_contents(); + unique_ptr offset_reader( + new Reader(std::move(source), &report_, true/*checksum*/, + initial_offset)); Slice record; std::string scratch; ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch)); @@ -216,7 +240,6 @@ class LogTest { ASSERT_EQ(initial_offset_last_record_offsets_[expected_record_offset], offset_reader->LastRecordOffset()); ASSERT_EQ((char)('a' + expected_record_offset), record.data()[0]); - delete offset_reader; } }; diff --git a/db/log_writer.cc b/db/log_writer.cc index 2da99ac08..f61abe723 100644 --- a/db/log_writer.cc +++ b/db/log_writer.cc @@ -12,8 +12,8 @@ namespace leveldb { namespace log { -Writer::Writer(WritableFile* dest) - : dest_(dest), +Writer::Writer(unique_ptr&& dest) + : dest_(std::move(dest)), block_offset_(0) { for (int i = 0; i <= kMaxRecordType; i++) { char t = static_cast(i); diff --git a/db/log_writer.h b/db/log_writer.h index a3a954d96..a15bcf65a 100644 --- a/db/log_writer.h +++ b/db/log_writer.h @@ -5,6 +5,7 @@ #ifndef STORAGE_LEVELDB_DB_LOG_WRITER_H_ #define STORAGE_LEVELDB_DB_LOG_WRITER_H_ +#include #include #include "db/log_format.h" #include "leveldb/slice.h" @@ -14,6 +15,8 @@ namespace leveldb { class WritableFile; +using std::unique_ptr; + namespace log { class Writer { @@ -21,13 +24,16 @@ class Writer { // Create a writer that will append data to "*dest". // "*dest" must be initially empty. // "*dest" must remain live while this Writer is in use. - explicit Writer(WritableFile* dest); + explicit Writer(unique_ptr&& dest); ~Writer(); Status AddRecord(const Slice& slice); + WritableFile* file() { return dest_.get(); } + const WritableFile* file() const { return dest_.get(); } + private: - WritableFile* dest_; + unique_ptr dest_; int block_offset_; // Current offset in block // crc32c values for all supported record types. These are diff --git a/db/repair.cc b/db/repair.cc index 4b0220b5c..068407320 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -50,8 +50,6 @@ class Repairer { icmp_(options.comparator), ipolicy_(options.filter_policy), options_(SanitizeOptions(dbname, &icmp_, &ipolicy_, options)), - owns_info_log_(options_.info_log != options.info_log), - owns_cache_(options_.block_cache != options.block_cache), next_file_number_(1) { // TableCache can be small since we expect each table to be opened once. table_cache_ = new TableCache(dbname_, &options_, 10); @@ -61,12 +59,6 @@ class Repairer { ~Repairer() { delete table_cache_; delete edit_; - if (owns_info_log_) { - delete options_.info_log; - } - if (owns_cache_) { - delete options_.block_cache; - } } Status Run() { @@ -104,7 +96,6 @@ class Repairer { InternalKeyComparator const icmp_; InternalFilterPolicy const ipolicy_; Options const options_; - bool owns_info_log_; bool owns_cache_; TableCache* table_cache_; VersionEdit* edit_; @@ -164,7 +155,7 @@ class Repairer { Status ConvertLogToTable(uint64_t log) { struct LogReporter : public log::Reader::Reporter { Env* env; - Logger* info_log; + std::shared_ptr info_log; uint64_t lognum; virtual void Corruption(size_t bytes, const Status& s) { // We print error messages for corruption, but continue repairing. @@ -177,7 +168,7 @@ class Repairer { // Open the log file std::string logname = LogFileName(dbname_, log); - SequentialFile* lfile; + unique_ptr lfile; Status status = env_->NewSequentialFile(logname, &lfile); if (!status.ok()) { return status; @@ -192,7 +183,7 @@ class Repairer { // corruptions cause entire commits to be skipped instead of // propagating bad information (like overly large sequence // numbers). - log::Reader reader(lfile, &reporter, false/*do not checksum*/, + log::Reader reader(std::move(lfile), &reporter, false/*do not checksum*/, 0/*initial_offset*/); // Read all the records and add to a memtable @@ -219,7 +210,6 @@ class Repairer { status = Status::OK(); // Keep going with rest of file } } - delete lfile; // Do not record a version edit for this conversion to a Table // since ExtractMetaData() will also generate edits. @@ -304,7 +294,7 @@ class Repairer { Status WriteDescriptor() { std::string tmp = TempFileName(dbname_, 1); - WritableFile* file; + unique_ptr file; Status status = env_->NewWritableFile(tmp, &file); if (!status.ok()) { return status; @@ -331,16 +321,11 @@ class Repairer { //fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str()); { - log::Writer log(file); + log::Writer log(std::move(file)); std::string record; edit_->EncodeTo(&record); status = log.AddRecord(record); } - if (status.ok()) { - status = file->Close(); - } - delete file; - file = NULL; if (!status.ok()) { env_->DeleteFile(tmp); diff --git a/db/table_cache.cc b/db/table_cache.cc index cf44920d8..e6f18b875 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -13,17 +13,17 @@ namespace leveldb { struct TableAndFile { - RandomAccessFile* file; - Table* table; + unique_ptr file; + unique_ptr table; }; static class DBStatistics* dbstatistics; static void DeleteEntry(const Slice& key, void* value) { TableAndFile* tf = reinterpret_cast(value); - delete tf->table; - delete tf->file; - dbstatistics ? dbstatistics->incNumFileCloses() : (void)0; + if (dbstatistics) { + dbstatistics->incNumFileCloses(); + } delete tf; } @@ -44,7 +44,6 @@ TableCache::TableCache(const std::string& dbname, } TableCache::~TableCache() { - delete cache_; } Status TableCache::FindTable(uint64_t file_number, uint64_t file_size, @@ -60,24 +59,24 @@ Status TableCache::FindTable(uint64_t file_number, uint64_t file_size, *tableIO = true; // we had to do IO from storage } std::string fname = TableFileName(dbname_, file_number); - RandomAccessFile* file = NULL; - Table* table = NULL; + unique_ptr file; + unique_ptr
table; s = env_->NewRandomAccessFile(fname, &file); stats ? stats->incNumFileOpens() : (void)0; if (s.ok()) { - s = Table::Open(*options_, file_number, file, file_size, &table); + s = Table::Open(*options_, file_number, std::move(file), file_size, + &table); } if (!s.ok()) { assert(table == NULL); - delete file; stats ? stats->incNumFileErrors() : (void)0; // We do not cache error results so that if the error is transient, // or somebody repairs the file, we recover automatically. } else { TableAndFile* tf = new TableAndFile; - tf->file = file; - tf->table = table; + tf->file = std::move(file); + tf->table = std::move(table); *handle = cache_->Insert(key, tf, 1, &DeleteEntry); } } @@ -98,9 +97,10 @@ Iterator* TableCache::NewIterator(const ReadOptions& options, return NewErrorIterator(s); } - Table* table = reinterpret_cast(cache_->Value(handle))->table; + Table* table = + reinterpret_cast(cache_->Value(handle))->table.get(); Iterator* result = table->NewIterator(options); - result->RegisterCleanup(&UnrefEntry, cache_, handle); + result->RegisterCleanup(&UnrefEntry, cache_.get(), handle); if (tableptr != NULL) { *tableptr = table; } @@ -117,7 +117,8 @@ Status TableCache::Get(const ReadOptions& options, Cache::Handle* handle = NULL; Status s = FindTable(file_number, file_size, &handle, tableIO); if (s.ok()) { - Table* t = reinterpret_cast(cache_->Value(handle))->table; + Table* t = + reinterpret_cast(cache_->Value(handle))->table.get(); s = t->InternalGet(options, k, arg, saver); cache_->Release(handle); } diff --git a/db/table_cache.h b/db/table_cache.h index 8e837b4aa..4de110828 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -52,7 +52,7 @@ class TableCache { Env* const env_; const std::string dbname_; const Options* options_; - Cache* cache_; + std::shared_ptr cache_; Status FindTable(uint64_t file_number, uint64_t file_size, Cache::Handle**, bool* tableIO = NULL); diff --git a/db/transaction_log_iterator_impl.cc b/db/transaction_log_iterator_impl.cc index bc118b8c2..234b26ef8 100644 --- a/db/transaction_log_iterator_impl.cc +++ b/db/transaction_log_iterator_impl.cc @@ -14,22 +14,23 @@ TransactionLogIteratorImpl::TransactionLogIteratorImpl( files_(files), started_(false), isValid_(true), - currentFileIndex_(0), - currentLogReader_(NULL) { - assert( files_ != NULL); + currentFileIndex_(0) { + assert(files_ != NULL); } LogReporter TransactionLogIteratorImpl::NewLogReporter(const uint64_t logNumber) { LogReporter reporter; reporter.env = options_->env; - reporter.info_log = options_->info_log; + reporter.info_log = options_->info_log.get(); reporter.log_number = logNumber; return reporter; } -Status TransactionLogIteratorImpl::OpenLogFile(const LogFile& logFile, - SequentialFile** file) { +Status TransactionLogIteratorImpl::OpenLogFile( + const LogFile& logFile, + unique_ptr* file) +{ Env* env = options_->env; if (logFile.type == kArchivedLogFile) { std::string fname = ArchivedLogFileName(dbname_, logFile.logNumber); @@ -73,17 +74,18 @@ void TransactionLogIteratorImpl::Next() { std::string scratch; Slice record; if (!started_) { - SequentialFile* file = NULL; + unique_ptr file; Status status = OpenLogFile(currentLogFile, &file); if (!status.ok()) { isValid_ = false; currentStatus_ = status; return; } - assert(file != NULL); + assert(file); WriteBatch batch; - log::Reader* reader = new log::Reader(file, &reporter, true, 0); - assert(reader != NULL); + unique_ptr reader( + new log::Reader(std::move(file), &reporter, true, 0)); + assert(reader); while (reader->ReadRecord(&record, &scratch)) { if (record.size() < 12) { reporter.Corruption( @@ -95,7 +97,7 @@ void TransactionLogIteratorImpl::Next() { if (currentNum >= sequenceNumber_) { isValid_ = true; currentRecord_ = record; - currentLogReader_ = reader; + currentLogReader_ = std::move(reader); break; } } @@ -108,7 +110,7 @@ void TransactionLogIteratorImpl::Next() { started_ = true; } else { LOOK_NEXT_FILE: - assert(currentLogReader_ != NULL); + assert(currentLogReader_); bool openNextFile = true; while (currentLogReader_->ReadRecord(&record, &scratch)) { if (record.size() < 12) { @@ -125,15 +127,16 @@ LOOK_NEXT_FILE: if (openNextFile) { if (currentFileIndex_ < files_->size() - 1) { ++currentFileIndex_; - delete currentLogReader_; - SequentialFile *file; + currentLogReader_.reset(); + unique_ptr file; Status status = OpenLogFile(files_->at(currentFileIndex_), &file); if (!status.ok()) { isValid_ = false; currentStatus_ = status; return; } - currentLogReader_ = new log::Reader(file, &reporter, true, 0); + currentLogReader_.reset( + new log::Reader(std::move(file), &reporter, true, 0)); goto LOOK_NEXT_FILE; } else { // LOOKED AT FILES. WE ARE DONE HERE. diff --git a/db/transaction_log_iterator_impl.h b/db/transaction_log_iterator_impl.h index 880032100..7b0b7723f 100644 --- a/db/transaction_log_iterator_impl.h +++ b/db/transaction_log_iterator_impl.h @@ -26,14 +26,11 @@ struct LogReporter : public log::Reader::Reporter { class TransactionLogIteratorImpl : public TransactionLogIterator { public: TransactionLogIteratorImpl(const std::string& dbname, - const Options* options, - SequenceNumber& seqNum, - std::vector* files); + const Options* options, + SequenceNumber& seqNum, + std::vector* files); virtual ~TransactionLogIteratorImpl() { // TODO move to cc file. - if (currentLogReader_ != NULL) { - delete currentLogReader_; - } delete files_; } @@ -55,8 +52,8 @@ class TransactionLogIteratorImpl : public TransactionLogIterator { Status currentStatus_; size_t currentFileIndex_; Slice currentRecord_; - log::Reader* currentLogReader_; - Status OpenLogFile(const LogFile& logFile, SequentialFile** file); + unique_ptr currentLogReader_; + Status OpenLogFile(const LogFile& logFile, unique_ptr* file); LogReporter NewLogReporter(uint64_t logNumber); }; diff --git a/db/version_edit.cc b/db/version_edit.cc index 9a52310ca..198ca275a 100644 --- a/db/version_edit.cc +++ b/db/version_edit.cc @@ -95,13 +95,16 @@ static bool GetInternalKey(Slice* input, InternalKey* dst) { } } -bool VersionEdit::GetLevel(Slice* input, int* level) { +bool VersionEdit::GetLevel(Slice* input, int* level, const char** msg) { uint32_t v; if (GetVarint32(input, &v) && (int)v < number_levels_) { *level = v; return true; } else { + if ((int)v >= number_levels_) { + *msg = "db already has more levels than options.num_levels"; + } return false; } } @@ -163,32 +166,38 @@ Status VersionEdit::DecodeFrom(const Slice& src) { break; case kCompactPointer: - if (GetLevel(&input, &level) && + if (GetLevel(&input, &level, &msg) && GetInternalKey(&input, &key)) { compact_pointers_.push_back(std::make_pair(level, key)); } else { - msg = "compaction pointer"; + if (!msg) { + msg = "compaction pointer"; + } } break; case kDeletedFile: - if (GetLevel(&input, &level) && + if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number)) { deleted_files_.insert(std::make_pair(level, number)); } else { - msg = "deleted file"; + if (!msg) { + msg = "deleted file"; + } } break; case kNewFile: - if (GetLevel(&input, &level) && + if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &f.number) && GetVarint64(&input, &f.file_size) && GetInternalKey(&input, &f.smallest) && GetInternalKey(&input, &f.largest)) { new_files_.push_back(std::make_pair(level, f)); } else { - msg = "new-file entry"; + if (!msg) { + msg = "new-file entry"; + } } break; diff --git a/db/version_edit.h b/db/version_edit.h index c11eda856..3776c2179 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -91,7 +91,7 @@ class VersionEdit { typedef std::set< std::pair > DeletedFileSet; - bool GetLevel(Slice* input, int* level); + bool GetLevel(Slice* input, int* level, const char** msg); int number_levels_; std::string comparator_; diff --git a/db/version_set.cc b/db/version_set.cc index 3316c0194..9f0457e3e 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1008,8 +1008,6 @@ VersionSet::VersionSet(const std::string& dbname, log_number_(0), prev_log_number_(0), num_levels_(options_->num_levels), - descriptor_file_(NULL), - descriptor_log_(NULL), dummy_versions_(this), current_(NULL), compactions_in_progress_(options_->num_levels), @@ -1026,8 +1024,6 @@ VersionSet::~VersionSet() { delete[] compact_pointer_; delete[] max_file_size_; delete[] level_max_bytes_; - delete descriptor_log_; - delete descriptor_file_; } void VersionSet::Init(int num_levels) { @@ -1106,16 +1102,17 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu, manifest_file_number_ = NewFileNumber(); // Change manifest file no. } - if (descriptor_log_ == NULL || new_descriptor_log) { + if (!descriptor_log_ || new_descriptor_log) { // No reason to unlock *mu here since we only hit this path in the // first call to LogAndApply (when opening the database). - assert(descriptor_file_ == NULL || new_descriptor_log); + assert(!descriptor_log_ || new_descriptor_log); new_manifest_file = DescriptorFileName(dbname_, manifest_file_number_); edit->SetNextFile(next_file_number_); - s = env_->NewWritableFile(new_manifest_file, &descriptor_file_); + unique_ptr descriptor_file; + s = env_->NewWritableFile(new_manifest_file, &descriptor_file); if (s.ok()) { - descriptor_log_ = new log::Writer(descriptor_file_); - s = WriteSnapshot(descriptor_log_); + descriptor_log_.reset(new log::Writer(std::move(descriptor_file))); + s = WriteSnapshot(descriptor_log_.get()); } } @@ -1141,9 +1138,9 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu, } if (s.ok()) { if (options_->use_fsync) { - s = descriptor_file_->Fsync(); + s = descriptor_log_->file()->Fsync(); } else { - s = descriptor_file_->Sync(); + s = descriptor_log_->file()->Sync(); } } if (!s.ok()) { @@ -1164,7 +1161,7 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu, } // find offset in manifest file where this version is stored. - new_manifest_file_size = descriptor_file_->GetFileSize(); + new_manifest_file_size = descriptor_log_->file()->GetFileSize(); mu->Lock(); // cache the manifest_file_size so that it can be used to rollover in the @@ -1184,10 +1181,7 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu, v->GetVersionNumber()); delete v; if (!new_manifest_file.empty()) { - delete descriptor_log_; - delete descriptor_file_; - descriptor_log_ = NULL; - descriptor_file_ = NULL; + descriptor_log_.reset(); env_->DeleteFile(new_manifest_file); } } @@ -1254,7 +1248,7 @@ Status VersionSet::Recover() { current.c_str()); std::string dscname = dbname_ + "/" + current; - SequentialFile* file; + unique_ptr file; s = env_->NewSequentialFile(dscname, &file); if (!s.ok()) { return s; @@ -1278,7 +1272,8 @@ Status VersionSet::Recover() { { LogReporter reporter; reporter.status = &s; - log::Reader reader(file, &reporter, true/*checksum*/, 0/*initial_offset*/); + log::Reader reader(std::move(file), &reporter, true/*checksum*/, + 0/*initial_offset*/); Slice record; std::string scratch; while (reader.ReadRecord(&record, &scratch) && s.ok()) { @@ -1318,8 +1313,7 @@ Status VersionSet::Recover() { } } } - delete file; - file = NULL; + file.reset(); if (s.ok()) { if (!have_next_file) { @@ -1372,7 +1366,7 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname, }; // Open the specified manifest file. - SequentialFile* file; + unique_ptr file; Status s = options.env->NewSequentialFile(dscname, &file); if (!s.ok()) { return s; @@ -1392,7 +1386,8 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname, { LogReporter reporter; reporter.status = &s; - log::Reader reader(file, &reporter, true/*checksum*/, 0/*initial_offset*/); + log::Reader reader(std::move(file), &reporter, true/*checksum*/, + 0/*initial_offset*/); Slice record; std::string scratch; while (reader.ReadRecord(&record, &scratch) && s.ok()) { @@ -1439,8 +1434,7 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname, } } } - delete file; - file = NULL; + file.reset(); if (s.ok()) { if (!have_next_file) { @@ -1673,13 +1667,13 @@ const char* VersionSet::LevelDataSizeSummary( bool VersionSet::ManifestContains(const std::string& record) const { std::string fname = DescriptorFileName(dbname_, manifest_file_number_); Log(options_->info_log, "ManifestContains: checking %s\n", fname.c_str()); - SequentialFile* file = NULL; + unique_ptr file; Status s = env_->NewSequentialFile(fname, &file); if (!s.ok()) { Log(options_->info_log, "ManifestContains: %s\n", s.ToString().c_str()); return false; } - log::Reader reader(file, NULL, true/*checksum*/, 0); + log::Reader reader(std::move(file), NULL, true/*checksum*/, 0); Slice r; std::string scratch; bool result = false; @@ -1689,7 +1683,6 @@ bool VersionSet::ManifestContains(const std::string& record) const { break; } } - delete file; Log(options_->info_log, "ManifestContains: result = %d\n", result ? 1 : 0); return result; } diff --git a/db/version_set.h b/db/version_set.h index 71f05fd03..6b642f555 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -16,6 +16,7 @@ #define STORAGE_LEVELDB_DB_VERSION_SET_H_ #include +#include #include #include #include @@ -408,6 +409,9 @@ class VersionSet { // record results in files_by_size_. The largest files are listed first. void UpdateFilesBySize(Version *v); + // Get the max file size in a given level. + uint64_t MaxFileSizeForLevel(int level); + private: class Builder; struct ManifestWriter; @@ -440,8 +444,6 @@ class VersionSet { double MaxBytesForLevel(int level); - uint64_t MaxFileSizeForLevel(int level); - int64_t ExpandedCompactionByteSizeLimit(int level); int64_t MaxGrandParentOverlapBytes(int level); @@ -460,8 +462,7 @@ class VersionSet { int num_levels_; // Opened lazily - WritableFile* descriptor_file_; - log::Writer* descriptor_log_; + unique_ptr descriptor_log_; Version dummy_versions_; // Head of circular doubly-linked list of versions. Version* current_; // == dummy_versions_.prev_ diff --git a/fbcode.clang31.sh b/fbcode.clang31.sh index 276bf12ca..bcedad200 100644 --- a/fbcode.clang31.sh +++ b/fbcode.clang31.sh @@ -3,7 +3,6 @@ # Set environment variables so that we can compile leveldb using # fbcode settings. It uses the latest g++ compiler and also # uses jemalloc -# This is compiled with gcc version 4.7.1 for zeus proxy TOOLCHAIN_REV=f365dbeae46a30414a2874a6f45e73e10f1caf7d TOOLCHAIN_EXECUTABLES="/mnt/gvfs/third-party/$TOOLCHAIN_REV/centos5.2-native" diff --git a/fbcode.gcc471.sh b/fbcode.gcc471.sh index 1736a18f5..c6d94eb23 100644 --- a/fbcode.gcc471.sh +++ b/fbcode.gcc471.sh @@ -3,7 +3,6 @@ # Set environment variables so that we can compile leveldb using # fbcode settings. It uses the latest g++ compiler and also # uses jemalloc -# This is compiled with gcc version 4.7.1 for zeus proxy TOOLCHAIN_REV=20d3328ac30f633840ce819ad03019f415267a86 TOOLCHAIN_EXECUTABLES="/mnt/gvfs/third-party/$TOOLCHAIN_REV/centos5.2-native" diff --git a/hdfs/env_hdfs.h b/hdfs/env_hdfs.h index 2f37dd365..964a1cab9 100644 --- a/hdfs/env_hdfs.h +++ b/hdfs/env_hdfs.h @@ -235,13 +235,17 @@ class HdfsEnv : public Env { } virtual Status NewSequentialFile(const std::string& fname, - SequentialFile** result); + unique_ptr* result); virtual Status NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result){ return notsup;} + unique_ptr* result) { + return notsup; + } virtual Status NewWritableFile(const std::string& fname, - WritableFile** result){return notsup;} + unique_ptr* result) { + return notsup; + } virtual bool FileExists(const std::string& fname){return false;} @@ -269,7 +273,8 @@ class HdfsEnv : public Env { virtual Status UnlockFile(FileLock* lock){return notsup;} - virtual Status NewLogger(const std::string& fname, Logger** result){return notsup;} + virtual Status NewLogger(const std::string& fname, + shared_ptr* result){return notsup;} virtual void Schedule( void (*function)(void* arg), void* arg) {} diff --git a/helpers/memenv/memenv.cc b/helpers/memenv/memenv.cc index 954702420..3ca0be28a 100644 --- a/helpers/memenv/memenv.cc +++ b/helpers/memenv/memenv.cc @@ -233,31 +233,31 @@ class InMemoryEnv : public EnvWrapper { // Partial implementation of the Env interface. virtual Status NewSequentialFile(const std::string& fname, - SequentialFile** result) { + unique_ptr* result) { MutexLock lock(&mutex_); if (file_map_.find(fname) == file_map_.end()) { *result = NULL; return Status::IOError(fname, "File not found"); } - *result = new SequentialFileImpl(file_map_[fname]); + result->reset(new SequentialFileImpl(file_map_[fname])); return Status::OK(); } virtual Status NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result) { + unique_ptr* result) { MutexLock lock(&mutex_); if (file_map_.find(fname) == file_map_.end()) { *result = NULL; return Status::IOError(fname, "File not found"); } - *result = new RandomAccessFileImpl(file_map_[fname]); + result->reset(new RandomAccessFileImpl(file_map_[fname])); return Status::OK(); } virtual Status NewWritableFile(const std::string& fname, - WritableFile** result) { + unique_ptr* result) { MutexLock lock(&mutex_); if (file_map_.find(fname) != file_map_.end()) { DeleteFileInternal(fname); @@ -267,7 +267,7 @@ class InMemoryEnv : public EnvWrapper { file->Ref(); file_map_[fname] = file; - *result = new WritableFileImpl(file); + result->reset(new WritableFileImpl(file)); return Status::OK(); } diff --git a/helpers/memenv/memenv_test.cc b/helpers/memenv/memenv_test.cc index d8a562610..4a4821660 100644 --- a/helpers/memenv/memenv_test.cc +++ b/helpers/memenv/memenv_test.cc @@ -8,6 +8,7 @@ #include "leveldb/db.h" #include "leveldb/env.h" #include "util/testharness.h" +#include #include #include @@ -27,7 +28,7 @@ class MemEnvTest { TEST(MemEnvTest, Basics) { uint64_t file_size; - WritableFile* writable_file; + unique_ptr writable_file; std::vector children; ASSERT_OK(env_->CreateDir("/dir")); @@ -40,7 +41,7 @@ TEST(MemEnvTest, Basics) { // Create a file. ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file)); - delete writable_file; + writable_file.reset(); // Check that the file exists. ASSERT_TRUE(env_->FileExists("/dir/f")); @@ -53,7 +54,7 @@ TEST(MemEnvTest, Basics) { // Write to the file. ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file)); ASSERT_OK(writable_file->Append("abc")); - delete writable_file; + writable_file.reset(); // Check for expected size. ASSERT_OK(env_->GetFileSize("/dir/f", &file_size)); @@ -68,8 +69,8 @@ TEST(MemEnvTest, Basics) { ASSERT_EQ(3U, file_size); // Check that opening non-existent file fails. - SequentialFile* seq_file; - RandomAccessFile* rand_file; + unique_ptr seq_file; + unique_ptr rand_file; ASSERT_TRUE(!env_->NewSequentialFile("/dir/non_existent", &seq_file).ok()); ASSERT_TRUE(!seq_file); ASSERT_TRUE(!env_->NewRandomAccessFile("/dir/non_existent", &rand_file).ok()); @@ -85,9 +86,9 @@ TEST(MemEnvTest, Basics) { } TEST(MemEnvTest, ReadWrite) { - WritableFile* writable_file; - SequentialFile* seq_file; - RandomAccessFile* rand_file; + unique_ptr writable_file; + unique_ptr seq_file; + unique_ptr rand_file; Slice result; char scratch[100]; @@ -96,7 +97,7 @@ TEST(MemEnvTest, ReadWrite) { ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file)); ASSERT_OK(writable_file->Append("hello ")); ASSERT_OK(writable_file->Append("world")); - delete writable_file; + writable_file.reset(); // Read sequentially. ASSERT_OK(env_->NewSequentialFile("/dir/f", &seq_file)); @@ -110,7 +111,6 @@ TEST(MemEnvTest, ReadWrite) { ASSERT_OK(seq_file->Skip(100)); // Try to skip past end of file. ASSERT_OK(seq_file->Read(1000, &result, scratch)); ASSERT_EQ(0U, result.size()); - delete seq_file; // Random reads. ASSERT_OK(env_->NewRandomAccessFile("/dir/f", &rand_file)); @@ -123,7 +123,6 @@ TEST(MemEnvTest, ReadWrite) { // Too high offset. ASSERT_TRUE(!rand_file->Read(1000, 5, &result, scratch).ok()); - delete rand_file; } TEST(MemEnvTest, Locks) { @@ -139,14 +138,14 @@ TEST(MemEnvTest, Misc) { ASSERT_OK(env_->GetTestDirectory(&test_dir)); ASSERT_TRUE(!test_dir.empty()); - WritableFile* writable_file; + unique_ptr writable_file; ASSERT_OK(env_->NewWritableFile("/a/b", &writable_file)); // These are no-ops, but we test they return success. ASSERT_OK(writable_file->Sync()); ASSERT_OK(writable_file->Flush()); ASSERT_OK(writable_file->Close()); - delete writable_file; + writable_file.reset(); } TEST(MemEnvTest, LargeWrite) { @@ -158,13 +157,13 @@ TEST(MemEnvTest, LargeWrite) { write_data.append(1, static_cast(i)); } - WritableFile* writable_file; + unique_ptr writable_file; ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file)); ASSERT_OK(writable_file->Append("foo")); ASSERT_OK(writable_file->Append(write_data)); - delete writable_file; + writable_file.reset(); - SequentialFile* seq_file; + unique_ptr seq_file; Slice result; ASSERT_OK(env_->NewSequentialFile("/dir/f", &seq_file)); ASSERT_OK(seq_file->Read(3, &result, scratch)); // Read "foo". @@ -178,7 +177,6 @@ TEST(MemEnvTest, LargeWrite) { read += result.size(); } ASSERT_TRUE(write_data == read_data); - delete seq_file; delete [] scratch; } diff --git a/include/leveldb/c.h b/include/leveldb/c.h index f62ffc07b..8799a5fb8 100644 --- a/include/leveldb/c.h +++ b/include/leveldb/c.h @@ -183,6 +183,10 @@ extern void leveldb_options_destroy(leveldb_options_t*); extern void leveldb_options_set_comparator( leveldb_options_t*, leveldb_comparator_t*); +extern void leveldb_options_set_compression_per_level( + leveldb_options_t* opt, + int* level_values, + size_t num_levels); extern void leveldb_options_set_filter_policy( leveldb_options_t*, leveldb_filterpolicy_t*); diff --git a/include/leveldb/cache.h b/include/leveldb/cache.h index 6842bfa92..34333d965 100644 --- a/include/leveldb/cache.h +++ b/include/leveldb/cache.h @@ -18,18 +18,21 @@ #ifndef STORAGE_LEVELDB_INCLUDE_CACHE_H_ #define STORAGE_LEVELDB_INCLUDE_CACHE_H_ +#include #include #include #include "leveldb/slice.h" namespace leveldb { +using std::shared_ptr; + class Cache; // Create a new cache with a fixed size capacity. This implementation // of Cache uses a least-recently-used eviction policy. -extern Cache* NewLRUCache(size_t capacity); -extern Cache* NewLRUCache(size_t capacity, int numShardBits); +extern shared_ptr NewLRUCache(size_t capacity); +extern shared_ptr NewLRUCache(size_t capacity, int numShardBits); class BlockMetrics; diff --git a/include/leveldb/db.h b/include/leveldb/db.h index f016fbd98..a747325bd 100644 --- a/include/leveldb/db.h +++ b/include/leveldb/db.h @@ -7,6 +7,7 @@ #include #include +#include #include #include "leveldb/iterator.h" #include "leveldb/options.h" @@ -15,6 +16,8 @@ namespace leveldb { +using std::unique_ptr; + // Update Makefile if you change these static const int kMajorVersion = 1; static const int kMinorVersion = 5; @@ -201,7 +204,7 @@ class DB { // cleared aggressively and the iterator might keep getting invalid before // an update is read. virtual Status GetUpdatesSince(SequenceNumber seq_number, - TransactionLogIterator** iter) = 0; + unique_ptr* iter) = 0; private: static Status InternalOpen(const Options& options, const std::string& name, diff --git a/include/leveldb/env.h b/include/leveldb/env.h index ade56f72c..b03228cb0 100644 --- a/include/leveldb/env.h +++ b/include/leveldb/env.h @@ -15,6 +15,7 @@ #include #include +#include #include #include #include "leveldb/status.h" @@ -28,6 +29,9 @@ class SequentialFile; class Slice; class WritableFile; +using std::unique_ptr; +using std::shared_ptr; + class Env { public: Env() { } @@ -47,7 +51,7 @@ class Env { // // The returned file will only be accessed by one thread at a time. virtual Status NewSequentialFile(const std::string& fname, - SequentialFile** result) = 0; + unique_ptr* result) = 0; // Create a brand new random access read-only file with the // specified name. On success, stores a pointer to the new file in @@ -57,7 +61,7 @@ class Env { // // The returned file may be concurrently accessed by multiple threads. virtual Status NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result) = 0; + unique_ptr* result) = 0; // Create an object that writes to a new file with the specified // name. Deletes any existing file with the same name and creates a @@ -67,7 +71,7 @@ class Env { // // The returned file will only be accessed by one thread at a time. virtual Status NewWritableFile(const std::string& fname, - WritableFile** result) = 0; + unique_ptr* result) = 0; // Returns true iff the named file exists. virtual bool FileExists(const std::string& fname) = 0; @@ -143,7 +147,8 @@ class Env { virtual Status GetTestDirectory(std::string* path) = 0; // Create and return a log file for storing informational messages. - virtual Status NewLogger(const std::string& fname, Logger** result) = 0; + virtual Status NewLogger(const std::string& fname, + shared_ptr* result) = 0; // Returns the number of micro-seconds since some fixed point in time. Only // useful for computing deltas of time. @@ -218,6 +223,26 @@ class RandomAccessFile { // Safe for concurrent use by multiple threads. virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const = 0; + + // Tries to get an unique ID for this file that will be the same each time + // the file is opened (and will stay the same while the file is open). + // Furthermore, it tries to make this ID at most "max_size" bytes. If such an + // ID can be created this function returns the length of the ID and places it + // in "id"; otherwise, this function returns 0, in which case "id" may more + // may not have been modified. + // + // This function guarantees, for IDs from a given environment, two unique ids + // cannot be made equal to eachother by adding arbitrary bytes to one of + // them. That is, no unique ID is the prefix of another. + // + // This function guarantees that the returned ID will not be interpretable as + // a single varint. + // + // Note: these IDs are only valid for the duration of the process. + virtual size_t GetUniqueId(char* id, size_t max_size) const { + return 0; // Default implementation to prevent issues with backwards + // compatibility. + }; }; // A file abstraction for sequential writing. The implementation @@ -225,7 +250,8 @@ class RandomAccessFile { // at a time to the file. class WritableFile { public: - WritableFile() { } + WritableFile() : last_preallocated_block_(0), preallocation_block_size_ (0) { + } virtual ~WritableFile(); virtual Status Append(const Slice& data) = 0; @@ -250,7 +276,57 @@ class WritableFile { return 0; } + /* + * Get and set the default pre-allocation block size for writes to + * this file. If non-zero, then Allocate will be used to extend the + * underlying storage of a file (generally via fallocate) if the Env + * instance supports it. + */ + void SetPreallocationBlockSize(size_t size) { + preallocation_block_size_ = size; + } + + virtual void GetPreallocationStatus(size_t* block_size, + size_t* last_allocated_block) { + *last_allocated_block = last_preallocated_block_; + *block_size = preallocation_block_size_; + } + + protected: + // PrepareWrite performs any necessary preparation for a write + // before the write actually occurs. This allows for pre-allocation + // of space on devices where it can result in less file + // fragmentation and/or less waste from over-zealous filesystem + // pre-allocation. + void PrepareWrite(size_t offset, size_t len) { + if (preallocation_block_size_ == 0) { + return; + } + // If this write would cross one or more preallocation blocks, + // determine what the last preallocation block necesessary to + // cover this write would be and Allocate to that point. + const auto block_size = preallocation_block_size_; + size_t new_last_preallocated_block = + (offset + len + block_size - 1) / block_size; + if (new_last_preallocated_block > last_preallocated_block_) { + size_t num_spanned_blocks = + new_last_preallocated_block - last_preallocated_block_; + Allocate(block_size * last_preallocated_block_, + block_size * num_spanned_blocks); + last_preallocated_block_ = new_last_preallocated_block; + } + } + + /* + * Pre-allocate space for a file. + */ + virtual Status Allocate(off_t offset, off_t len) { + return Status::OK(); + } + private: + size_t last_preallocated_block_; + size_t preallocation_block_size_; // No copying allowed WritableFile(const WritableFile&); void operator=(const WritableFile&); @@ -288,6 +364,12 @@ class FileLock { }; // Log the specified data to *info_log if info_log is non-NULL. +extern void Log(const shared_ptr& info_log, const char* format, ...) +# if defined(__GNUC__) || defined(__clang__) + __attribute__((__format__ (__printf__, 2, 3))) +# endif + ; + extern void Log(Logger* info_log, const char* format, ...) # if defined(__GNUC__) || defined(__clang__) __attribute__((__format__ (__printf__, 2, 3))) @@ -315,13 +397,15 @@ class EnvWrapper : public Env { Env* target() const { return target_; } // The following text is boilerplate that forwards all methods to target() - Status NewSequentialFile(const std::string& f, SequentialFile** r) { + Status NewSequentialFile(const std::string& f, + unique_ptr* r) { return target_->NewSequentialFile(f, r); } - Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) { + Status NewRandomAccessFile(const std::string& f, + unique_ptr* r) { return target_->NewRandomAccessFile(f, r); } - Status NewWritableFile(const std::string& f, WritableFile** r) { + Status NewWritableFile(const std::string& f, unique_ptr* r) { return target_->NewWritableFile(f, r); } bool FileExists(const std::string& f) { return target_->FileExists(f); } @@ -359,7 +443,8 @@ class EnvWrapper : public Env { virtual Status GetTestDirectory(std::string* path) { return target_->GetTestDirectory(path); } - virtual Status NewLogger(const std::string& fname, Logger** result) { + virtual Status NewLogger(const std::string& fname, + shared_ptr* result) { return target_->NewLogger(fname, result); } uint64_t NowMicros() { diff --git a/include/leveldb/options.h b/include/leveldb/options.h index 3145fc46e..fb29c297a 100644 --- a/include/leveldb/options.h +++ b/include/leveldb/options.h @@ -7,6 +7,8 @@ #include #include +#include +#include #include #include "leveldb/slice.h" @@ -20,6 +22,8 @@ class Logger; class Snapshot; class Statistics; +using std::shared_ptr; + // DB contents are stored in a set of blocks, each of which holds a // sequence of key,value pairs. Each block may be compressed before // being stored in a file. The following enum describes which @@ -84,7 +88,7 @@ struct Options { // be written to info_log if it is non-NULL, or to a file stored // in the same directory as the DB contents if info_log is NULL. // Default: NULL - Logger* info_log; + shared_ptr info_log; // ------------------- // Parameters that affect performance @@ -121,7 +125,7 @@ struct Options { // If non-NULL, use the specified cache for blocks. // If NULL, leveldb will automatically create and use an 8MB internal cache. // Default: NULL - Cache* block_cache; + shared_ptr block_cache; // Approximate size of user data packed per block. Note that the // block size specified here corresponds to uncompressed data. The @@ -167,7 +171,7 @@ struct Options { // array and it could be freed anytime after the return from Open(). // This could have been a std::vector but that makes the equivalent // java/C api hard to construct. - CompressionType* compression_per_level; + std::vector compression_per_level; //different options for compression algorithms CompressionOptions compression_opts; @@ -326,7 +330,7 @@ struct Options { // Create an Options object with default values for all fields. Options(); - void Dump(Logger * log) const; + void Dump(Logger* log) const; // This method allows an application to modify/delete a key-value at // the time of compaction. The compaction process invokes this @@ -360,6 +364,12 @@ struct Options { // deleted. // Default : 0 uint64_t WAL_ttl_seconds; + + // Number of bytes to preallocate (via fallocate) the manifest + // files. Default is 4mb, which is reasonable to reduce random IO + // as well as prevent overallocation for mounts that preallocate + // large amounts of data (such as xfs's allocsize option). + size_t manifest_preallocation_size; }; // Options that control read operations diff --git a/include/leveldb/table.h b/include/leveldb/table.h index 7cf3556c3..497312dee 100644 --- a/include/leveldb/table.h +++ b/include/leveldb/table.h @@ -5,6 +5,7 @@ #ifndef STORAGE_LEVELDB_INCLUDE_TABLE_H_ #define STORAGE_LEVELDB_INCLUDE_TABLE_H_ +#include #include #include "leveldb/iterator.h" @@ -18,6 +19,8 @@ class RandomAccessFile; struct ReadOptions; class TableCache; +using std::unique_ptr; + // A Table is a sorted map from strings to strings. Tables are // immutable and persistent. A Table may be safely accessed from // multiple threads without external synchronization. @@ -37,9 +40,9 @@ class Table { // *file must remain live while this Table is in use. static Status Open(const Options& options, uint64_t file_number, - RandomAccessFile* file, + unique_ptr&& file, uint64_t file_size, - Table** table); + unique_ptr
* table); ~Table(); @@ -56,6 +59,10 @@ class Table { // be close to the file length. uint64_t ApproximateOffsetOf(const Slice& key) const; + // Returns true if the block for the specified key is in cache. + // REQUIRES: key is in this table. + bool TEST_KeyInCache(const ReadOptions& options, const Slice& key); + private: struct Rep; Rep* rep_; @@ -78,6 +85,8 @@ class Table { void ReadMeta(const Footer& footer); void ReadFilter(const Slice& filter_handle_value); + static void SetupCacheKeyPrefix(Rep* rep); + // No copying allowed Table(const Table&); void operator=(const Table&); diff --git a/table/table.cc b/table/table.cc index 48a0abffe..eb2f5a7f7 100644 --- a/table/table.cc +++ b/table/table.cc @@ -18,6 +18,11 @@ namespace leveldb { +// The longest the prefix of the cache key used to identify blocks can be. +// We are using the fact that we know for Posix files the unique ID is three +// varints. +const size_t kMaxCacheKeyPrefixSize = kMaxVarint64Length*3+1; + struct Table::Rep { ~Rep() { delete filter; @@ -27,8 +32,9 @@ struct Table::Rep { Options options; Status status; - RandomAccessFile* file; - uint64_t cache_id; + unique_ptr file; + char cache_key_prefix[kMaxCacheKeyPrefixSize]; + size_t cache_key_prefix_size; uint64_t file_number; FilterBlockReader* filter; const char* filter_data; @@ -37,12 +43,31 @@ struct Table::Rep { Block* index_block; }; +// Helper function to setup the cache key's prefix for the Table. +void Table::SetupCacheKeyPrefix(Rep* rep) { + assert(kMaxCacheKeyPrefixSize >= 10); + rep->cache_key_prefix_size = 0; + if (rep->options.block_cache) { + rep->cache_key_prefix_size = rep->file->GetUniqueId(rep->cache_key_prefix, + kMaxCacheKeyPrefixSize); + + if (rep->cache_key_prefix_size == 0) { + // If the prefix wasn't generated or was too long, we create one from the + // cache. + char* end = EncodeVarint64(rep->cache_key_prefix, + rep->options.block_cache->NewId()); + rep->cache_key_prefix_size = + static_cast(end - rep->cache_key_prefix); + } + } +} + Status Table::Open(const Options& options, uint64_t file_number, - RandomAccessFile* file, + unique_ptr&& file, uint64_t size, - Table** table) { - *table = NULL; + unique_ptr
* table) { + table->reset(); if (size < Footer::kEncodedLength) { return Status::InvalidArgument("file is too short to be an sstable"); } @@ -68,7 +93,7 @@ Status Table::Open(const Options& options, BlockContents contents; Block* index_block = NULL; if (s.ok()) { - s = ReadBlock(file, ReadOptions(), footer.index_handle(), &contents); + s = ReadBlock(file.get(), ReadOptions(), footer.index_handle(), &contents); if (s.ok()) { index_block = new Block(contents); } @@ -79,14 +104,14 @@ Status Table::Open(const Options& options, // ready to serve requests. Rep* rep = new Table::Rep; rep->options = options; - rep->file = file; + rep->file = std::move(file); rep->metaindex_handle = footer.metaindex_handle(); rep->index_block = index_block; - rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0); + SetupCacheKeyPrefix(rep); rep->file_number = file_number; rep->filter_data = NULL; rep->filter = NULL; - *table = new Table(rep); + table->reset(new Table(rep)); (*table)->ReadMeta(footer); } else { if (index_block) delete index_block; @@ -104,7 +129,8 @@ void Table::ReadMeta(const Footer& footer) { // it is an empty block. ReadOptions opt; BlockContents contents; - if (!ReadBlock(rep_->file, opt, footer.metaindex_handle(), &contents).ok()) { + if (!ReadBlock(rep_->file.get(), opt, footer.metaindex_handle(), + &contents).ok()) { // Do not propagate errors since meta info is not needed for operation return; } @@ -132,7 +158,7 @@ void Table::ReadFilter(const Slice& filter_handle_value) { // requiring checksum verification in Table::Open. ReadOptions opt; BlockContents block; - if (!ReadBlock(rep_->file, opt, filter_handle, &block).ok()) { + if (!ReadBlock(rep_->file.get(), opt, filter_handle, &block).ok()) { return; } if (block.heap_allocated) { @@ -187,7 +213,7 @@ Iterator* Table::BlockReader(void* arg, const Slice& index_value, bool* didIO) { Table* table = reinterpret_cast(arg); - Cache* block_cache = table->rep_->options.block_cache; + Cache* block_cache = table->rep_->options.block_cache.get(); Statistics* const statistics = table->rep_->options.statistics; Block* block = NULL; Cache::Handle* cache_handle = NULL; @@ -201,17 +227,22 @@ Iterator* Table::BlockReader(void* arg, if (s.ok()) { BlockContents contents; if (block_cache != NULL) { - char cache_key_buffer[16]; - EncodeFixed64(cache_key_buffer, table->rep_->cache_id); - EncodeFixed64(cache_key_buffer+8, handle.offset()); - Slice key(cache_key_buffer, sizeof(cache_key_buffer)); + char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; + const size_t cache_key_prefix_size = table->rep_->cache_key_prefix_size; + assert(cache_key_prefix_size != 0); + assert(cache_key_prefix_size <= kMaxCacheKeyPrefixSize); + memcpy(cache_key, table->rep_->cache_key_prefix, + cache_key_prefix_size); + char* end = EncodeVarint64(cache_key + cache_key_prefix_size, + handle.offset()); + Slice key(cache_key, static_cast(end-cache_key)); cache_handle = block_cache->Lookup(key); if (cache_handle != NULL) { block = reinterpret_cast(block_cache->Value(cache_handle)); RecordTick(statistics, BLOCK_CACHE_HIT); } else { - s = ReadBlock(table->rep_->file, options, handle, &contents); + s = ReadBlock(table->rep_->file.get(), options, handle, &contents); if (s.ok()) { block = new Block(contents); if (contents.cachable && options.fill_cache) { @@ -226,7 +257,7 @@ Iterator* Table::BlockReader(void* arg, RecordTick(statistics, BLOCK_CACHE_MISS); } } else { - s = ReadBlock(table->rep_->file, options, handle, &contents); + s = ReadBlock(table->rep_->file.get(), options, handle, &contents); if (s.ok()) { block = new Block(contents); } @@ -315,6 +346,17 @@ Status Table::InternalGet(const ReadOptions& options, const Slice& k, return s; } +void SaveDidIO(void* arg, const Slice& key, const Slice& value, bool didIO) { + *reinterpret_cast(arg) = didIO; +} +bool Table::TEST_KeyInCache(const ReadOptions& options, const Slice& key) { + // We use InternalGet() as it has logic that checks whether we read the + // block from the disk or not. + bool didIO = false; + Status s = InternalGet(options, key, &didIO, SaveDidIO); + assert(s.ok()); + return !didIO; +} uint64_t Table::ApproximateOffsetOf(const Slice& key) const { Iterator* index_iter = diff --git a/table/table_builder.cc b/table/table_builder.cc index d867a1ca9..975d773fa 100644 --- a/table/table_builder.cc +++ b/table/table_builder.cc @@ -156,15 +156,14 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) { CompressionType type; // If the use has specified a different compression level for each level, // then pick the compresison for that level. - if (r->options.compression_per_level != NULL) { - if (level_ == -1) { - // this is mostly for backward compatibility. The builder does not - // know which level this file belongs to. Apply the compression level - // specified for level 0 to all levels. - type = r->options.compression_per_level[0]; - } else { - type = r->options.compression_per_level[level_]; - } + if (!r->options.compression_per_level.empty()) { + const int n = r->options.compression_per_level.size(); + // It is possible for level_ to be -1; in that case, we use level + // 0's compression. This occurs mostly in backwards compatibility + // situations when the builder doesn't know what level the file + // belongs to. Likewise, if level_ is beyond the end of the + // specified compression levels, use the last value. + type = r->options.compression_per_level[std::max(0, std::min(level_, n))]; } else { type = r->options.compression; } diff --git a/table/table_test.cc b/table/table_test.cc index 9ca518ee6..a720a88f4 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -109,8 +109,8 @@ class StringSink: public WritableFile { class StringSource: public RandomAccessFile { public: - StringSource(const Slice& contents) - : contents_(contents.data(), contents.size()) { + StringSource(const Slice& contents, uint64_t uniq_id) + : contents_(contents.data(), contents.size()), uniq_id_(uniq_id) { } virtual ~StringSource() { } @@ -130,8 +130,20 @@ class StringSource: public RandomAccessFile { return Status::OK(); } + virtual size_t GetUniqueId(char* id, size_t max_size) const { + if (max_size < 20) { + return 0; + } + + char* rid = id; + rid = EncodeVarint64(rid, uniq_id_); + rid = EncodeVarint64(rid, 0); + return static_cast(rid-id); + } + private: std::string contents_; + uint64_t uniq_id_; }; typedef std::map KVMap; @@ -221,16 +233,15 @@ class BlockConstructor: public Constructor { class TableConstructor: public Constructor { public: TableConstructor(const Comparator* cmp) - : Constructor(cmp), - source_(NULL), table_(NULL) { + : Constructor(cmp) { } ~TableConstructor() { Reset(); } virtual Status FinishImpl(const Options& options, const KVMap& data) { Reset(); - StringSink sink; - TableBuilder builder(options, &sink); + sink_.reset(new StringSink()); + TableBuilder builder(options, sink_.get()); for (KVMap::const_iterator it = data.begin(); it != data.end(); @@ -241,16 +252,14 @@ class TableConstructor: public Constructor { Status s = builder.Finish(); ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_EQ(sink.contents().size(), builder.FileSize()); + ASSERT_EQ(sink_->contents().size(), builder.FileSize()); // Open the table - source_ = new StringSource(sink.contents()); - Options table_options; - table_options.comparator = options.comparator; - table_options.compression_opts = options.compression_opts; + uniq_id_ = cur_uniq_id_++; + source_.reset(new StringSource(sink_->contents(), uniq_id_)); // Give the table an arbitrary file number. - return Table::Open(table_options, 9001u, source_, sink.contents().size(), - &table_); + return Table::Open(options, 9001u, std::move(source_), + sink_->contents().size(), &table_); } virtual Iterator* NewIterator() const { @@ -261,19 +270,34 @@ class TableConstructor: public Constructor { return table_->ApproximateOffsetOf(key); } + virtual Status Reopen(const Options& options) { + source_.reset(new StringSource(sink_->contents(), uniq_id_)); + return Table::Open(options, 9001u, std::move(source_), + sink_->contents().size(), &table_); + } + + virtual Table* table() { + return table_.get(); + } + private: void Reset() { - delete table_; - delete source_; - table_ = NULL; - source_ = NULL; + uniq_id_ = 0; + table_.reset(); + sink_.reset(); + source_.reset(); } - StringSource* source_; - Table* table_; + uint64_t uniq_id_; + unique_ptr sink_; + unique_ptr source_; + unique_ptr
table_; TableConstructor(); + + static uint64_t cur_uniq_id_; }; +uint64_t TableConstructor::cur_uniq_id_ = 1; // A helper class that converts internal format keys into user keys class KeyConvertingIterator: public Iterator { @@ -896,6 +920,44 @@ TEST(TableTest, ApproximateOffsetOfCompressed) { } +TEST(TableTest, BlockCacheLeak) { + // Check that when we reopen a table we don't lose access to blocks already + // in the cache. This test checks whether the Table actually makes use of the + // unique ID from the file. + + Options opt; + opt.block_size = 1024; + opt.compression = kNoCompression; + opt.block_cache = NewLRUCache(16*1024*1024); // big enough so we don't ever + // lose cached values. + + TableConstructor c(BytewiseComparator()); + c.Add("k01", "hello"); + c.Add("k02", "hello2"); + c.Add("k03", std::string(10000, 'x')); + c.Add("k04", std::string(200000, 'x')); + c.Add("k05", std::string(300000, 'x')); + c.Add("k06", "hello3"); + c.Add("k07", std::string(100000, 'x')); + std::vector keys; + KVMap kvmap; + c.Finish(opt, &keys, &kvmap); + + unique_ptr iter(c.NewIterator()); + iter->SeekToFirst(); + while (iter->Valid()) { + iter->key(); + iter->value(); + iter->Next(); + } + ASSERT_OK(iter->status()); + + ASSERT_OK(c.Reopen(opt)); + for (const std::string& key: keys) { + ASSERT_TRUE(c.table()->TEST_KeyInCache(ReadOptions(), key)); + } +} + } // namespace leveldb int main(int argc, char** argv) { diff --git a/tools/db_repl_stress.cc b/tools/db_repl_stress.cc index 091b0ca58..582e567ee 100644 --- a/tools/db_repl_stress.cc +++ b/tools/db_repl_stress.cc @@ -51,18 +51,13 @@ struct ReplicationThread { volatile bool has_more; }; -// experimenting with isNull. Makes code more readable? -static inline bool isNull(const void * const ptr) { - return ptr == NULL; -} - static void ReplicationThreadBody(void* arg) { ReplicationThread* t = reinterpret_cast(arg); DB* db = t->db; - TransactionLogIterator* iter = NULL; + unique_ptr iter; SequenceNumber currentSeqNum = 0; while (t->stop.Acquire_Load() != NULL) { - if (isNull(iter)) { + if (!iter) { db->GetUpdatesSince(currentSeqNum, &iter); fprintf(stdout, "Refreshing iterator\n"); iter->Next(); @@ -83,8 +78,7 @@ static void ReplicationThreadBody(void* arg) { t->no_read++; } } - delete iter; - iter = NULL; + iter.reset(); } } diff --git a/tools/db_stress.cc b/tools/db_stress.cc index 02934c85e..5a5e41dda 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -433,7 +433,6 @@ class StressTest { ~StressTest() { delete db_; - delete cache_; delete filter_policy_; } @@ -757,7 +756,7 @@ class StressTest { } private: - Cache* cache_; + shared_ptr cache_; const FilterPolicy* filter_policy_; DB* db_; int num_times_reopened_; diff --git a/tools/ldb.cc b/tools/ldb.cc index 7585e75d3..56e3d9670 100644 --- a/tools/ldb.cc +++ b/tools/ldb.cc @@ -10,36 +10,53 @@ class LDBCommandRunner { public: static void PrintHelp(const char* exec_name) { - std::string ret; - ret.append("--- compact ----:\n"); - ret.append(exec_name); - ret.append(" compact "); - Compactor::Help(ret); - - ret.append("\n--- dump ----:\n"); - ret.append(exec_name); - ret.append(" dump "); - DBDumper::Help(ret); - - ret.append("\n--- load ----:\n"); - ret.append(exec_name); - ret.append(" load "); - DBLoader::Help(ret); - - ret.append("\n--- query ----:\n"); - ret.append(exec_name); - ret.append(" query "); - DBQuerier::Help(ret); - - ret.append("\n---reduce_levels ----:\n"); - ret.append(exec_name); - ret.append(" reduce_levels "); - ReduceDBLevels::Help(ret); - - ret.append("\n---dump_wal----:\n"); - ret.append(exec_name); - ret.append(" dump_wal "); - WALDumper::Help(ret); + string ret; + + ret.append("ldb - LevelDB Tool"); + ret.append("\n\n"); + ret.append("All commands MUST specify --" + LDBCommand::ARG_DB + + "=\n"); + ret.append("\n"); + ret.append("The following optional parameters control if keys/values are " + "input/output as hex or as plain strings:\n"); + ret.append(" --" + LDBCommand::ARG_KEY_HEX + + " : Keys are input/output as hex\n"); + ret.append(" --" + LDBCommand::ARG_VALUE_HEX + + " : Values are input/output as hex\n"); + ret.append(" --" + LDBCommand::ARG_HEX + + " : Both keys and values are input/output as hex\n"); + ret.append("\n"); + + ret.append("The following optional parameters control the database " + "internals:\n"); + ret.append(" --" + LDBCommand::ARG_BLOOM_BITS + "=\n"); + ret.append(" --" + LDBCommand::ARG_COMPRESSION_TYPE + + "=\n"); + ret.append(" --" + LDBCommand::ARG_BLOCK_SIZE + + "=\n"); + ret.append(" --" + LDBCommand::ARG_AUTO_COMPACTION + "=\n"); + ret.append(" --" + LDBCommand::ARG_WRITE_BUFFER_SIZE + + "=\n"); + ret.append(" --" + LDBCommand::ARG_FILE_SIZE + "=\n"); + + ret.append("\n\n"); + ret.append("Data Access Commands:\n"); + PutCommand::Help(ret); + GetCommand::Help(ret); + BatchPutCommand::Help(ret); + ScanCommand::Help(ret); + DeleteCommand::Help(ret); + DBQuerierCommand::Help(ret); + ApproxSizeCommand::Help(ret); + + ret.append("\n\n"); + ret.append("Admin Commands:\n"); + WALDumperCommand::Help(ret); + CompactorCommand::Help(ret); + ReduceDBLevelsCommand::Help(ret); + DBDumperCommand::Help(ret); + DBLoaderCommand::Help(ret); + fprintf(stderr, "%s\n", ret.c_str()); } @@ -48,46 +65,26 @@ public: PrintHelp(argv[0]); exit(1); } - const char* cmd = argv[1]; - std::string db_name; - std::vector args; - for (int i = 2; i < argc; i++) { - if (strncmp(argv[i], "--db=", strlen("--db=")) == 0) { - db_name = argv[i] + strlen("--db="); - } else { - args.push_back(argv[i]); - } - } - LDBCommand* cmdObj = NULL; - if (strcmp(cmd, "compact") == 0) { - // run compactor - cmdObj = new Compactor(db_name, args); - } else if (strcmp(cmd, "dump") == 0) { - // run dump - cmdObj = new DBDumper(db_name, args); - } else if (strcmp(cmd, "load") == 0) { - // run loader - cmdObj = new DBLoader(db_name, args); - } else if (strcmp(cmd, "query") == 0) { - // run querier - cmdObj = new DBQuerier(db_name, args); - } else if (strcmp(cmd, "reduce_levels") == 0) { - // reduce db levels - cmdObj = new ReduceDBLevels(db_name, args); - } else if (strcmp(cmd, "dump_wal") == 0) { - cmdObj = new WALDumper(args); - } else { - fprintf(stderr, "Unknown command: %s\n", cmd); + LDBCommand* cmdObj = LDBCommand::InitFromCmdLineArgs(argc, argv); + if (cmdObj == NULL) { + fprintf(stderr, "Unknown command\n"); PrintHelp(argv[0]); exit(1); } + if (!cmdObj->ValidateCmdLineOptions()) { + exit(1); + } + cmdObj->Run(); LDBCommandExecuteResult ret = cmdObj->GetExecuteState(); fprintf(stderr, "%s\n", ret.ToString().c_str()); delete cmdObj; + + exit(ret.IsFailed()); } + }; } diff --git a/tools/ldb_test.py b/tools/ldb_test.py new file mode 100644 index 000000000..142a5a890 --- /dev/null +++ b/tools/ldb_test.py @@ -0,0 +1,308 @@ +import os +import os.path +import shutil +import subprocess +import time +import unittest +import tempfile + +def my_check_output(*popenargs, **kwargs): + """ + If we had python 2.7, we should simply use subprocess.check_output. + This is a stop-gap solution for python 2.6 + """ + if 'stdout' in kwargs: + raise ValueError('stdout argument not allowed, it will be overridden.') + process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) + output, unused_err = process.communicate() + retcode = process.poll() + if retcode: + cmd = kwargs.get("args") + if cmd is None: + cmd = popenargs[0] + raise Exception("Exit code is not 0. It is %d. Command: %s" % + (retcode, cmd)) + return output + + +class LDBTestCase(unittest.TestCase): + def setUp(self): + self.TMP_DIR = tempfile.mkdtemp(prefix="ldb_test_") + self.DB_NAME = "testdb" + + def tearDown(self): + assert(self.TMP_DIR.strip() != "/" + and self.TMP_DIR.strip() != "/tmp" + and self.TMP_DIR.strip() != "/tmp/") #Just some paranoia + + shutil.rmtree(self.TMP_DIR) + + def dbParam(self, dbName): + return "--db=%s" % os.path.join(self.TMP_DIR, dbName) + + def assertRunOKFull(self, params, expectedOutput): + """ + All command-line params must be specified. + Allows full flexibility in testing; for example: missing db param. + + """ + + output = my_check_output("./ldb %s |grep -v \"Created bg thread\"" % + params, shell=True) + self.assertEquals(output.strip(), expectedOutput.strip()); + + def assertRunFAILFull(self, params): + """ + All command-line params must be specified. + Allows full flexibility in testing; for example: missing db param. + + """ + try: + my_check_output("./ldb %s |grep -v \"Created bg thread\"" % params, + shell=True) + except Exception, e: + return + self.fail( + "Exception should have been raised for command with params: %s" % + params) + + def assertRunOK(self, params, expectedOutput): + """ + Uses the default test db. + + """ + self.assertRunOKFull("%s %s" % (self.dbParam(self.DB_NAME), params), + expectedOutput) + + def assertRunFAIL(self, params): + """ + Uses the default test db. + """ + self.assertRunFAILFull("%s %s" % (self.dbParam(self.DB_NAME), params)) + + def testSimpleStringPutGet(self): + self.assertRunFAIL("put x1 y1") + self.assertRunOK("put --create_if_missing x1 y1", "OK") + self.assertRunOK("get x1", "y1") + self.assertRunFAIL("get x2") + + self.assertRunOK("put x2 y2", "OK") + self.assertRunOK("get x1", "y1") + self.assertRunOK("get x2", "y2") + self.assertRunFAIL("get x3") + + self.assertRunOK("scan --from=x1 --to=z", "x1 : y1\nx2 : y2") + self.assertRunOK("put x3 y3", "OK") + + self.assertRunOK("scan --from=x1 --to=z", "x1 : y1\nx2 : y2\nx3 : y3") + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3") + self.assertRunOK("scan --from=x", "x1 : y1\nx2 : y2\nx3 : y3") + + self.assertRunOK("scan --to=x2", "x1 : y1") + self.assertRunOK("scan --from=x1 --to=z --max_keys=1", "x1 : y1") + self.assertRunOK("scan --from=x1 --to=z --max_keys=2", + "x1 : y1\nx2 : y2") + + self.assertRunOK("scan --from=x1 --to=z --max_keys=3", + "x1 : y1\nx2 : y2\nx3 : y3") + self.assertRunOK("scan --from=x1 --to=z --max_keys=4", + "x1 : y1\nx2 : y2\nx3 : y3") + self.assertRunOK("scan --from=x1 --to=x2", "x1 : y1") + self.assertRunOK("scan --from=x2 --to=x4", "x2 : y2\nx3 : y3") + self.assertRunFAIL("scan --from=x4 --to=z") # No results => FAIL + self.assertRunFAIL("scan --from=x1 --to=z --max_keys=foo") + + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3") + + self.assertRunOK("delete x1", "OK") + self.assertRunOK("scan", "x2 : y2\nx3 : y3") + + self.assertRunOK("delete NonExistentKey", "OK") + # It is wierd that GET and SCAN raise exception for + # non-existent key, while delete does not + + def dumpDb(self, params, dumpFile): + return 0 == os.system("./ldb dump %s > %s" % (params, dumpFile)) + + def loadDb(self, params, dumpFile): + return 0 == os.system("cat %s | ./ldb load %s" % (dumpFile, params)) + + def testStringBatchPut(self): + self.assertRunOK("batchput x1 y1 --create_if_missing", "OK") + self.assertRunOK("scan", "x1 : y1") + self.assertRunOK("batchput x2 y2 x3 y3 \"x4 abc\" \"y4 xyz\"", "OK") + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 abc : y4 xyz") + self.assertRunFAIL("batchput") + self.assertRunFAIL("batchput k1") + self.assertRunFAIL("batchput k1 v1 k2") + + + def testHexPutGet(self): + self.assertRunOK("put a1 b1 --create_if_missing", "OK") + self.assertRunOK("scan", "a1 : b1") + self.assertRunOK("scan --hex", "0x6131 : 0x6231") + self.assertRunFAIL("put --hex 6132 6232") + self.assertRunOK("put --hex 0x6132 0x6232", "OK") + self.assertRunOK("scan --hex", "0x6131 : 0x6231\n0x6132 : 0x6232") + self.assertRunOK("scan", "a1 : b1\na2 : b2") + self.assertRunOK("get a1", "b1") + self.assertRunOK("get --hex 0x6131", "0x6231") + self.assertRunOK("get a2", "b2") + self.assertRunOK("get --hex 0x6132", "0x6232") + self.assertRunOK("get --key_hex 0x6132", "b2") + self.assertRunOK("get --key_hex --value_hex 0x6132", "0x6232") + self.assertRunOK("get --value_hex a2", "0x6232") + self.assertRunOK("scan --key_hex --value_hex", + "0x6131 : 0x6231\n0x6132 : 0x6232") + self.assertRunOK("scan --hex --from=0x6131 --to=0x6133", + "0x6131 : 0x6231\n0x6132 : 0x6232") + self.assertRunOK("scan --hex --from=0x6131 --to=0x6132", + "0x6131 : 0x6231") + self.assertRunOK("scan --key_hex", "0x6131 : b1\n0x6132 : b2") + self.assertRunOK("scan --value_hex", "a1 : 0x6231\na2 : 0x6232") + self.assertRunOK("batchput --hex 0x6133 0x6233 0x6134 0x6234", "OK") + self.assertRunOK("scan", "a1 : b1\na2 : b2\na3 : b3\na4 : b4") + self.assertRunOK("delete --hex 0x6133", "OK") + self.assertRunOK("scan", "a1 : b1\na2 : b2\na4 : b4") + + + def testInvalidCmdLines(self): + # db not specified + self.assertRunFAILFull("put 0x6133 0x6233 --hex --create_if_missing") + # No param called he + self.assertRunFAIL("put 0x6133 0x6233 --he --create_if_missing") + # max_keys is not applicable for put + self.assertRunFAIL("put 0x6133 0x6233 --max_keys=1 --create_if_missing") + # hex has invalid boolean value + self.assertRunFAIL("put 0x6133 0x6233 --hex=Boo --create_if_missing") + + + def testDumpLoad(self): + self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", + "OK") + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + origDbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + + # Dump and load without any additional params specified + dumpFilePath = os.path.join(self.TMP_DIR, "dump1") + loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump1") + self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath)) + self.assertTrue(self.loadDb( + "--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)) + self.assertRunOKFull("scan --db=%s" % loadedDbPath, + "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + # Dump and load in hex + dumpFilePath = os.path.join(self.TMP_DIR, "dump2") + loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump2") + self.assertTrue(self.dumpDb("--db=%s --hex" % origDbPath, dumpFilePath)) + self.assertTrue(self.loadDb( + "--db=%s --hex --create_if_missing" % loadedDbPath, dumpFilePath)) + self.assertRunOKFull("scan --db=%s" % loadedDbPath, + "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + # Dump only a portion of the key range + dumpFilePath = os.path.join(self.TMP_DIR, "dump3") + loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump3") + self.assertTrue(self.dumpDb( + "--db=%s --from=x1 --to=x3" % origDbPath, dumpFilePath)) + self.assertTrue(self.loadDb( + "--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)) + self.assertRunOKFull("scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2") + + # Dump upto max_keys rows + dumpFilePath = os.path.join(self.TMP_DIR, "dump4") + loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump4") + self.assertTrue(self.dumpDb( + "--db=%s --max_keys=3" % origDbPath, dumpFilePath)) + self.assertTrue(self.loadDb( + "--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)) + self.assertRunOKFull("scan --db=%s" % loadedDbPath, + "x1 : y1\nx2 : y2\nx3 : y3") + + # Load into an existing db, create_if_missing is not specified + self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath)) + self.assertTrue(self.loadDb("--db=%s" % loadedDbPath, dumpFilePath)) + self.assertRunOKFull("scan --db=%s" % loadedDbPath, + "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + # Dump and load with WAL disabled + dumpFilePath = os.path.join(self.TMP_DIR, "dump5") + loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump5") + self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath)) + self.assertTrue(self.loadDb( + "--db=%s --disable_wal --create_if_missing" % loadedDbPath, + dumpFilePath)) + self.assertRunOKFull("scan --db=%s" % loadedDbPath, + "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + # Dump and load with lots of extra params specified + extraParams = " ".join(["--bloom_bits=14", "--compression_type=bzip2", + "--block_size=1024", "--auto_compaction=true", + "--write_buffer_size=4194304", + "--file_size=2097152"]) + dumpFilePath = os.path.join(self.TMP_DIR, "dump6") + loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump6") + self.assertTrue(self.dumpDb( + "--db=%s %s" % (origDbPath, extraParams), dumpFilePath)) + self.assertTrue(self.loadDb( + "--db=%s %s --create_if_missing" % (loadedDbPath, extraParams), + dumpFilePath)) + self.assertRunOKFull("scan --db=%s" % loadedDbPath, + "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + # Dump with count_only + dumpFilePath = os.path.join(self.TMP_DIR, "dump7") + loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump7") + self.assertTrue(self.dumpDb( + "--db=%s --count_only" % origDbPath, dumpFilePath)) + self.assertTrue(self.loadDb( + "--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)) + # DB should have atleast one value for scan to work + self.assertRunOKFull("put --db=%s k1 v1" % loadedDbPath, "OK") + self.assertRunOKFull("scan --db=%s" % loadedDbPath, "k1 : v1") + + # Dump command fails because of typo in params + dumpFilePath = os.path.join(self.TMP_DIR, "dump8") + self.assertFalse(self.dumpDb( + "--db=%s --create_if_missin" % origDbPath, dumpFilePath)) + + + def testMiscAdminTask(self): + # These tests need to be improved; for example with asserts about + # whether compaction or level reduction actually took place. + self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", + "OK") + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + origDbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + + self.assertTrue(0 == os.system("./ldb compact --db=%s" % origDbPath)) + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + self.assertTrue(0 == os.system( + "./ldb reduce_levels --db=%s --new_levels=2" % origDbPath)) + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + self.assertTrue(0 == os.system( + "./ldb reduce_levels --db=%s --new_levels=3" % origDbPath)) + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + self.assertTrue(0 == os.system( + "./ldb compact --db=%s --from=x1 --to=x3" % origDbPath)) + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + self.assertTrue(0 == os.system( + "./ldb compact --db=%s --hex --from=0x6131 --to=0x6134" % + origDbPath)) + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + #TODO(dilip): Not sure what should be passed to WAL.Currently corrupted. + self.assertTrue(0 == os.system( + "./ldb dump_wal --db=%s --walfile=%s --header" % ( + origDbPath, os.path.join(origDbPath, "LOG")))) + self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + + +if __name__ == "__main__": + unittest.main() + diff --git a/tools/reduce_levels_test.cc b/tools/reduce_levels_test.cc index f7e9377c2..1e48e296e 100644 --- a/tools/reduce_levels_test.cc +++ b/tools/reduce_levels_test.cc @@ -84,11 +84,13 @@ Status ReduceLevelTest::OpenDB(bool create_if_missing, int num_levels, } bool ReduceLevelTest::ReduceLevels(int target_level) { - std::vector args = leveldb::ReduceDBLevels::PrepareArgs( - target_level, false); - ReduceDBLevels level_reducer(dbname_, args); - level_reducer.Run(); - return level_reducer.GetExecuteState().IsSucceed(); + std::vector args = leveldb::ReduceDBLevelsCommand::PrepareArgs( + dbname_, target_level, false); + LDBCommand* level_reducer = LDBCommand::InitFromCmdLineArgs(args); + level_reducer->Run(); + bool is_succeed = level_reducer->GetExecuteState().IsSucceed(); + delete level_reducer; + return is_succeed; } TEST(ReduceLevelTest, Last_Level) { diff --git a/tools/sst_dump.cc b/tools/sst_dump.cc index 2f31f05c3..c35547be6 100644 --- a/tools/sst_dump.cc +++ b/tools/sst_dump.cc @@ -48,16 +48,16 @@ SstFileReader::SstFileReader(std::string file_path, Status SstFileReader::ReadSequential(bool print_kv, uint64_t file_number, uint64_t read_num) { - Table* table; + unique_ptr
table; Options table_options; - RandomAccessFile* file = NULL; + unique_ptr file; Status s = table_options.env->NewRandomAccessFile(file_name_, &file); if(!s.ok()) { return s; } uint64_t file_size; table_options.env->GetFileSize(file_name_, &file_size); - s = Table::Open(table_options, file_number, file, file_size, &table); + s = Table::Open(table_options, file_number, std::move(file), file_size, &table); if(!s.ok()) { return s; } diff --git a/util/auto_split_logger.h b/util/auto_split_logger.h index 0a7697771..eada68a02 100644 --- a/util/auto_split_logger.h +++ b/util/auto_split_logger.h @@ -26,7 +26,7 @@ class AutoSplitLogger : public Logger { std::string db_log_dir_; std::string db_absolute_path_; Env* env_; - UnderlyingLogger* logger_; + shared_ptr logger_; const size_t MAX_LOG_FILE_SIZE; Status status_; @@ -42,7 +42,6 @@ class AutoSplitLogger : public Logger { log_fname_ = InfoLogFileName(dbname_, db_absolute_path_, db_log_dir_); InitLogger(); } - ~AutoSplitLogger() { delete logger_; } virtual void Logv(const char* format, va_list ap) { assert(GetStatus().ok()); @@ -50,7 +49,6 @@ class AutoSplitLogger : public Logger { logger_->Logv(format, ap); // Check if the log file should be splitted. if (logger_->GetLogFileSize() > MAX_LOG_FILE_SIZE) { - delete logger_; std::string old_fname = OldInfoLogFileName( dbname_, env_->NowMicros(), db_absolute_path_, db_log_dir_); env_->RenameFile(log_fname_, old_fname); diff --git a/util/build_version.h b/util/build_version.h index bcf869372..516c6404b 100644 --- a/util/build_version.h +++ b/util/build_version.h @@ -4,7 +4,6 @@ // these variables tell us about the git config and time extern const char* leveldb_build_git_sha; -extern const char* leveldb_build_git_datetime; // these variables tell us when the compilation occured extern const char* leveldb_build_compile_time; diff --git a/util/cache.cc b/util/cache.cc index 7dce68d72..bd61d4802 100644 --- a/util/cache.cc +++ b/util/cache.cc @@ -461,15 +461,15 @@ class ShardedLRUCache : public Cache { } // end anonymous namespace -Cache* NewLRUCache(size_t capacity) { - return new ShardedLRUCache(capacity); +shared_ptr NewLRUCache(size_t capacity) { + return std::make_shared(capacity); } -Cache* NewLRUCache(size_t capacity, int numShardBits) { +shared_ptr NewLRUCache(size_t capacity, int numShardBits) { if (numShardBits >= 20) { return NULL; // the cache cannot be sharded into too many fine pieces } - return new ShardedLRUCache(capacity, numShardBits); + return std::make_shared(capacity, numShardBits); } } // namespace leveldb diff --git a/util/cache_test.cc b/util/cache_test.cc index 78e9e59e2..ca6eb2b8a 100644 --- a/util/cache_test.cc +++ b/util/cache_test.cc @@ -35,14 +35,13 @@ class CacheTest { static const int kCacheSize = 1000; std::vector deleted_keys_; std::vector deleted_values_; - Cache* cache_; + shared_ptr cache_; CacheTest() : cache_(NewLRUCache(kCacheSize)) { current_ = this; } ~CacheTest() { - delete cache_; } int Lookup(int key) { diff --git a/util/coding.h b/util/coding.h index d70bab7b6..19fa4acb1 100644 --- a/util/coding.h +++ b/util/coding.h @@ -18,6 +18,10 @@ namespace leveldb { +// The maximum length of a varint in bytes for 32 and 64 bits respectively. +const unsigned int kMaxVarint32Length = 5; +const unsigned int kMaxVarint64Length = 10; + // Standard Put... routines append to a string extern void PutFixed32(std::string* dst, uint32_t value); extern void PutFixed64(std::string* dst, uint64_t value); diff --git a/util/env.cc b/util/env.cc index c2600e964..0aaa03ae7 100644 --- a/util/env.cc +++ b/util/env.cc @@ -25,7 +25,16 @@ FileLock::~FileLock() { } void Log(Logger* info_log, const char* format, ...) { - if (info_log != NULL) { + if (info_log) { + va_list ap; + va_start(ap, format); + info_log->Logv(format, ap); + va_end(ap); + } +} + +void Log(const shared_ptr& info_log, const char* format, ...) { + if (info_log) { va_list ap; va_start(ap, format); info_log->Logv(format, ap); @@ -36,7 +45,7 @@ void Log(Logger* info_log, const char* format, ...) { static Status DoWriteStringToFile(Env* env, const Slice& data, const std::string& fname, bool should_sync) { - WritableFile* file; + unique_ptr file; Status s = env->NewWritableFile(fname, &file); if (!s.ok()) { return s; @@ -45,10 +54,6 @@ static Status DoWriteStringToFile(Env* env, const Slice& data, if (s.ok() && should_sync) { s = file->Sync(); } - if (s.ok()) { - s = file->Close(); - } - delete file; // Will auto-close if we did not close above if (!s.ok()) { env->DeleteFile(fname); } @@ -67,7 +72,7 @@ Status WriteStringToFileSync(Env* env, const Slice& data, Status ReadFileToString(Env* env, const std::string& fname, std::string* data) { data->clear(); - SequentialFile* file; + unique_ptr file; Status s = env->NewSequentialFile(fname, &file); if (!s.ok()) { return s; @@ -86,7 +91,6 @@ Status ReadFileToString(Env* env, const std::string& fname, std::string* data) { } } delete[] space; - delete file; return s; } diff --git a/util/env_hdfs.cc b/util/env_hdfs.cc index 095737ca3..65b92c9e9 100644 --- a/util/env_hdfs.cc +++ b/util/env_hdfs.cc @@ -490,7 +490,8 @@ Status HdfsEnv::UnlockFile(FileLock* lock) { return Status::OK(); } -Status HdfsEnv::NewLogger(const std::string& fname, Logger** result) { +Status HdfsEnv::NewLogger(const std::string& fname, + shared_ptr* result) { HdfsWritableFile* f = new HdfsWritableFile(fileSys_, fname); if (f == NULL || !f->isValid()) { *result = NULL; @@ -515,7 +516,7 @@ Status HdfsEnv::NewLogger(const std::string& fname, Logger** result) { #include "hdfs/env_hdfs.h" namespace leveldb { Status HdfsEnv::NewSequentialFile(const std::string& fname, - SequentialFile** result) { + unique_ptr* result) { return Status::NotSupported("Not compiled with hdfs support"); } } diff --git a/util/env_posix.cc b/util/env_posix.cc index 7a6a0f80c..a33acfe07 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -11,18 +11,23 @@ #include #include #include +#include #include #include #include #include #include #include +#if defined(OS_LINUX) +#include +#endif #if defined(LEVELDB_PLATFORM_ANDROID) #include #endif #include "leveldb/env.h" #include "leveldb/slice.h" #include "port/port.h" +#include "util/coding.h" #include "util/logging.h" #include "util/posix_logger.h" @@ -108,6 +113,35 @@ class PosixRandomAccessFile: public RandomAccessFile { } return s; } + +#if defined(OS_LINUX) + virtual size_t GetUniqueId(char* id, size_t max_size) const { + // TODO: possibly allow this function to handle tighter bounds. + if (max_size < kMaxVarint64Length*3) { + return 0; + } + + struct stat buf; + int result = fstat(fd_, &buf); + if (result == -1) { + return 0; + } + + long version = 0; + result = ioctl(fd_, FS_IOC_GETVERSION, &version); + if (result == -1) { + return 0; + } + uint64_t uversion = (uint64_t)version; + + char* rid = id; + rid = EncodeVarint64(rid, buf.st_dev); + rid = EncodeVarint64(rid, buf.st_ino); + rid = EncodeVarint64(rid, uversion); + assert(rid >= id); + return static_cast(rid-id); + } +#endif }; // mmap() based random-access @@ -232,6 +266,7 @@ class PosixMmapFile : public WritableFile { virtual Status Append(const Slice& data) { const char* src = data.data(); size_t left = data.size(); + PrepareWrite(GetFileSize(), left); while (left > 0) { assert(base_ <= dst_); assert(dst_ <= limit_); @@ -330,6 +365,16 @@ class PosixMmapFile : public WritableFile { size_t used = dst_ - base_; return file_offset_ + used; } + +#ifdef OS_LINUX + virtual Status Allocate(off_t offset, off_t len) { + if (!fallocate(fd_, FALLOC_FL_KEEP_SIZE, offset, len)) { + return Status::OK(); + } else { + return IOError(filename_, errno); + } + } +#endif }; // Use posix write to write data to a file. @@ -371,6 +416,7 @@ class PosixWritableFile : public WritableFile { pending_sync_ = true; pending_fsync_ = true; + PrepareWrite(GetFileSize(), left); // if there is no space in the cache, then flush if (cursize_ + left > capacity_) { s = Flush(); @@ -455,6 +501,16 @@ class PosixWritableFile : public WritableFile { virtual uint64_t GetFileSize() { return filesize_; } + +#ifdef OS_LINUX + virtual Status Allocate(off_t offset, off_t len) { + if (!fallocate(fd_, FALLOC_FL_KEEP_SIZE, offset, len)) { + return Status::OK(); + } else { + return IOError(filename_, errno); + } + } +#endif }; static int LockOrUnlock(const std::string& fname, int fd, bool lock) { @@ -510,20 +566,21 @@ class PosixEnv : public Env { } virtual Status NewSequentialFile(const std::string& fname, - SequentialFile** result) { + unique_ptr* result) { + result->reset(); FILE* f = fopen(fname.c_str(), "r"); if (f == NULL) { *result = NULL; return IOError(fname, errno); } else { - *result = new PosixSequentialFile(fname, f); + result->reset(new PosixSequentialFile(fname, f)); return Status::OK(); } } virtual Status NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result) { - *result = NULL; + unique_ptr* result) { + result->reset(); Status s; int fd = open(fname.c_str(), O_RDONLY); if (fd < 0) { @@ -537,30 +594,30 @@ class PosixEnv : public Env { if (s.ok()) { void* base = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); if (base != MAP_FAILED) { - *result = new PosixMmapReadableFile(fname, base, size); + result->reset(new PosixMmapReadableFile(fname, base, size)); } else { s = IOError(fname, errno); } } close(fd); } else { - *result = new PosixRandomAccessFile(fname, fd); + result->reset(new PosixRandomAccessFile(fname, fd)); } return s; } virtual Status NewWritableFile(const std::string& fname, - WritableFile** result) { + unique_ptr* result) { + result->reset(); Status s; const int fd = open(fname.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644); if (fd < 0) { - *result = NULL; s = IOError(fname, errno); } else { if (useMmapWrite) { - *result = new PosixMmapFile(fname, fd, page_size_); + result->reset(new PosixMmapFile(fname, fd, page_size_)); } else { - *result = new PosixWritableFile(fname, fd, 65536); + result->reset(new PosixWritableFile(fname, fd, 65536)); } } return s; @@ -706,13 +763,14 @@ class PosixEnv : public Env { return thread_id; } - virtual Status NewLogger(const std::string& fname, Logger** result) { + virtual Status NewLogger(const std::string& fname, + shared_ptr* result) { FILE* f = fopen(fname.c_str(), "w"); if (f == NULL) { - *result = NULL; + result->reset(); return IOError(fname, errno); } else { - *result = new PosixLogger(f, &PosixEnv::gettid); + result->reset(new PosixLogger(f, &PosixEnv::gettid)); return Status::OK(); } } diff --git a/util/env_test.cc b/util/env_test.cc index dcc1457e7..fa6483da9 100644 --- a/util/env_test.cc +++ b/util/env_test.cc @@ -4,7 +4,9 @@ #include "leveldb/env.h" +#include #include "port/port.h" +#include "util/coding.h" #include "util/testharness.h" namespace leveldb { @@ -97,6 +99,146 @@ TEST(EnvPosixTest, StartThread) { ASSERT_EQ(state.val, 3); } +bool IsSingleVarint(const std::string& s) { + Slice slice(s); + + uint64_t v; + if (!GetVarint64(&slice, &v)) { + return false; + } + + return slice.size() == 0; +} + +bool IsUniqueIDValid(const std::string& s) { + return !s.empty() && !IsSingleVarint(s); +} + +const size_t MAX_ID_SIZE = 100; +char temp_id[MAX_ID_SIZE]; + +TEST(EnvPosixTest, RandomAccessUniqueID) { + // Create file. + std::string fname = test::TmpDir() + "/" + "testfile"; + unique_ptr wfile; + ASSERT_OK(env_->NewWritableFile(fname, &wfile)); + + unique_ptr file; + + // Get Unique ID + ASSERT_OK(env_->NewRandomAccessFile(fname, &file)); + size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); + ASSERT_TRUE(id_size > 0); + std::string unique_id1(temp_id, id_size); + ASSERT_TRUE(IsUniqueIDValid(unique_id1)); + + // Get Unique ID again + ASSERT_OK(env_->NewRandomAccessFile(fname, &file)); + id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); + ASSERT_TRUE(id_size > 0); + std::string unique_id2(temp_id, id_size); + ASSERT_TRUE(IsUniqueIDValid(unique_id2)); + + // Get Unique ID again after waiting some time. + env_->SleepForMicroseconds(1000000); + ASSERT_OK(env_->NewRandomAccessFile(fname, &file)); + id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); + ASSERT_TRUE(id_size > 0); + std::string unique_id3(temp_id, id_size); + ASSERT_TRUE(IsUniqueIDValid(unique_id3)); + + // Check IDs are the same. + ASSERT_EQ(unique_id1, unique_id2); + ASSERT_EQ(unique_id2, unique_id3); + + // Delete the file + env_->DeleteFile(fname); +} + +// Returns true if any of the strings in ss are the prefix of another string. +bool HasPrefix(const std::unordered_set& ss) { + for (const std::string& s: ss) { + if (s.empty()) { + return true; + } + for (size_t i = 1; i < s.size(); ++i) { + if (ss.count(s.substr(0, i)) != 0) { + return true; + } + } + } + return false; +} + +TEST(EnvPosixTest, RandomAccessUniqueIDConcurrent) { + // Check whether a bunch of concurrently existing files have unique IDs. + + // Create the files + std::vector fnames; + for (int i = 0; i < 1000; ++i) { + fnames.push_back(test::TmpDir() + "/" + "testfile" + std::to_string(i)); + + // Create file. + unique_ptr wfile; + ASSERT_OK(env_->NewWritableFile(fnames[i], &wfile)); + } + + // Collect and check whether the IDs are unique. + std::unordered_set ids; + for (const std::string fname: fnames) { + unique_ptr file; + std::string unique_id; + ASSERT_OK(env_->NewRandomAccessFile(fname, &file)); + size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); + ASSERT_TRUE(id_size > 0); + unique_id = std::string(temp_id, id_size); + ASSERT_TRUE(IsUniqueIDValid(unique_id)); + + ASSERT_TRUE(ids.count(unique_id) == 0); + ids.insert(unique_id); + } + + // Delete the files + for (const std::string fname: fnames) { + ASSERT_OK(env_->DeleteFile(fname)); + } + + ASSERT_TRUE(!HasPrefix(ids)); +} + +TEST(EnvPosixTest, RandomAccessUniqueIDDeletes) { + std::string fname = test::TmpDir() + "/" + "testfile"; + + // Check that after file is deleted we don't get same ID again in a new file. + std::unordered_set ids; + for (int i = 0; i < 1000; ++i) { + // Create file. + { + unique_ptr wfile; + ASSERT_OK(env_->NewWritableFile(fname, &wfile)); + } + + // Get Unique ID + std::string unique_id; + { + unique_ptr file; + ASSERT_OK(env_->NewRandomAccessFile(fname, &file)); + size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); + ASSERT_TRUE(id_size > 0); + unique_id = std::string(temp_id, id_size); + } + + ASSERT_TRUE(IsUniqueIDValid(unique_id)); + ASSERT_TRUE(ids.count(unique_id) == 0); + ids.insert(unique_id); + + // Delete the file + ASSERT_OK(env_->DeleteFile(fname)); + } + + ASSERT_TRUE(!HasPrefix(ids)); +} + } // namespace leveldb int main(int argc, char** argv) { diff --git a/util/histogram.cc b/util/histogram.cc index bb95f583e..089789cd6 100644 --- a/util/histogram.cc +++ b/util/histogram.cc @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. +#include #include #include #include "port/port.h" @@ -9,7 +10,10 @@ namespace leveldb { -const double Histogram::kBucketLimit[kNumBuckets] = { +HistogramBucketMapper::HistogramBucketMapper() : + // Add newer bucket index here. + // Should be alwyas added in sorted order. + bucketValues_({ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, 250, 300, 350, 400, 450, 500, 600, 700, 800, 900, 1000, 1200, 1400, 1600, 1800, 2000, 2500, 3000, @@ -24,30 +28,55 @@ const double Histogram::kBucketLimit[kNumBuckets] = { 70000000, 80000000, 90000000, 100000000, 120000000, 140000000, 160000000, 180000000, 200000000, 250000000, 300000000, 350000000, 400000000, 450000000, 500000000, 600000000, 700000000, 800000000, 900000000, - 1000000000, 1200000000, 1400000000, 1600000000, 1800000000, 2000000000, - 2500000000.0, 3000000000.0, 3500000000.0, 4000000000.0, 4500000000.0, - 5000000000.0, 6000000000.0, 7000000000.0, 8000000000.0, 9000000000.0, - 1e200, -}; + 1000000000}), + maxBucketValue_(bucketValues_.back()), + minBucketValue_(bucketValues_.front()) { + for (size_t i =0; i < bucketValues_.size(); ++i) { + valueIndexMap_[bucketValues_[i]] = i; + } +} + +const size_t HistogramBucketMapper::IndexForValue(const uint64_t value) const { + if (value >= maxBucketValue_) { + return bucketValues_.size() - 1; + } else if ( value >= minBucketValue_ ) { + std::map::const_iterator lowerBound = + valueIndexMap_.lower_bound(value); + if (lowerBound != valueIndexMap_.end()) { + return lowerBound->second; + } else { + return 0; + } + } else { + return 0; + } +} + +namespace { + const HistogramBucketMapper bucketMapper; +} + + +Histogram::Histogram() : + min_(bucketMapper.LastValue()), + max_(0), + num_(0), + sum_(0), + sum_squares_(0), + buckets_(std::vector(bucketMapper.BucketCount(), 0)) {} void Histogram::Clear() { - min_ = kBucketLimit[kNumBuckets-1]; + min_ = bucketMapper.LastValue(); max_ = 0; num_ = 0; sum_ = 0; sum_squares_ = 0; - for (int i = 0; i < kNumBuckets; i++) { - buckets_[i] = 0; - } + buckets_.resize(bucketMapper.BucketCount(), 0); } -void Histogram::Add(double value) { - // Linear search is fast enough for our usage in db_bench - int b = 0; - while (b < kNumBuckets - 1 && kBucketLimit[b] <= value) { - b++; - } - buckets_[b] += 1.0; +void Histogram::Add(uint64_t value) { + const size_t index = bucketMapper.IndexForValue(value); + buckets_[index] += 1; if (min_ > value) min_ = value; if (max_ < value) max_ = value; num_++; @@ -55,13 +84,17 @@ void Histogram::Add(double value) { sum_squares_ += (value * value); } +void Histogram::Add(double value) { + Add(static_cast(value)); +} + void Histogram::Merge(const Histogram& other) { if (other.min_ < min_) min_ = other.min_; if (other.max_ > max_) max_ = other.max_; num_ += other.num_; sum_ += other.sum_; sum_squares_ += other.sum_squares_; - for (int b = 0; b < kNumBuckets; b++) { + for (int b = 0; b < bucketMapper.BucketCount(); b++) { buckets_[b] += other.buckets_[b]; } } @@ -73,15 +106,19 @@ double Histogram::Median() const { double Histogram::Percentile(double p) const { double threshold = num_ * (p / 100.0); double sum = 0; - for (int b = 0; b < kNumBuckets; b++) { + for (int b = 0; b < bucketMapper.BucketCount(); b++) { sum += buckets_[b]; if (sum >= threshold) { // Scale linearly within this bucket - double left_point = (b == 0) ? 0 : kBucketLimit[b-1]; - double right_point = kBucketLimit[b]; + double left_point = (b == 0) ? 0 : bucketMapper.BucketLimit(b-1); + double right_point = bucketMapper.BucketLimit(b); double left_sum = sum - buckets_[b]; double right_sum = sum; - double pos = (threshold - left_sum) / (right_sum - left_sum); + double pos = 0; + double right_left_diff = right_sum - left_sum; + if (right_left_diff != 0) { + pos = (threshold - left_sum) / (right_sum - left_sum); + } double r = left_point + (right_point - left_point) * pos; if (r < min_) r = min_; if (r > max_) r = max_; @@ -116,16 +153,16 @@ std::string Histogram::ToString() const { r.append("------------------------------------------------------\n"); const double mult = 100.0 / num_; double sum = 0; - for (int b = 0; b < kNumBuckets; b++) { + for (int b = 0; b < bucketMapper.BucketCount(); b++) { if (buckets_[b] <= 0.0) continue; sum += buckets_[b]; snprintf(buf, sizeof(buf), - "[ %7.0f, %7.0f ) %7.0f %7.3f%% %7.3f%% ", - ((b == 0) ? 0.0 : kBucketLimit[b-1]), // left - kBucketLimit[b], // right - buckets_[b], // count - mult * buckets_[b], // percentage - mult * sum); // cumulative percentage + "[ %ld, %ld ) %ld %7.3f%% %7.3f%% ", + ((b == 0) ? 0 : bucketMapper.BucketLimit(b-1)), // left + bucketMapper.BucketLimit(b), // right + buckets_[b], // count + mult * buckets_[b], // percentage + mult * sum); // cumulative percentage r.append(buf); // Add hash marks based on percentage; 20 marks for 100%. diff --git a/util/histogram.h b/util/histogram.h index 1ef9f3c8a..03d7c6a8b 100644 --- a/util/histogram.h +++ b/util/histogram.h @@ -5,36 +5,70 @@ #ifndef STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ #define STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ +#include #include +#include +#include namespace leveldb { +class HistogramBucketMapper { + public: + + HistogramBucketMapper(); + + // converts a value to the bucket index. + const size_t IndexForValue(const uint64_t value) const; + // number of buckets required. + + const size_t BucketCount() const { + return bucketValues_.size(); + } + + uint64_t LastValue() const { + return maxBucketValue_; + } + + uint64_t FirstValue() const { + return minBucketValue_; + } + + uint64_t BucketLimit(const uint64_t bucketNumber) const { + assert(bucketNumber < BucketCount()); + return bucketValues_[bucketNumber]; + } + + private: + const std::vector bucketValues_; + const uint64_t maxBucketValue_; + const uint64_t minBucketValue_; + std::map valueIndexMap_; +}; + class Histogram { public: - Histogram() { } - ~Histogram() { } + Histogram(); void Clear(); + void Add(uint64_t value); void Add(double value); void Merge(const Histogram& other); std::string ToString() const; + double Median() const; + double Percentile(double p) const; + double Average() const; + double StandardDeviation() const; + private: double min_; double max_; double num_; double sum_; double sum_squares_; + std::vector buckets_; - enum { kNumBuckets = 154 }; - static const double kBucketLimit[kNumBuckets]; - double buckets_[kNumBuckets]; - - double Median() const; - double Percentile(double p) const; - double Average() const; - double StandardDeviation() const; }; } // namespace leveldb diff --git a/util/histogram_test.cc b/util/histogram_test.cc new file mode 100644 index 000000000..2a7aae4ca --- /dev/null +++ b/util/histogram_test.cc @@ -0,0 +1,57 @@ +#include "util/histogram.h" + +#include "util/testharness.h" + +namespace leveldb { + +class HistogramTest { }; + +TEST(HistogramTest, BasicOperation) { + + Histogram histogram; + for (uint64_t i = 1; i <= 100; i++) { + histogram.Add(i); + } + + { + double median = histogram.Median(); + // ASSERT_LE(median, 50); + ASSERT_GT(median, 0); + } + + { + double percentile100 = histogram.Percentile(100.0); + ASSERT_LE(percentile100, 100.0); + ASSERT_GT(percentile100, 0.0); + double percentile99 = histogram.Percentile(99.0); + double percentile85 = histogram.Percentile(85.0); + ASSERT_LE(percentile99, 99.0); + ASSERT_TRUE(percentile99 >= percentile85); + } + + ASSERT_EQ(histogram.Average(), 50.5); // avg is acurately caluclated. +} + +TEST(HistogramTest, EmptyHistogram) { + Histogram histogram; + ASSERT_EQ(histogram.Median(), 0.0); + ASSERT_EQ(histogram.Percentile(85.0), 0.0); + ASSERT_EQ(histogram.Average(), 0.0); +} + +TEST(HistogramTest, ClearHistogram) { + Histogram histogram; + for (uint64_t i = 1; i <= 100; i++) { + histogram.Add(i); + } + histogram.Clear(); + ASSERT_EQ(histogram.Median(), 0); + ASSERT_EQ(histogram.Percentile(85.0), 0); + ASSERT_EQ(histogram.Average(), 0); +} + +} // namespace leveldb + +int main(int argc, char** argv) { + return leveldb::test::RunAllTests(); +} diff --git a/util/ldb_cmd.cc b/util/ldb_cmd.cc index eca681bd0..e60ea60ac 100644 --- a/util/ldb_cmd.cc +++ b/util/ldb_cmd.cc @@ -2,6 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +#include +#include #include "leveldb/write_batch.h" #include "db/dbformat.h" @@ -11,129 +13,281 @@ namespace leveldb { -const char* LDBCommand::BLOOM_ARG = "--bloom_bits="; -const char* LDBCommand::COMPRESSION_TYPE_ARG = "--compression_type="; -const char* LDBCommand::BLOCK_SIZE = "--block_size="; -const char* LDBCommand::AUTO_COMPACTION = "--auto_compaction="; -const char* LDBCommand::WRITE_BUFFER_SIZE_ARG = "--write_buffer_size="; -const char* LDBCommand::FILE_SIZE_ARG = "--file_size="; +const string LDBCommand::ARG_DB = "db"; +const string LDBCommand::ARG_HEX = "hex"; +const string LDBCommand::ARG_KEY_HEX = "key_hex"; +const string LDBCommand::ARG_VALUE_HEX = "value_hex"; +const string LDBCommand::ARG_FROM = "from"; +const string LDBCommand::ARG_TO = "to"; +const string LDBCommand::ARG_MAX_KEYS = "max_keys"; +const string LDBCommand::ARG_BLOOM_BITS = "bloom_bits"; +const string LDBCommand::ARG_COMPRESSION_TYPE = "compression_type"; +const string LDBCommand::ARG_BLOCK_SIZE = "block_size"; +const string LDBCommand::ARG_AUTO_COMPACTION = "auto_compaction"; +const string LDBCommand::ARG_WRITE_BUFFER_SIZE = "write_buffer_size"; +const string LDBCommand::ARG_FILE_SIZE = "file_size"; +const string LDBCommand::ARG_CREATE_IF_MISSING = "create_if_missing"; + const char* LDBCommand::DELIM = " ==> "; -void LDBCommand::parse_open_args(std::vector& args) { - std::vector rest_of_args; - for (unsigned int i = 0; i < args.size(); i++) { - std::string& arg = args.at(i); - if (arg.find(BLOOM_ARG) == 0 - || arg.find(COMPRESSION_TYPE_ARG) == 0 - || arg.find(BLOCK_SIZE) == 0 - || arg.find(AUTO_COMPACTION) == 0 - || arg.find(WRITE_BUFFER_SIZE_ARG) == 0 - || arg.find(FILE_SIZE_ARG) == 0) { - open_args_.push_back(arg); +LDBCommand* LDBCommand::InitFromCmdLineArgs(int argc, char** argv) { + vector args; + for (int i = 1; i < argc; i++) { + args.push_back(argv[i]); + } + return InitFromCmdLineArgs(args); +} + +/** + * Parse the command-line arguments and create the appropriate LDBCommand2 + * instance. + * The command line arguments must be in the following format: + * ./ldb --db=PATH_TO_DB [--commonOpt1=commonOpt1Val] .. + * COMMAND ... [-cmdSpecificOpt1=cmdSpecificOpt1Val] .. + * This is similar to the command line format used by HBaseClientTool. + * Command name is not included in args. + * Returns NULL if the command-line cannot be parsed. + */ +LDBCommand* LDBCommand::InitFromCmdLineArgs(const vector& args) { + // --x=y command line arguments are added as x->y map entries. + map options; + + // Command-line arguments of the form --hex end up in this array as hex + vector flags; + + // Everything other than options and flags. Represents commands + // and their parameters. For eg: put key1 value1 go into this vector. + vector cmdTokens; + + const string OPTION_PREFIX = "--"; + + for (vector::const_iterator itr = args.begin(); + itr != args.end(); itr++) { + string arg = *itr; + if (boost::starts_with(arg, OPTION_PREFIX)){ + vector splits; + boost::split(splits, arg, boost::is_any_of("=")); + if (splits.size() == 2) { + string optionKey = splits[0].substr(OPTION_PREFIX.size()); + options[optionKey] = splits[1]; + } else { + string optionKey = splits[0].substr(OPTION_PREFIX.size()); + flags.push_back(optionKey); + } } else { - rest_of_args.push_back(arg); + cmdTokens.push_back(string(arg)); + } + } + + if (cmdTokens.size() < 1) { + fprintf(stderr, "Command not specified!"); + return NULL; + } + + string cmd = cmdTokens[0]; + vector cmdParams(cmdTokens.begin()+1, cmdTokens.end()); + + if (cmd == GetCommand::Name()) { + return new GetCommand(cmdParams, options, flags); + } else if (cmd == PutCommand::Name()) { + return new PutCommand(cmdParams, options, flags); + } else if (cmd == BatchPutCommand::Name()) { + return new BatchPutCommand(cmdParams, options, flags); + } else if (cmd == ScanCommand::Name()) { + return new ScanCommand(cmdParams, options, flags); + } else if (cmd == DeleteCommand::Name()) { + return new DeleteCommand(cmdParams, options, flags); + } else if (cmd == ApproxSizeCommand::Name()) { + return new ApproxSizeCommand(cmdParams, options, flags); + } else if (cmd == DBQuerierCommand::Name()) { + return new DBQuerierCommand(cmdParams, options, flags); + } else if (cmd == CompactorCommand::Name()) { + return new CompactorCommand(cmdParams, options, flags); + } else if (cmd == WALDumperCommand::Name()) { + return new WALDumperCommand(cmdParams, options, flags); + } else if (cmd == ReduceDBLevelsCommand::Name()) { + return new ReduceDBLevelsCommand(cmdParams, options, flags); + } else if (cmd == DBDumperCommand::Name()) { + return new DBDumperCommand(cmdParams, options, flags); + } else if (cmd == DBLoaderCommand::Name()) { + return new DBLoaderCommand(cmdParams, options, flags); + } + + return NULL; +} + +/** + * Parses the specific integer option and fills in the value. + * Returns true if the option is found. + * Returns false if the option is not found or if there is an error parsing the + * value. If there is an error, the specified exec_state is also + * updated. + */ +bool LDBCommand::ParseIntOption(const map& options, + string option, int& value, LDBCommandExecuteResult& exec_state) { + + map::const_iterator itr = options_.find(option); + if (itr != options_.end()) { + try { + value = boost::lexical_cast(itr->second); + return true; + } catch( const boost::bad_lexical_cast & ) { + exec_state = LDBCommandExecuteResult::FAILED(option + + " has an invalid value."); } } - swap(args, rest_of_args); + return false; } leveldb::Options LDBCommand::PrepareOptionsForOpenDB() { + leveldb::Options opt; opt.create_if_missing = false; - for (unsigned int i = 0; i < open_args_.size(); i++) { - std::string& arg = open_args_.at(i); - if (arg.find(BLOOM_ARG) == 0) { - std::string bits_string = arg.substr(strlen(BLOOM_ARG)); - int bits = atoi(bits_string.c_str()); - if (bits == 0) { - // Badly-formatted bits. - exec_state_ = LDBCommandExecuteResult::FAILED( - std::string("Badly-formatted bits: ") + bits_string); - } + + map::const_iterator itr; + + int bits; + if (ParseIntOption(options_, ARG_BLOOM_BITS, bits, exec_state_)) { + if (bits > 0) { opt.filter_policy = leveldb::NewBloomFilterPolicy(bits); - } else if (arg.find(BLOCK_SIZE) == 0) { - std::string block_size_string = arg.substr(strlen(BLOCK_SIZE)); - int block_size = atoi(block_size_string.c_str()); - if (block_size == 0) { - // Badly-formatted bits. - exec_state_ = LDBCommandExecuteResult::FAILED( - std::string("Badly-formatted block size: ") + block_size_string); - } + } else { + exec_state_ = LDBCommandExecuteResult::FAILED(ARG_BLOOM_BITS + + " must be > 0."); + } + } + + int block_size; + if (ParseIntOption(options_, ARG_BLOCK_SIZE, block_size, exec_state_)) { + if (block_size > 0) { opt.block_size = block_size; - } else if (arg.find(AUTO_COMPACTION) == 0) { - std::string value = arg.substr(strlen(AUTO_COMPACTION)); - if (value == "false") { - opt.disable_auto_compactions = true; - } else if (value == "true") { - opt.disable_auto_compactions = false; - } else { - // Unknown compression. - exec_state_ = LDBCommandExecuteResult::FAILED( - "Unknown auto_compaction value: " + value); - } - } else if (arg.find(COMPRESSION_TYPE_ARG) == 0) { - std::string comp = arg.substr(strlen(COMPRESSION_TYPE_ARG)); - if (comp == "no") { - opt.compression = leveldb::kNoCompression; - } else if (comp == "snappy") { - opt.compression = leveldb::kSnappyCompression; - } else if (comp == "zlib") { - opt.compression = leveldb::kZlibCompression; - } else if (comp == "bzip2") { - opt.compression = leveldb::kBZip2Compression; - } else { - // Unknown compression. - exec_state_ = LDBCommandExecuteResult::FAILED( - "Unknown compression level: " + comp); - } - } else if (arg.find(WRITE_BUFFER_SIZE_ARG) == 0) { - std::string write_buffer_str = arg.substr(strlen(WRITE_BUFFER_SIZE_ARG)); - int write_buffer_size = atoi(write_buffer_str.c_str()); - if (write_buffer_size == 0) { - exec_state_ = LDBCommandExecuteResult::FAILED( - std::string("Badly-formatted buffer size: ") + write_buffer_str); - } + } else { + exec_state_ = LDBCommandExecuteResult::FAILED(ARG_BLOCK_SIZE + + " must be > 0."); + } + } + + itr = options_.find(ARG_AUTO_COMPACTION); + if (itr != options_.end()) { + opt.disable_auto_compactions = ! StringToBool(itr->second); + } + + itr = options_.find(ARG_COMPRESSION_TYPE); + if (itr != options_.end()) { + string comp = itr->second; + if (comp == "no") { + opt.compression = leveldb::kNoCompression; + } else if (comp == "snappy") { + opt.compression = leveldb::kSnappyCompression; + } else if (comp == "zlib") { + opt.compression = leveldb::kZlibCompression; + } else if (comp == "bzip2") { + opt.compression = leveldb::kBZip2Compression; + } else { + // Unknown compression. + exec_state_ = LDBCommandExecuteResult::FAILED( + "Unknown compression level: " + comp); + } + } + + int write_buffer_size; + if (ParseIntOption(options_, ARG_WRITE_BUFFER_SIZE, write_buffer_size, + exec_state_)) { + if (write_buffer_size > 0) { opt.write_buffer_size = write_buffer_size; - } else if (arg.find(FILE_SIZE_ARG) == 0) { - std::string file_size_str = arg.substr(strlen(FILE_SIZE_ARG)); - int file_size = atoi(file_size_str.c_str()); - if (file_size == 0) { - exec_state_ = LDBCommandExecuteResult::FAILED( - std::string("Badly-formatted file size: ") + file_size_str); - } + } else { + exec_state_ = LDBCommandExecuteResult::FAILED(ARG_WRITE_BUFFER_SIZE + + " must be > 0."); + } + } + + int file_size; + if (ParseIntOption(options_, ARG_FILE_SIZE, file_size, exec_state_)) { + if (file_size > 0) { opt.target_file_size_base = file_size; } else { - exec_state_ = LDBCommandExecuteResult::FAILED( - "Unknown option: " + arg); + exec_state_ = LDBCommandExecuteResult::FAILED(ARG_FILE_SIZE + + " must be > 0."); } } return opt; } +bool LDBCommand::ParseKeyValue(const string& line, string* key, string* value, + bool is_key_hex, bool is_value_hex) { + size_t pos = line.find(DELIM); + if (pos != std::string::npos) { + *key = line.substr(0, pos); + *value = line.substr(pos + strlen(DELIM)); + if (is_key_hex) { + *key = HexToString(*key); + } + if (is_value_hex) { + *value = HexToString(*value); + } + return true; + } else { + return false; + } +} -const char* LDBCommand::FROM_ARG = "--from="; -const char* LDBCommand::END_ARG = "--to="; -const char* LDBCommand::HEX_ARG = "--hex"; +/** + * Make sure that ONLY the command-line options and flags expected by this + * command are specified on the command-line. Extraneous options are usually + * the result of user error. + * Returns true if all checks pass. Else returns false, and prints an + * appropriate error msg to stderr. + */ +bool LDBCommand::ValidateCmdLineOptions() { + + for (map::const_iterator itr = options_.begin(); + itr != options_.end(); itr++) { + if (std::find(valid_cmd_line_options_.begin(), + valid_cmd_line_options_.end(), itr->first) == + valid_cmd_line_options_.end()) { + fprintf(stderr, "Invalid command-line option %s\n", itr->first.c_str()); + return false; + } + } -Compactor::Compactor(std::string& db_name, std::vector& args) : - LDBCommand(db_name, args), null_from_(true), null_to_(true), hex_(false) { - for (unsigned int i = 0; i < args.size(); i++) { - std::string& arg = args.at(i); - if (arg.find(FROM_ARG) == 0) { - null_from_ = false; - from_ = arg.substr(strlen(FROM_ARG)); - } else if (arg.find(END_ARG) == 0) { - null_to_ = false; - to_ = arg.substr(strlen(END_ARG)); - } else if (arg == HEX_ARG) { - hex_ = true; - } else { - exec_state_ = LDBCommandExecuteResult::FAILED("Unknown argument." + arg); + for (vector::const_iterator itr = flags_.begin(); + itr != flags_.end(); itr++) { + if (std::find(valid_cmd_line_options_.begin(), + valid_cmd_line_options_.end(), *itr) == + valid_cmd_line_options_.end()) { + fprintf(stderr, "Invalid command-line flag %s\n", itr->c_str()); + return false; } } - if (hex_) { + if (options_.find(ARG_DB) == options_.end()) { + fprintf(stderr, "%s must be specified\n", ARG_DB.c_str()); + return false; + } + + return true; +} + +CompactorCommand::CompactorCommand(const vector& params, + const map& options, const vector& flags) : + LDBCommand(options, flags, false, + BuildCmdLineOptions({ARG_FROM, ARG_TO, ARG_HEX, ARG_KEY_HEX, + ARG_VALUE_HEX})), + null_from_(true), null_to_(true) { + + map::const_iterator itr = options.find(ARG_FROM); + if (itr != options.end()) { + null_from_ = false; + from_ = itr->second; + } + + itr = options.find(ARG_TO); + if (itr != options.end()) { + null_to_ = false; + to_ = itr->second; + } + + if (is_key_hex_) { if (!null_from_) { from_ = HexToString(from_); } @@ -143,14 +297,14 @@ Compactor::Compactor(std::string& db_name, std::vector& args) : } } -void Compactor::Help(std::string& ret) { - LDBCommand::Help(ret); - ret.append("[--from=START KEY] "); - ret.append("[--to=START KEY] "); - ret.append("[--hex] "); +void CompactorCommand::Help(string& ret) { + ret.append(" "); + ret.append(CompactorCommand::Name()); + ret.append(HelpRangeCmdArgs()); + ret.append("\n"); } -void Compactor::DoCommand() { +void CompactorCommand::DoCommand() { leveldb::Slice* begin = NULL; leveldb::Slice* end = NULL; @@ -168,46 +322,35 @@ void Compactor::DoCommand() { delete end; } -const char* DBLoader::HEX_INPUT_ARG = "--input_hex"; -const char* DBLoader::CREATE_IF_MISSING_ARG = "--create_if_missing"; -const char* DBLoader::DISABLE_WAL_ARG = "--disable_wal"; +const string DBLoaderCommand::ARG_DISABLE_WAL = "disable_wal"; -DBLoader::DBLoader(std::string& db_name, std::vector& args) : - LDBCommand(db_name, args), - hex_input_(false), +DBLoaderCommand::DBLoaderCommand(const vector& params, + const map& options, const vector& flags) : + LDBCommand(options, flags, false, + BuildCmdLineOptions({ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, + ARG_FROM, ARG_TO, ARG_CREATE_IF_MISSING, + ARG_DISABLE_WAL})), create_if_missing_(false) { - for (unsigned int i = 0; i < args.size(); i++) { - std::string& arg = args.at(i); - if (arg == HEX_INPUT_ARG) { - hex_input_ = true; - } else if (arg == CREATE_IF_MISSING_ARG) { - create_if_missing_ = true; - } else if (arg == DISABLE_WAL_ARG) { - disable_wal_ = true; - } else { - exec_state_ = LDBCommandExecuteResult::FAILED("Unknown argument:" + arg); - } - } + + create_if_missing_ = IsFlagPresent(flags, ARG_CREATE_IF_MISSING); + disable_wal_ = IsFlagPresent(flags, ARG_DISABLE_WAL); } -void DBLoader::Help(std::string& ret) { - LDBCommand::Help(ret); - ret.append("["); - ret.append(HEX_INPUT_ARG); - ret.append("] ["); - ret.append(CREATE_IF_MISSING_ARG); - ret.append("] ["); - ret.append(DISABLE_WAL_ARG); - ret.append("]"); +void DBLoaderCommand::Help(string& ret) { + ret.append(" "); + ret.append(DBLoaderCommand::Name()); + ret.append(" [--" + ARG_CREATE_IF_MISSING + "]"); + ret.append(" [--" + ARG_DISABLE_WAL + "]"); + ret.append("\n"); } -leveldb::Options DBLoader::PrepareOptionsForOpenDB() { +leveldb::Options DBLoaderCommand::PrepareOptionsForOpenDB() { leveldb::Options opt = LDBCommand::PrepareOptionsForOpenDB(); opt.create_if_missing = create_if_missing_; return opt; } -void DBLoader::DoCommand() { +void DBLoaderCommand::DoCommand() { if (!db_) { return; } @@ -218,11 +361,11 @@ void DBLoader::DoCommand() { } int bad_lines = 0; - std::string line; + string line; while (std::getline(std::cin, line, '\n')) { - std::string key; - std::string value; - if (ParseKeyValue(line, &key, &value, hex_input_)) { + string key; + string value; + if (ParseKeyValue(line, &key, &value, is_key_hex_, is_value_hex_)) { db_->Put(write_options, Slice(key), Slice(value)); } else if (0 == line.find("Keys in range:")) { // ignore this line @@ -232,50 +375,53 @@ void DBLoader::DoCommand() { bad_lines ++; } } - + if (bad_lines > 0) { std::cout << "Warning: " << bad_lines << " bad lines ignored." << std::endl; } } -const char* DBDumper::MAX_KEYS_ARG = "--max_keys="; -const char* DBDumper::COUNT_ONLY_ARG = "--count_only"; -const char* DBDumper::STATS_ARG = "--stats"; -const char* DBDumper::HEX_OUTPUT_ARG = "--output_hex"; +const string DBDumperCommand::ARG_COUNT_ONLY = "count_only"; +const string DBDumperCommand::ARG_STATS = "stats"; -DBDumper::DBDumper(std::string& db_name, std::vector& args) : - LDBCommand(db_name, args), +DBDumperCommand::DBDumperCommand(const vector& params, + const map& options, const vector& flags) : + LDBCommand(options, flags, true, + BuildCmdLineOptions({ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, + ARG_FROM, ARG_TO, ARG_MAX_KEYS, + ARG_COUNT_ONLY, ARG_STATS})), null_from_(true), null_to_(true), max_keys_(-1), count_only_(false), - print_stats_(false), - hex_(false), - hex_output_(false) { - for (unsigned int i = 0; i < args.size(); i++) { - std::string& arg = args.at(i); - if (arg.find(FROM_ARG) == 0) { - null_from_ = false; - from_ = arg.substr(strlen(FROM_ARG)); - } else if (arg.find(END_ARG) == 0) { - null_to_ = false; - to_ = arg.substr(strlen(END_ARG)); - } else if (arg == HEX_ARG) { - hex_ = true; - } else if (arg.find(MAX_KEYS_ARG) == 0) { - max_keys_ = atoi(arg.substr(strlen(MAX_KEYS_ARG)).c_str()); - } else if (arg == STATS_ARG) { - print_stats_ = true; - } else if (arg == COUNT_ONLY_ARG) { - count_only_ = true; - } else if (arg == HEX_OUTPUT_ARG) { - hex_output_ = true; - } else { - exec_state_ = LDBCommandExecuteResult::FAILED("Unknown argument:" + arg); + print_stats_(false) { + + map::const_iterator itr = options.find(ARG_FROM); + if (itr != options.end()) { + null_from_ = false; + from_ = itr->second; + } + + itr = options.find(ARG_TO); + if (itr != options.end()) { + null_to_ = false; + to_ = itr->second; + } + + itr = options.find(ARG_MAX_KEYS); + if (itr != options.end()) { + try { + max_keys_ = boost::lexical_cast(itr->second); + } catch( const boost::bad_lexical_cast & ) { + exec_state_ = LDBCommandExecuteResult::FAILED(ARG_MAX_KEYS + + " has an invalid value"); } } - if (hex_) { + print_stats_ = IsFlagPresent(flags, ARG_STATS); + count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY); + + if (is_key_hex_) { if (!null_from_) { from_ = HexToString(from_); } @@ -285,25 +431,24 @@ DBDumper::DBDumper(std::string& db_name, std::vector& args) : } } -void DBDumper::Help(std::string& ret) { - LDBCommand::Help(ret); - ret.append("[--from=START KEY] "); - ret.append("[--to=END Key] "); - ret.append("[--hex] "); - ret.append("[--output_hex] "); - ret.append("[--max_keys=NUM] "); - ret.append("[--count_only] "); - ret.append("[--stats] "); +void DBDumperCommand::Help(string& ret) { + ret.append(" "); + ret.append(DBDumperCommand::Name()); + ret.append(HelpRangeCmdArgs()); + ret.append(" [--" + ARG_MAX_KEYS + "=]"); + ret.append(" [--" + ARG_COUNT_ONLY + "]"); + ret.append(" [--" + ARG_STATS + "]"); + ret.append("\n"); } -void DBDumper::DoCommand() { +void DBDumperCommand::DoCommand() { if (!db_) { return; } // Parse command line args uint64_t count = 0; if (print_stats_) { - std::string stats; + string stats; if (db_->GetProperty("leveldb.stats", &stats)) { fprintf(stdout, "%s\n", stats.c_str()); } @@ -336,9 +481,9 @@ void DBDumper::DoCommand() { } ++count; if (!count_only_) { - std::string str = PrintKeyValue(iter->key().ToString(), + string str = PrintKeyValue(iter->key().ToString(), iter->value().ToString(), - hex_output_); + is_key_hex_, is_value_hex_); fprintf(stdout, "%s\n", str.c_str()); } } @@ -347,140 +492,48 @@ void DBDumper::DoCommand() { delete iter; } +const string ReduceDBLevelsCommand::ARG_NEW_LEVELS = "new_levels"; +const string ReduceDBLevelsCommand::ARG_PRINT_OLD_LEVELS = "print_old_levels"; -const char* DBQuerier::HEX_ARG = "--hex"; -const char* DBQuerier::HELP_CMD = "help"; -const char* DBQuerier::GET_CMD = "get"; -const char* DBQuerier::PUT_CMD = "put"; -const char* DBQuerier::DELETE_CMD = "delete"; - -DBQuerier::DBQuerier(std::string& db_name, std::vector& args) : - LDBCommand(db_name, args), - hex_(false) { - for (unsigned int i = 0; i < args.size(); i++) { - std::string& arg = args.at(i); - if (arg == HEX_ARG) { - hex_ = true; - } else { - exec_state_ = LDBCommandExecuteResult::FAILED("Unknown argument:" + arg); - } - } -} - -void DBQuerier::Help(std::string& ret) { - LDBCommand::Help(ret); - ret.append("[--hex] "); - ret.append("(type \"help\" on stdin for details.)"); -} - -void DBQuerier::DoCommand() { - if (!db_) { - return; - } +ReduceDBLevelsCommand::ReduceDBLevelsCommand(const vector& params, + const map& options, const vector& flags) : + LDBCommand(options, flags, false, + BuildCmdLineOptions({ARG_NEW_LEVELS, ARG_PRINT_OLD_LEVELS})), + old_levels_(1 << 16), + new_levels_(-1), + print_old_levels_(false) { - leveldb::ReadOptions read_options; - leveldb::WriteOptions write_options; - std::string line; - std::string key; - std::string value; - while (std::getline(std::cin, line, '\n')) { - - // Parse line into vector - std::vector tokens; - size_t pos = 0; - while (true) { - size_t pos2 = line.find(' ', pos); - if (pos2 == std::string::npos) { - break; - } - tokens.push_back(line.substr(pos, pos2-pos)); - pos = pos2 + 1; - } - tokens.push_back(line.substr(pos)); - - const std::string& cmd = tokens[0]; - - if (cmd == HELP_CMD) { - fprintf(stdout, - "get \n" - "put \n" - "delete \n"); - } else if (cmd == DELETE_CMD && tokens.size() == 2) { - key = (hex_ ? HexToString(tokens[1]) : tokens[1]); - db_->Delete(write_options, Slice(key)); - fprintf(stdout, "Successfully deleted %s\n", tokens[1].c_str()); - } else if (cmd == PUT_CMD && tokens.size() == 3) { - key = (hex_ ? HexToString(tokens[1]) : tokens[1]); - value = (hex_ ? HexToString(tokens[2]) : tokens[2]); - db_->Put(write_options, Slice(key), Slice(value)); - fprintf(stdout, "Successfully put %s %s\n", - tokens[1].c_str(), tokens[2].c_str()); - } else if (cmd == GET_CMD && tokens.size() == 2) { - key = (hex_ ? HexToString(tokens[1]) : tokens[1]); - if (db_->Get(read_options, Slice(key), &value).ok()) { - fprintf(stdout, "%s\n", PrintKeyValue(key, value, hex_).c_str()); - } else { - fprintf(stdout, "Not found %s\n", tokens[1].c_str()); - } - } else { - fprintf(stdout, "Unknown command %s\n", line.c_str()); - } - } -} - - - -const char* ReduceDBLevels::NEW_LEVLES_ARG = "--new_levels="; -const char* ReduceDBLevels::PRINT_OLD_LEVELS_ARG = "--print_old_levels"; - -ReduceDBLevels::ReduceDBLevels(std::string& db_name, - std::vector& args) -: LDBCommand(db_name, args), - old_levels_(1 << 16), - new_levels_(-1), - print_old_levels_(false) { - - for (unsigned int i = 0; i < args.size(); i++) { - std::string& arg = args.at(i); - if (arg.find(NEW_LEVLES_ARG) == 0) { - new_levels_ = atoi(arg.substr(strlen(NEW_LEVLES_ARG)).c_str()); - } else if (arg == PRINT_OLD_LEVELS_ARG) { - print_old_levels_ = true; - } else { - exec_state_ = LDBCommandExecuteResult::FAILED( - "Unknown argument." + arg); - } - } + ParseIntOption(options_, ARG_NEW_LEVELS, new_levels_, exec_state_); + print_old_levels_ = IsFlagPresent(flags, ARG_PRINT_OLD_LEVELS); if(new_levels_ <= 0) { exec_state_ = LDBCommandExecuteResult::FAILED( - " Use --new_levels to specify a new level number\n"); + " Use --" + ARG_NEW_LEVELS + " to specify a new level number\n"); } } -std::vector ReduceDBLevels::PrepareArgs(int new_levels, - bool print_old_level) { - std::vector ret; - char arg[100]; - sprintf(arg, "%s%d", NEW_LEVLES_ARG, new_levels); - ret.push_back(arg); +vector ReduceDBLevelsCommand::PrepareArgs(const string& db_path, + int new_levels, bool print_old_level) { + vector ret; + ret.push_back("reduce_levels"); + ret.push_back("--" + ARG_DB + "=" + db_path); + ret.push_back("--" + ARG_NEW_LEVELS + "=" + std::to_string(new_levels)); if(print_old_level) { - sprintf(arg, "%s", PRINT_OLD_LEVELS_ARG); - ret.push_back(arg); + ret.push_back("--" + ARG_PRINT_OLD_LEVELS); } return ret; } -void ReduceDBLevels::Help(std::string& msg) { - LDBCommand::Help(msg); - msg.append("[--new_levels=New number of levels] "); - msg.append("[--print_old_levels] "); - msg.append("[--compression=none|snappy|zlib|bzip2] "); - msg.append("[--file_size= per-file size] "); +void ReduceDBLevelsCommand::Help(string& ret) { + ret.append(" "); + ret.append(ReduceDBLevelsCommand::Name()); + ret.append(" --" + ARG_NEW_LEVELS + "="); + ret.append(" [--" + ARG_PRINT_OLD_LEVELS + "]"); + ret.append("\n"); } -leveldb::Options ReduceDBLevels::PrepareOptionsForOpenDB() { +leveldb::Options ReduceDBLevelsCommand::PrepareOptionsForOpenDB() { leveldb::Options opt = LDBCommand::PrepareOptionsForOpenDB(); opt.num_levels = old_levels_; // Disable size compaction @@ -490,7 +543,8 @@ leveldb::Options ReduceDBLevels::PrepareOptionsForOpenDB() { return opt; } -Status ReduceDBLevels::GetOldNumOfLevels(leveldb::Options& opt, int* levels) { +Status ReduceDBLevelsCommand::GetOldNumOfLevels(leveldb::Options& opt, + int* levels) { TableCache* tc = new TableCache(db_path_, &opt, 10); const InternalKeyComparator* cmp = new InternalKeyComparator( opt.comparator); @@ -515,7 +569,7 @@ Status ReduceDBLevels::GetOldNumOfLevels(leveldb::Options& opt, int* levels) { return st; } -void ReduceDBLevels::DoCommand() { +void ReduceDBLevelsCommand::DoCommand() { if (new_levels_ <= 1) { exec_state_ = LDBCommandExecuteResult::FAILED( "Invalid number of levels.\n"); @@ -575,38 +629,46 @@ void ReduceDBLevels::DoCommand() { } } -const char* WALDumper::WAL_FILE_ARG = "--walfile="; -WALDumper::WALDumper(std::vector& args) : - LDBCommand(args), print_header_(false) { +const string WALDumperCommand::ARG_WAL_FILE = "walfile"; +const string WALDumperCommand::ARG_PRINT_HEADER = "header"; + +WALDumperCommand::WALDumperCommand(const vector& params, + const map& options, const vector& flags) : + LDBCommand(options, flags, true, + BuildCmdLineOptions({ARG_WAL_FILE, ARG_PRINT_HEADER})), + print_header_(false) { + wal_file_.clear(); - for (unsigned int i = 0; i < args.size(); i++) { - std::string& arg = args.at(i); - if (arg == "--header") { - print_header_ = true; - } else if (arg.find(WAL_FILE_ARG) == 0) { - wal_file_ = arg.substr(strlen(WAL_FILE_ARG)); - } else { - exec_state_ = LDBCommandExecuteResult::FAILED("Unknown argument " + arg); - } + + map::const_iterator itr = options.find(ARG_WAL_FILE); + if (itr != options.end()) { + wal_file_ = itr->second; } + + print_header_ = IsFlagPresent(flags, ARG_PRINT_HEADER); + if (wal_file_.empty()) { - exec_state_ = LDBCommandExecuteResult::FAILED("Argument --walfile reqd."); + exec_state_ = LDBCommandExecuteResult::FAILED( + "Argument " + ARG_WAL_FILE + " must be specified."); } } -void WALDumper::Help(std::string& ret) { - ret.append("--walfile write_ahead_log "); - ret.append("[--header print's a header] "); +void WALDumperCommand::Help(string& ret) { + ret.append(" "); + ret.append(WALDumperCommand::Name()); + ret.append(" --" + ARG_WAL_FILE + "="); + ret.append(" --[" + ARG_PRINT_HEADER + "] "); + ret.append("\n"); } -void WALDumper::DoCommand() { +void WALDumperCommand::DoCommand() { struct StdErrReporter : public log::Reader::Reporter { virtual void Corruption(size_t bytes, const Status& s) { std::cerr<<"Corruption detected in log file "< file; Env* env_ = Env::Default(); Status status = env_->NewSequentialFile(wal_file_, &file); if (!status.ok()) { @@ -614,8 +676,8 @@ void WALDumper::DoCommand() { status.ToString()); } else { StdErrReporter reporter; - log::Reader reader(file, &reporter, true, 0); - std::string scratch; + log::Reader reader(std::move(file), &reporter, true, 0); + string scratch; WriteBatch batch; Slice record; std::stringstream row; @@ -639,4 +701,376 @@ void WALDumper::DoCommand() { } } + +GetCommand::GetCommand(const vector& params, + const map& options, const vector& flags) : + LDBCommand(options, flags, true, + BuildCmdLineOptions({ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX})) { + + if (params.size() != 1) { + exec_state_ = LDBCommandExecuteResult::FAILED( + " must be specified for the get command"); + } else { + key_ = params.at(0); + } + + if (is_key_hex_) { + key_ = HexToString(key_); + } +} + +void GetCommand::Help(string& ret) { + ret.append(" "); + ret.append(GetCommand::Name()); + ret.append(" "); + ret.append("\n"); +} + +void GetCommand::DoCommand() { + string value; + leveldb::Status st = db_->Get(leveldb::ReadOptions(), key_, &value); + if (st.ok()) { + fprintf(stdout, "%s\n", + (is_value_hex_ ? StringToHex(value) : value).c_str()); + } else { + exec_state_ = LDBCommandExecuteResult::FAILED(st.ToString()); + } +} + + +ApproxSizeCommand::ApproxSizeCommand(const vector& params, + const map& options, const vector& flags) : + LDBCommand(options, flags, true, + BuildCmdLineOptions({ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, + ARG_FROM, ARG_TO})) { + + if (options.find(ARG_FROM) != options.end()) { + start_key_ = options.find(ARG_FROM)->second; + } else { + exec_state_ = LDBCommandExecuteResult::FAILED(ARG_FROM + + " must be specified for approxsize command"); + return; + } + + if (options.find(ARG_TO) != options.end()) { + end_key_ = options.find(ARG_TO)->second; + } else { + exec_state_ = LDBCommandExecuteResult::FAILED(ARG_TO + + " must be specified for approxsize command"); + return; + } + + if (is_key_hex_) { + start_key_ = HexToString(start_key_); + end_key_ = HexToString(end_key_); + } +} + +void ApproxSizeCommand::Help(string& ret) { + ret.append(" "); + ret.append(ApproxSizeCommand::Name()); + ret.append(HelpRangeCmdArgs()); + ret.append("\n"); +} + +void ApproxSizeCommand::DoCommand() { + + leveldb::Range ranges[1]; + ranges[0] = leveldb::Range(start_key_, end_key_); + uint64_t sizes[1]; + db_->GetApproximateSizes(ranges, 1, sizes); + fprintf(stdout, "%ld\n", sizes[0]); + /* Wierd that GetApproximateSizes() returns void, although documentation + * says that it returns a Status object. + if (!st.ok()) { + exec_state_ = LDBCommandExecuteResult::FAILED(st.ToString()); + } + */ +} + + +BatchPutCommand::BatchPutCommand(const vector& params, + const map& options, const vector& flags) : + LDBCommand(options, flags, false, + BuildCmdLineOptions({ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, + ARG_CREATE_IF_MISSING})) { + + if (params.size() < 2) { + exec_state_ = LDBCommandExecuteResult::FAILED( + "At least one pair must be specified batchput."); + } else if (params.size() % 2 != 0) { + exec_state_ = LDBCommandExecuteResult::FAILED( + "Equal number of s and s must be specified for batchput."); + } else { + for (size_t i = 0; i < params.size(); i += 2) { + string key = params.at(i); + string value = params.at(i+1); + key_values_.push_back(std::pair( + is_key_hex_ ? HexToString(key) : key, + is_value_hex_ ? HexToString(value) : value)); + } + } +} + +void BatchPutCommand::Help(string& ret) { + ret.append(" "); + ret.append(BatchPutCommand::Name()); + ret.append(" [ ] [..]"); + ret.append("\n"); +} + +void BatchPutCommand::DoCommand() { + leveldb::WriteBatch batch; + + for (vector>::const_iterator itr + = key_values_.begin(); itr != key_values_.end(); itr++) { + batch.Put(itr->first, itr->second); + } + leveldb::Status st = db_->Write(leveldb::WriteOptions(), &batch); + if (st.ok()) { + fprintf(stdout, "OK\n"); + } else { + exec_state_ = LDBCommandExecuteResult::FAILED(st.ToString()); + } +} + +leveldb::Options BatchPutCommand::PrepareOptionsForOpenDB() { + leveldb::Options opt = LDBCommand::PrepareOptionsForOpenDB(); + opt.create_if_missing = IsFlagPresent(flags_, ARG_CREATE_IF_MISSING); + return opt; +} + + +ScanCommand::ScanCommand(const vector& params, + const map& options, const vector& flags) : + LDBCommand(options, flags, true, + BuildCmdLineOptions({ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, + ARG_FROM, ARG_TO, ARG_MAX_KEYS})), + start_key_specified_(false), + end_key_specified_(false), + max_keys_scanned_(-1) { + + map::const_iterator itr = options.find(ARG_FROM); + if (itr != options.end()) { + start_key_ = itr->second; + if (is_key_hex_) { + start_key_ = HexToString(start_key_); + } + start_key_specified_ = true; + } + itr = options.find(ARG_TO); + if (itr != options.end()) { + end_key_ = itr->second; + if (is_key_hex_) { + end_key_ = HexToString(end_key_); + } + end_key_specified_ = true; + } + + itr = options.find(ARG_MAX_KEYS); + if (itr != options.end()) { + try { + max_keys_scanned_ = boost::lexical_cast< int >(itr->second); + } catch( const boost::bad_lexical_cast & ) { + exec_state_ = LDBCommandExecuteResult::FAILED(ARG_MAX_KEYS + + " has an invalid value"); + } + } +} + +void ScanCommand::Help(string& ret) { + ret.append(" "); + ret.append(ScanCommand::Name()); + ret.append(HelpRangeCmdArgs()); + ret.append("--" + ARG_MAX_KEYS + "=N] "); + ret.append("\n"); +} + +void ScanCommand::DoCommand() { + + int num_keys_scanned = 0; + Iterator* it = db_->NewIterator(leveldb::ReadOptions()); + if (start_key_specified_) { + it->Seek(start_key_); + } else { + it->SeekToFirst(); + } + for ( ; + it->Valid() && (!end_key_specified_ || it->key().ToString() < end_key_); + it->Next()) { + string key = it->key().ToString(); + string value = it->value().ToString(); + fprintf(stdout, "%s : %s\n", + (is_key_hex_ ? StringToHex(key) : key).c_str(), + (is_value_hex_ ? StringToHex(value) : value).c_str() + ); + num_keys_scanned++; + if (max_keys_scanned_ >= 0 && num_keys_scanned >= max_keys_scanned_) { + break; + } + } + if (!it->status().ok()) { // Check for any errors found during the scan + exec_state_ = LDBCommandExecuteResult::FAILED(it->status().ToString()); + } + delete it; +} + + +DeleteCommand::DeleteCommand(const vector& params, + const map& options, const vector& flags) : + LDBCommand(options, flags, false, + BuildCmdLineOptions({ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX})) { + + if (params.size() != 1) { + exec_state_ = LDBCommandExecuteResult::FAILED( + "KEY must be specified for the delete command"); + } else { + key_ = params.at(0); + if (is_key_hex_) { + key_ = HexToString(key_); + } + } +} + +void DeleteCommand::Help(string& ret) { + ret.append(" "); + ret.append(DeleteCommand::Name() + " "); + ret.append("\n"); +} + +void DeleteCommand::DoCommand() { + leveldb::Status st = db_->Delete(leveldb::WriteOptions(), key_); + if (st.ok()) { + fprintf(stdout, "OK\n"); + } else { + exec_state_ = LDBCommandExecuteResult::FAILED(st.ToString()); + } +} + + +PutCommand::PutCommand(const vector& params, + const map& options, const vector& flags) : + LDBCommand(options, flags, false, + BuildCmdLineOptions({ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, + ARG_CREATE_IF_MISSING})) { + + if (params.size() != 2) { + exec_state_ = LDBCommandExecuteResult::FAILED( + " and must be specified for the put command"); + } else { + key_ = params.at(0); + value_ = params.at(1); + } + + if (is_key_hex_) { + key_ = HexToString(key_); + } + + if (is_value_hex_) { + value_ = HexToString(value_); + } +} + +void PutCommand::Help(string& ret) { + ret.append(" "); + ret.append(PutCommand::Name()); + ret.append(" "); + ret.append("\n"); +} + +void PutCommand::DoCommand() { + leveldb::Status st = db_->Put(leveldb::WriteOptions(), key_, value_); + if (st.ok()) { + fprintf(stdout, "OK\n"); + } else { + exec_state_ = LDBCommandExecuteResult::FAILED(st.ToString()); + } +} + +leveldb::Options PutCommand::PrepareOptionsForOpenDB() { + leveldb::Options opt = LDBCommand::PrepareOptionsForOpenDB(); + opt.create_if_missing = IsFlagPresent(flags_, ARG_CREATE_IF_MISSING); + return opt; +} + + +const char* DBQuerierCommand::HELP_CMD = "help"; +const char* DBQuerierCommand::GET_CMD = "get"; +const char* DBQuerierCommand::PUT_CMD = "put"; +const char* DBQuerierCommand::DELETE_CMD = "delete"; + +DBQuerierCommand::DBQuerierCommand(const vector& params, + const map& options, const vector& flags) : + LDBCommand(options, flags, false, + BuildCmdLineOptions({ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX})) { + +} + +void DBQuerierCommand::Help(string& ret) { + ret.append(" "); + ret.append(DBQuerierCommand::Name()); + ret.append("\n"); + ret.append(" Starts a REPL shell. Type help for list of available " + "commands."); + ret.append("\n"); +} + +void DBQuerierCommand::DoCommand() { + if (!db_) { + return; + } + + leveldb::ReadOptions read_options; + leveldb::WriteOptions write_options; + + string line; + string key; + string value; + while (getline(std::cin, line, '\n')) { + + // Parse line into vector + vector tokens; + size_t pos = 0; + while (true) { + size_t pos2 = line.find(' ', pos); + if (pos2 == string::npos) { + break; + } + tokens.push_back(line.substr(pos, pos2-pos)); + pos = pos2 + 1; + } + tokens.push_back(line.substr(pos)); + + const string& cmd = tokens[0]; + + if (cmd == HELP_CMD) { + fprintf(stdout, + "get \n" + "put \n" + "delete \n"); + } else if (cmd == DELETE_CMD && tokens.size() == 2) { + key = (is_key_hex_ ? HexToString(tokens[1]) : tokens[1]); + db_->Delete(write_options, Slice(key)); + fprintf(stdout, "Successfully deleted %s\n", tokens[1].c_str()); + } else if (cmd == PUT_CMD && tokens.size() == 3) { + key = (is_key_hex_ ? HexToString(tokens[1]) : tokens[1]); + value = (is_value_hex_ ? HexToString(tokens[2]) : tokens[2]); + db_->Put(write_options, Slice(key), Slice(value)); + fprintf(stdout, "Successfully put %s %s\n", + tokens[1].c_str(), tokens[2].c_str()); + } else if (cmd == GET_CMD && tokens.size() == 2) { + key = (is_key_hex_ ? HexToString(tokens[1]) : tokens[1]); + if (db_->Get(read_options, Slice(key), &value).ok()) { + fprintf(stdout, "%s\n", PrintKeyValue(key, value, + is_key_hex_, is_value_hex_).c_str()); + } else { + fprintf(stdout, "Not found %s\n", tokens[1].c_str()); + } + } else { + fprintf(stdout, "Unknown command %s\n", line.c_str()); + } + } +} + + } diff --git a/util/ldb_cmd.h b/util/ldb_cmd.h index 06e0e5b9c..aac9905af 100644 --- a/util/ldb_cmd.h +++ b/util/ldb_cmd.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef LEVELDB_UTIL_LDB_H_ -#define LEVELDB_UTIL_LDB_H_ +#ifndef LEVELDB_UTIL_LDB_CMD_H_ +#define LEVELDB_UTIL_LDB_CMD_H_ #include #include @@ -12,6 +12,9 @@ #include #include +#include +#include + #include "leveldb/db.h" #include "leveldb/env.h" #include "leveldb/options.h" @@ -19,90 +22,37 @@ #include "leveldb/slice.h" #include "db/version_set.h" #include "util/logging.h" +#include "util/ldb_cmd_execute_result.h" -namespace leveldb { - -class LDBCommandExecuteResult { -public: - enum State { - EXEC_NOT_STARTED = 0, EXEC_SUCCEED = 1, EXEC_FAILED = 2, - }; - - LDBCommandExecuteResult() { - state_ = EXEC_NOT_STARTED; - message_ = ""; - } - - LDBCommandExecuteResult(State state, std::string& msg) { - state_ = state; - message_ = msg; - } - - std::string ToString() { - std::string ret; - switch (state_) { - case EXEC_SUCCEED: - break; - case EXEC_FAILED: - ret.append("Failed: "); - break; - case EXEC_NOT_STARTED: - ret.append("Not started: "); - } - if (!message_.empty()) { - ret.append(message_); - } - return ret; - } - - void Reset() { - state_ = EXEC_NOT_STARTED; - message_ = ""; - } - - bool IsSucceed() { - return state_ == EXEC_SUCCEED; - } - - bool IsNotStarted() { - return state_ == EXEC_NOT_STARTED; - } - - bool IsFailed() { - return state_ == EXEC_FAILED; - } - - static LDBCommandExecuteResult SUCCEED(std::string msg) { - return LDBCommandExecuteResult(EXEC_SUCCEED, msg); - } +using std::string; +using std::map; +using std::vector; +using std::ostringstream; - static LDBCommandExecuteResult FAILED(std::string msg) { - return LDBCommandExecuteResult(EXEC_FAILED, msg); - } - -private: - State state_; - std::string message_; - - bool operator==(const LDBCommandExecuteResult&); - bool operator!=(const LDBCommandExecuteResult&); -}; +namespace leveldb { class LDBCommand { public: - /* Constructor */ - LDBCommand(std::string& db_name, std::vector& args) : - db_path_(db_name), - db_(NULL) { - parse_open_args(args); - } - - LDBCommand(std::vector& args) : - db_path_(""), - db_(NULL) { - parse_open_args(args); - } + // Command-line arguments + static const string ARG_DB; + static const string ARG_HEX; + static const string ARG_KEY_HEX; + static const string ARG_VALUE_HEX; + static const string ARG_FROM; + static const string ARG_TO; + static const string ARG_MAX_KEYS; + static const string ARG_BLOOM_BITS; + static const string ARG_COMPRESSION_TYPE; + static const string ARG_BLOCK_SIZE; + static const string ARG_AUTO_COMPACTION; + static const string ARG_WRITE_BUFFER_SIZE; + static const string ARG_FILE_SIZE; + static const string ARG_CREATE_IF_MISSING; + + static LDBCommand* InitFromCmdLineArgs(const vector& args); + static LDBCommand* InitFromCmdLineArgs(int argc, char** argv); + bool ValidateCmdLineOptions(); virtual leveldb::Options PrepareOptionsForOpenDB(); @@ -117,23 +67,6 @@ public: } } - /* Print the help message */ - static void Help(std::string& ret) { - ret.append("--db=DB_PATH ["); - ret.append(LDBCommand::BLOOM_ARG); - ret.append("] ["); - ret.append(LDBCommand::COMPRESSION_TYPE_ARG); - ret.append("] ["); - ret.append(LDBCommand::BLOCK_SIZE); - ret.append("] ["); - ret.append(LDBCommand::AUTO_COMPACTION); - ret.append("] ["); - ret.append(LDBCommand::WRITE_BUFFER_SIZE_ARG); - ret.append("] ["); - ret.append(LDBCommand::FILE_SIZE_ARG); - ret.append("] "); - } - /* Run the command, and return the execute result. */ void Run() { if (!exec_state_.IsNotStarted()) { @@ -146,7 +79,7 @@ public: return; } } - + DoCommand(); if (exec_state_.IsNotStarted()) { exec_state_ = LDBCommandExecuteResult::SUCCEED(""); @@ -167,9 +100,15 @@ public: exec_state_.Reset(); } - static std::string HexToString(const std::string& str) { - std::string parsed; - for (unsigned int i = 0; i < str.length();) { + static string HexToString(const string& str) { + string parsed; + if (!boost::starts_with(str, "0x")) { + fprintf(stderr, "Invalid hex input %s. Must start with 0x\n", + str.c_str()); + throw "Invalid hex input"; + } + + for (unsigned int i = 2; i < str.length();) { int c; sscanf(str.c_str() + i, "%2X", &c); parsed.push_back(c); @@ -178,8 +117,8 @@ public: return parsed; } - static std::string StringToHex(const std::string& str) { - std::string result; + static string StringToHex(const string& str) { + string result = "0x"; char buf[10]; for (size_t i = 0; i < str.length(); i++) { snprintf(buf, 10, "%02X", (unsigned char)str[i]); @@ -189,43 +128,74 @@ public: } static const char* DELIM; - static bool ParseKeyValue(const std::string& line, - std::string* key, - std::string* value, - bool hex) { - size_t pos = line.find(DELIM); - if (pos != std::string::npos) { - (*key) = line.substr(0, pos); - (*value) = line.substr(pos + strlen(DELIM)); - if (hex) { - (*key) = HexToString(*key); - (*value) = HexToString(*value); - } - return true; - } else { - return false; + +protected: + + LDBCommandExecuteResult exec_state_; + std::string db_path_; + leveldb::DB* db_; + + /** + * true implies that this command can work if the db is opened in read-only + * mode. + */ + bool is_read_only_; + + /** If true, the key is input/output as hex in get/put/scan/delete etc. */ + bool is_key_hex_; + + /** If true, the value is input/output as hex in get/put/scan/delete etc. */ + bool is_value_hex_; + + /** + * Map of options passed on the command-line. + */ + const map options_; + + /** + * Flags passed on the command-line. + */ + const vector flags_; + + /** List of command-line options valid for this command */ + const vector valid_cmd_line_options_; + + bool ParseKeyValue(const string& line, string* key, string* value, + bool is_key_hex, bool is_value_hex); + + LDBCommand(const map& options, const vector& flags, + bool is_read_only, const vector& valid_cmd_line_options) : + db_(NULL), + is_read_only_(is_read_only), + is_key_hex_(false), + is_value_hex_(false), + options_(options), + flags_(flags), + valid_cmd_line_options_(valid_cmd_line_options) { + + map::const_iterator itr = options.find(ARG_DB); + if (itr != options.end()) { + db_path_ = itr->second; } - } - static std::string PrintKeyValue(const std::string& key, - const std::string& value, - bool hex) { - std::string result; - result.append(hex ? StringToHex(key) : key); - result.append(DELIM); - result.append(hex ? StringToHex(value) : value); - return result; + is_key_hex_ = IsKeyHex(options, flags); + is_value_hex_ = IsValueHex(options, flags); } -protected: - void OpenDB() { leveldb::Options opt = PrepareOptionsForOpenDB(); if (!exec_state_.IsNotStarted()) { return; } // Open the DB. - leveldb::Status st = leveldb::DB::Open(opt, db_path_, &db_); + leveldb::Status st; + if (is_read_only_) { + //st = leveldb::DB::OpenForReadOnly(opt, db_path_, &db_); + // Could not get this to work + st = leveldb::DB::Open(opt, db_path_, &db_); + } else { + st = leveldb::DB::Open(opt, db_path_, &db_); + } if (!st.ok()) { std::string msg = st.ToString(); exec_state_ = LDBCommandExecuteResult::FAILED(msg); @@ -239,109 +209,181 @@ protected: } } - static const char* FROM_ARG; - static const char* END_ARG; - static const char* HEX_ARG; - LDBCommandExecuteResult exec_state_; - std::string db_path_; - leveldb::DB* db_; + static string PrintKeyValue(const string& key, const string& value, + bool is_key_hex, bool is_value_hex) { + string result; + result.append(is_key_hex ? StringToHex(key) : key); + result.append(DELIM); + result.append(is_value_hex ? StringToHex(value) : value); + return result; + } + + static string PrintKeyValue(const string& key, const string& value, + bool is_hex) { + return PrintKeyValue(key, value, is_hex, is_hex); + } + + /** + * Return true if the specified flag is present in the specified flags vector + */ + static bool IsFlagPresent(const vector& flags, const string& flag) { + return (std::find(flags.begin(), flags.end(), flag) != flags.end()); + } + + static string HelpRangeCmdArgs() { + ostringstream str_stream; + str_stream << " "; + str_stream << "[--" << ARG_FROM << "] "; + str_stream << "[--" << ARG_TO << "] "; + return str_stream.str(); + } + + /** + * A helper function that returns a list of command line options + * used by this command. It includes the common options and the ones + * passed in. + */ + vector BuildCmdLineOptions(vector options) { + vector ret = {ARG_DB, ARG_BLOOM_BITS, ARG_BLOCK_SIZE, + ARG_AUTO_COMPACTION, ARG_COMPRESSION_TYPE, + ARG_WRITE_BUFFER_SIZE, ARG_FILE_SIZE}; + ret.insert(ret.end(), options.begin(), options.end()); + return ret; + } + + bool ParseIntOption(const map& options, string option, + int& value, LDBCommandExecuteResult& exec_state); private: - static const char* BLOOM_ARG; - static const char* COMPRESSION_TYPE_ARG; - static const char* BLOCK_SIZE; - static const char* AUTO_COMPACTION; - static const char* WRITE_BUFFER_SIZE_ARG; - static const char* FILE_SIZE_ARG; - std::vector open_args_; - void parse_open_args(std::vector& args); + /** + * Interpret command line options and flags to determine if the key + * should be input/output in hex. + */ + bool IsKeyHex(const map& options, + const vector& flags) { + return (IsFlagPresent(flags, ARG_HEX) || + IsFlagPresent(flags, ARG_KEY_HEX) || + ParseBooleanOption(options, ARG_HEX, false) || + ParseBooleanOption(options, ARG_KEY_HEX, false)); + } + + /** + * Interpret command line options and flags to determine if the value + * should be input/output in hex. + */ + bool IsValueHex(const map& options, + const vector& flags) { + return (IsFlagPresent(flags, ARG_HEX) || + IsFlagPresent(flags, ARG_VALUE_HEX) || + ParseBooleanOption(options, ARG_HEX, false) || + ParseBooleanOption(options, ARG_VALUE_HEX, false)); + } + + /** + * Returns the value of the specified option as a boolean. + * default_val is used if the option is not found in options. + * Throws an exception if the value of the option is not + * "true" or "false" (case insensitive). + */ + bool ParseBooleanOption(const map& options, + const string& option, bool default_val) { + + map::const_iterator itr = options.find(option); + if (itr != options.end()) { + string option_val = itr->second; + return StringToBool(itr->second); + } + return default_val; + } + + /** + * Converts val to a boolean. + * val must be either true or false (case insensitive). + * Otherwise an exception is thrown. + */ + bool StringToBool(string val) { + boost::algorithm::to_lower(val); + if (val == "true") { + return true; + } else if (val == "false") { + return false; + } else { + throw "Invalid value for boolean argument"; + } + } + }; -class Compactor: public LDBCommand { +class CompactorCommand: public LDBCommand { public: - Compactor(std::string& db_name, std::vector& args); + static string Name() { return "compact"; } - virtual ~Compactor() {} + CompactorCommand(const vector& params, + const map& options, const vector& flags); - static void Help(std::string& ret); + static void Help(string& ret); virtual void DoCommand(); private: bool null_from_; - std::string from_; + string from_; bool null_to_; - std::string to_; - bool hex_; + string to_; }; -class DBDumper: public LDBCommand { +class DBDumperCommand: public LDBCommand { public: - DBDumper(std::string& db_name, std::vector& args); - virtual ~DBDumper() {} - static void Help(std::string& ret); + static string Name() { return "dump"; } + + DBDumperCommand(const vector& params, + const map& options, const vector& flags); + + static void Help(string& ret); + virtual void DoCommand(); + private: bool null_from_; - std::string from_; + string from_; bool null_to_; - std::string to_; + string to_; int max_keys_; bool count_only_; bool print_stats_; - bool hex_; - bool hex_output_; - static const char* MAX_KEYS_ARG; - static const char* COUNT_ONLY_ARG; - static const char* STATS_ARG; - static const char* HEX_OUTPUT_ARG; + static const string ARG_COUNT_ONLY; + static const string ARG_STATS; }; -class DBLoader: public LDBCommand { +class DBLoaderCommand: public LDBCommand { public: - DBLoader(std::string& db_name, std::vector& args); - virtual ~DBLoader() {} - static void Help(std::string& ret); + static string Name() { return "load"; } + + DBLoaderCommand(string& db_name, vector& args); + + DBLoaderCommand(const vector& params, + const map& options, const vector& flags); + + static void Help(string& ret); virtual void DoCommand(); virtual leveldb::Options PrepareOptionsForOpenDB(); private: - bool hex_input_; bool create_if_missing_; bool disable_wal_; - static const char* HEX_INPUT_ARG; - static const char* CREATE_IF_MISSING_ARG; - static const char* DISABLE_WAL_ARG; + static const string ARG_DISABLE_WAL; }; -class DBQuerier: public LDBCommand { +class ReduceDBLevelsCommand : public LDBCommand { public: - DBQuerier(std::string& db_name, std::vector& args); - virtual ~DBQuerier() {} - static void Help(std::string& ret); - virtual void DoCommand(); - -private: - bool hex_; - - static const char* HEX_ARG; - - static const char* HELP_CMD; - static const char* GET_CMD; - static const char* PUT_CMD; - static const char* DELETE_CMD; -}; + static string Name() { return "reduce_levels"; } -class ReduceDBLevels : public LDBCommand { -public: - - ReduceDBLevels (std::string& db_name, std::vector& args); - - ~ReduceDBLevels() {} + ReduceDBLevelsCommand(const vector& params, + const map& options, const vector& flags); virtual leveldb::Options PrepareOptionsForOpenDB(); @@ -351,8 +393,9 @@ public: return true; } - static void Help(std::string& msg); - static std::vector PrepareArgs(int new_levels, + static void Help(string& msg); + + static vector PrepareArgs(const string& db_path, int new_levels, bool print_old_level = false); private: @@ -360,30 +403,159 @@ private: int new_levels_; bool print_old_levels_; - static const char* NEW_LEVLES_ARG; - static const char* PRINT_OLD_LEVELS_ARG; + static const string ARG_NEW_LEVELS; + static const string ARG_PRINT_OLD_LEVELS; Status GetOldNumOfLevels(leveldb::Options& opt, int* levels); }; -class WALDumper : public LDBCommand { +class WALDumperCommand : public LDBCommand { public: + static string Name() { return "dump_wal"; } - WALDumper (std::vector& args); - - ~WALDumper() {} + WALDumperCommand(const vector& params, + const map& options, const vector& flags); virtual bool NoDBOpen() { return true; } - static void Help(std::string& ret); + static void Help(string& ret); virtual void DoCommand(); + private: bool print_header_; - std::string wal_file_; + string wal_file_; + + static const string ARG_WAL_FILE; + static const string ARG_PRINT_HEADER; +}; + + +class GetCommand : public LDBCommand { +public: + static string Name() { return "get"; } + + GetCommand(const vector& params, const map& options, + const vector& flags); + + virtual void DoCommand(); + + static void Help(string& ret); + +private: + string key_; +}; + +class ApproxSizeCommand : public LDBCommand { +public: + static string Name() { return "approxsize"; } + + ApproxSizeCommand(const vector& params, + const map& options, const vector& flags); + + virtual void DoCommand(); + + static void Help(string& ret); + +private: + string start_key_; + string end_key_; +}; + +class BatchPutCommand : public LDBCommand { +public: + static string Name() { return "batchput"; } + + BatchPutCommand(const vector& params, + const map& options, const vector& flags); + + virtual void DoCommand(); + + static void Help(string& ret); + + virtual leveldb::Options PrepareOptionsForOpenDB(); + +private: + /** + * The key-values to be inserted. + */ + vector> key_values_; +}; + +class ScanCommand : public LDBCommand { +public: + static string Name() { return "scan"; } - static const char* WAL_FILE_ARG; + ScanCommand(const vector& params, const map& options, + const vector& flags); + + virtual void DoCommand(); + + static void Help(string& ret); + +private: + string start_key_; + string end_key_; + bool start_key_specified_; + bool end_key_specified_; + int max_keys_scanned_; }; + +class DeleteCommand : public LDBCommand { +public: + static string Name() { return "delete"; } + + DeleteCommand(const vector& params, + const map& options, const vector& flags); + + virtual void DoCommand(); + + static void Help(string& ret); + +private: + string key_; +}; + +class PutCommand : public LDBCommand { +public: + static string Name() { return "put"; } + + PutCommand(const vector& params, const map& options, + const vector& flags); + + virtual void DoCommand(); + + static void Help(string& ret); + + virtual leveldb::Options PrepareOptionsForOpenDB(); + +private: + string key_; + string value_; +}; + +/** + * Command that starts up a REPL shell that allows + * get/put/delete. + */ +class DBQuerierCommand: public LDBCommand { +public: + static string Name() { return "query"; } + + DBQuerierCommand(const vector& params, + const map& options, const vector& flags); + + static void Help(string& ret); + + virtual void DoCommand(); + +private: + static const char* HELP_CMD; + static const char* GET_CMD; + static const char* PUT_CMD; + static const char* DELETE_CMD; +}; + } #endif diff --git a/util/ldb_cmd_execute_result.h b/util/ldb_cmd_execute_result.h new file mode 100644 index 000000000..fb94a20b9 --- /dev/null +++ b/util/ldb_cmd_execute_result.h @@ -0,0 +1,74 @@ +#ifndef LEVELDB_UTIL_LDB_CMD_EXECUTE_RESULT_H_ +#define LEVELDB_UTIL_LDB_CMD_EXECUTE_RESULT_H_ + +namespace leveldb { + +class LDBCommandExecuteResult { +public: + enum State { + EXEC_NOT_STARTED = 0, EXEC_SUCCEED = 1, EXEC_FAILED = 2, + }; + + LDBCommandExecuteResult() { + state_ = EXEC_NOT_STARTED; + message_ = ""; + } + + LDBCommandExecuteResult(State state, std::string& msg) { + state_ = state; + message_ = msg; + } + + std::string ToString() { + std::string ret; + switch (state_) { + case EXEC_SUCCEED: + break; + case EXEC_FAILED: + ret.append("Failed: "); + break; + case EXEC_NOT_STARTED: + ret.append("Not started: "); + } + if (!message_.empty()) { + ret.append(message_); + } + return ret; + } + + void Reset() { + state_ = EXEC_NOT_STARTED; + message_ = ""; + } + + bool IsSucceed() { + return state_ == EXEC_SUCCEED; + } + + bool IsNotStarted() { + return state_ == EXEC_NOT_STARTED; + } + + bool IsFailed() { + return state_ == EXEC_FAILED; + } + + static LDBCommandExecuteResult SUCCEED(std::string msg) { + return LDBCommandExecuteResult(EXEC_SUCCEED, msg); + } + + static LDBCommandExecuteResult FAILED(std::string msg) { + return LDBCommandExecuteResult(EXEC_FAILED, msg); + } + +private: + State state_; + std::string message_; + + bool operator==(const LDBCommandExecuteResult&); + bool operator!=(const LDBCommandExecuteResult&); +}; + +} + +#endif diff --git a/util/options.cc b/util/options.cc index 66724d4e6..ea58893ab 100644 --- a/util/options.cc +++ b/util/options.cc @@ -23,11 +23,9 @@ Options::Options() write_buffer_size(4<<20), max_write_buffer_number(2), max_open_files(1000), - block_cache(NULL), block_size(4096), block_restart_interval(16), compression(kSnappyCompression), - compression_per_level(NULL), filter_policy(NULL), num_levels(7), level0_file_num_compaction_trigger(4), @@ -57,31 +55,32 @@ Options::Options() compaction_filter_args(NULL), CompactionFilter(NULL), disable_auto_compactions(false), - WAL_ttl_seconds(0){ + WAL_ttl_seconds(0), + manifest_preallocation_size(4 * 1024 * 1024) { + } void -Options::Dump( - Logger * log) const +Options::Dump(Logger* log) const { Log(log," Options.comparator: %s", comparator->Name()); Log(log," Options.create_if_missing: %d", create_if_missing); Log(log," Options.error_if_exists: %d", error_if_exists); Log(log," Options.paranoid_checks: %d", paranoid_checks); Log(log," Options.env: %p", env); - Log(log," Options.info_log: %p", info_log); + Log(log," Options.info_log: %p", info_log.get()); Log(log," Options.write_buffer_size: %zd", write_buffer_size); Log(log," Options.max_write_buffer_number: %d", max_write_buffer_number); Log(log," Options.max_open_files: %d", max_open_files); - Log(log," Options.block_cache: %p", block_cache); + Log(log," Options.block_cache: %p", block_cache.get()); if (block_cache) { Log(log," Options.block_cache_size: %zd", block_cache->GetCapacity()); } Log(log," Options.block_size: %zd", block_size); Log(log," Options.block_restart_interval: %d", block_restart_interval); - if (compression_per_level != NULL) { - for (int i = 0; i < num_levels; i++){ + if (!compression_per_level.empty()) { + for (int i = 0; i < compression_per_level.size(); i++) { Log(log," Options.compression[%d]: %d", i, compression_per_level[i]); } @@ -146,8 +145,10 @@ Options::Dump( CompactionFilter); Log(log," Options.disable_auto_compactions: %d", disable_auto_compactions); - Log(log," Options.WAL_ttl_seconds: %ld", + Log(log," Options.WAL_ttl_seconds: %ld", WAL_ttl_seconds); + Log(log," Options.manifest_preallocation_size: %ld", + manifest_preallocation_size); } // Options::Dump } // namespace leveldb diff --git a/util/posix_logger.h b/util/posix_logger.h index 513528314..5443c7733 100644 --- a/util/posix_logger.h +++ b/util/posix_logger.h @@ -12,19 +12,27 @@ #include #include #include +#include +#include +#ifdef OS_LINUX +#include +#endif #include "leveldb/env.h" namespace leveldb { +const int kDebugLogChunkSize = 128 * 1024; + class PosixLogger : public Logger { private: FILE* file_; uint64_t (*gettid_)(); // Return the thread id for the current thread size_t log_size_; + int fd_; public: PosixLogger(FILE* f, uint64_t (*gettid)()) : - file_(f), gettid_(gettid), log_size_(0) { } + file_(f), gettid_(gettid), log_size_(0), fd_(fileno(f)) { } virtual ~PosixLogger() { fclose(file_); } @@ -86,9 +94,27 @@ class PosixLogger : public Logger { } assert(p <= limit); - fwrite(base, 1, p - base, file_); + +#ifdef OS_LINUX + // If this write would cross a boundary of kDebugLogChunkSize + // space, pre-allocate more space to avoid overly large + // allocations from filesystem allocsize options. + const size_t write_size = p - base; + const int last_allocation_chunk = + ((kDebugLogChunkSize - 1 + log_size_) / kDebugLogChunkSize); + const int desired_allocation_chunk = + ((kDebugLogChunkSize - 1 + log_size_ + write_size) / + kDebugLogChunkSize); + if (last_allocation_chunk != desired_allocation_chunk) { + fallocate(fd_, FALLOC_FL_KEEP_SIZE, 0, + desired_allocation_chunk * kDebugLogChunkSize); + } +#endif + + fwrite(base, 1, write_size, file_); fflush(file_); - log_size_ += (p - base); + + log_size_ += write_size; if (base != buffer) { delete[] base; diff --git a/util/testutil.h b/util/testutil.h index 824e655bd..8b200db97 100644 --- a/util/testutil.h +++ b/util/testutil.h @@ -37,10 +37,10 @@ class ErrorEnv : public EnvWrapper { num_writable_file_errors_(0) { } virtual Status NewWritableFile(const std::string& fname, - WritableFile** result) { + unique_ptr* result) { + result->reset(); if (writable_file_error_) { ++num_writable_file_errors_; - *result = NULL; return Status::IOError(fname, "fake error"); } return target()->NewWritableFile(fname, result);