From: Yu Zhang Date: Tue, 8 Oct 2024 18:31:51 +0000 (-0700) Subject: Fix a bug for surfacing write unix time (#13057) X-Git-Tag: v9.7.2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2fef013616ac8e474d19f6b0815155a37ae9350f;p=rocksdb.git Fix a bug for surfacing write unix time (#13057) Summary: The write unix time from non L0 files are not surfaced properly because the level's wrapper iterator doesn't have a `write_unix_time` implementation that delegates to the corresponding file. The unit test didn't catch this because it incorrectly destroy the old db and reopen to check write time, instead of just reopen and check. This fix also include a change to support ldb's scan command to get write time for easier debugging. Pull Request resolved: https://github.com/facebook/rocksdb/pull/13057 Test Plan: Updated unit tests Reviewed By: pdillinger Differential Revision: D64015107 Pulled By: jowlyzhang fbshipit-source-id: 244474f78a034f80c9235eea2aa8a0f4e54dff59 --- diff --git a/db/compaction/tiered_compaction_test.cc b/db/compaction/tiered_compaction_test.cc index 93b979121..6be3c63eb 100644 --- a/db/compaction/tiered_compaction_test.cc +++ b/db/compaction/tiered_compaction_test.cc @@ -2512,6 +2512,7 @@ TEST_P(IteratorWriteTimeTest, ReadFromMemtables) { start_time + kSecondsPerRecording * (i + 1)); } } + ASSERT_EQ(kNumKeys, i); ASSERT_OK(iter->status()); } @@ -2531,12 +2532,13 @@ TEST_P(IteratorWriteTimeTest, ReadFromMemtables) { } } ASSERT_OK(iter->status()); + ASSERT_EQ(-1, i); } // Reopen the DB and disable the seqno to time recording, data with user // specified write time can still get a write time before it's flushed. options.preserve_internal_time_seconds = 0; - DestroyAndReopen(options); + Reopen(options); ASSERT_OK(TimedPut(Key(kKeyWithWriteTime), rnd.RandomString(100), kUserSpecifiedWriteTime)); { @@ -2613,6 +2615,7 @@ TEST_P(IteratorWriteTimeTest, ReadFromSstFile) { } } ASSERT_OK(iter->status()); + ASSERT_EQ(kNumKeys, i); } // Backward iteration @@ -2632,12 +2635,13 @@ TEST_P(IteratorWriteTimeTest, ReadFromSstFile) { } } ASSERT_OK(iter->status()); + ASSERT_EQ(-1, i); } // Reopen the DB and disable the seqno to time recording. Data retrieved from // SST files still have write time available. options.preserve_internal_time_seconds = 0; - DestroyAndReopen(options); + Reopen(options); dbfull()->TEST_WaitForPeriodicTaskRun( [&] { mock_clock_->MockSleepForSeconds(kSecondsPerRecording); }); @@ -2663,6 +2667,7 @@ TEST_P(IteratorWriteTimeTest, ReadFromSstFile) { start_time + kSecondsPerRecording * (i + 1)); } } + ASSERT_EQ(kNumKeys, i); ASSERT_OK(iter->status()); } @@ -2686,6 +2691,7 @@ TEST_P(IteratorWriteTimeTest, ReadFromSstFile) { VerifyKeyAndWriteTime(iter.get(), Key(i), 0); } ASSERT_OK(iter->status()); + ASSERT_EQ(kNumKeys, i); } Close(); } diff --git a/db/forward_iterator.cc b/db/forward_iterator.cc index 0bf7c15ab..528978d18 100644 --- a/db/forward_iterator.cc +++ b/db/forward_iterator.cc @@ -167,6 +167,10 @@ class ForwardLevelIterator : public InternalIterator { assert(valid_); return file_iter_->value(); } + uint64_t write_unix_time() const override { + assert(valid_); + return file_iter_->write_unix_time(); + } Status status() const override { if (!status_.ok()) { return status_; diff --git a/db/version_set.cc b/db/version_set.cc index d28b2e2d9..eeaf5b492 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1050,6 +1050,11 @@ class LevelIterator final : public InternalIterator { return file_iter_.value(); } + uint64_t write_unix_time() const override { + assert(Valid()); + return file_iter_.write_unix_time(); + } + Status status() const override { return file_iter_.iter() ? file_iter_.status() : Status::OK(); } diff --git a/include/rocksdb/utilities/ldb_cmd.h b/include/rocksdb/utilities/ldb_cmd.h index c3a12b694..55d5663be 100644 --- a/include/rocksdb/utilities/ldb_cmd.h +++ b/include/rocksdb/utilities/ldb_cmd.h @@ -74,6 +74,7 @@ class LDBCommand { static const std::string ARG_DECODE_BLOB_INDEX; static const std::string ARG_DUMP_UNCOMPRESSED_BLOBS; static const std::string ARG_READ_TIMESTAMP; + static const std::string ARG_GET_WRITE_UNIX_TIME; struct ParsedParams { std::string cmd; diff --git a/table/iterator.cc b/table/iterator.cc index 8306f5a04..999522575 100644 --- a/table/iterator.cc +++ b/table/iterator.cc @@ -74,6 +74,10 @@ class EmptyInternalIterator : public InternalIteratorBase { assert(false); return TValue(); } + uint64_t write_unix_time() const override { + assert(false); + return std::numeric_limits::max(); + } Status status() const override { return status_; } private: diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index e954e7007..3ce811972 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -27,6 +27,7 @@ #include "db/write_batch_internal.h" #include "file/filename.h" #include "rocksdb/cache.h" +#include "rocksdb/comparator.h" #include "rocksdb/experimental.h" #include "rocksdb/file_checksum.h" #include "rocksdb/filter_policy.h" @@ -110,6 +111,7 @@ const std::string LDBCommand::ARG_DECODE_BLOB_INDEX = "decode_blob_index"; const std::string LDBCommand::ARG_DUMP_UNCOMPRESSED_BLOBS = "dump_uncompressed_blobs"; const std::string LDBCommand::ARG_READ_TIMESTAMP = "read_timestamp"; +const std::string LDBCommand::ARG_GET_WRITE_UNIX_TIME = "get_write_unix_time"; const char* LDBCommand::DELIM = " ==> "; @@ -3622,11 +3624,12 @@ void BatchPutCommand::OverrideBaseOptions() { ScanCommand::ScanCommand(const std::vector& /*params*/, const std::map& options, const std::vector& flags) - : LDBCommand(options, flags, true, - BuildCmdLineOptions( - {ARG_TTL, ARG_NO_VALUE, ARG_HEX, ARG_KEY_HEX, ARG_TO, - ARG_VALUE_HEX, ARG_FROM, ARG_TIMESTAMP, ARG_MAX_KEYS, - ARG_TTL_START, ARG_TTL_END, ARG_READ_TIMESTAMP})), + : LDBCommand( + options, flags, true, + BuildCmdLineOptions({ARG_TTL, ARG_NO_VALUE, ARG_HEX, ARG_KEY_HEX, + ARG_TO, ARG_VALUE_HEX, ARG_FROM, ARG_TIMESTAMP, + ARG_MAX_KEYS, ARG_TTL_START, ARG_TTL_END, + ARG_READ_TIMESTAMP, ARG_GET_WRITE_UNIX_TIME})), start_key_specified_(false), end_key_specified_(false), max_keys_scanned_(-1), @@ -3670,6 +3673,7 @@ ScanCommand::ScanCommand(const std::vector& /*params*/, ARG_MAX_KEYS + " has a value out-of-range"); } } + get_write_unix_time_ = IsFlagPresent(flags_, ARG_GET_WRITE_UNIX_TIME); } void ScanCommand::Help(std::string& ret) { @@ -3683,6 +3687,7 @@ void ScanCommand::Help(std::string& ret) { ret.append(" [--" + ARG_TTL_END + "=:- is exclusive]"); ret.append(" [--" + ARG_NO_VALUE + "]"); ret.append(" [--" + ARG_READ_TIMESTAMP + "=] "); + ret.append(" [--" + ARG_GET_WRITE_UNIX_TIME + "]"); ret.append("\n"); } @@ -3765,6 +3770,22 @@ void ScanCommand::DoCommand() { fprintf(stdout, "%s\n", str.c_str()); } + if (get_write_unix_time_) { + std::string write_unix_time; + uint64_t write_time_int = std::numeric_limits::max(); + Status s = + it->GetProperty("rocksdb.iterator.write-time", &write_unix_time); + if (s.ok()) { + s = DecodeU64Ts(write_unix_time, &write_time_int); + } + if (!s.ok()) { + fprintf(stdout, " Failed to get write unix time: %s\n", + s.ToString().c_str()); + } else { + fprintf(stdout, " write unix time: %s\n", + std::to_string(write_time_int).c_str()); + } + } num_keys_scanned++; if (max_keys_scanned_ >= 0 && num_keys_scanned >= max_keys_scanned_) { break; diff --git a/tools/ldb_cmd_impl.h b/tools/ldb_cmd_impl.h index 73130401e..3f7273dd5 100644 --- a/tools/ldb_cmd_impl.h +++ b/tools/ldb_cmd_impl.h @@ -511,6 +511,7 @@ class ScanCommand : public LDBCommand { bool end_key_specified_; int max_keys_scanned_; bool no_value_; + bool get_write_unix_time_; }; class DeleteCommand : public LDBCommand {