* Add transaction `get_pinned` and `multi_get` to C API.
* Add two-phase commit support to C API.
* Add `rocksdb_transaction_get_writebatch_wi` and `rocksdb_transaction_rebuild_from_writebatch` to C API.
+* Add `rocksdb_options_get_blob_file_starting_level` and `rocksdb_options_set_blob_file_starting_level` to C API.
+* Add `blobFileStartingLevel` and `setBlobFileStartingLevel` to Java API.
* Add SingleDelete for DB in C API
* Add User Defined Timestamp in C API.
* `rocksdb_comparator_with_ts_create` to create timestamp aware comparator
* Add FileSystem::ReadAsync API in io_tracing
* Add blob garbage collection parameters `blob_garbage_collection_policy` and `blob_garbage_collection_age_cutoff` to both force-enable and force-disable GC, as well as selectively override age cutoff when using CompactRange.
* Add an extra sanity check in `GetSortedWalFiles()` (also used by `GetLiveFilesStorageInfo()`, `BackupEngine`, and `Checkpoint`) to reduce risk of successfully created backup or checkpoint failing to open because of missing WAL file.
+* Add a new column family option `blob_file_starting_level` to enable writing blob files during flushes and compactions starting from the specified LSM tree level.
### Behavior changes
* DB::Open(), DB::OpenAsSecondary() will fail if a Logger cannot be created (#9984)
Close();
}
+TEST_F(DBBlobCompactionTest, BlobCompactWithStartingLevel) {
+ Options options = GetDefaultOptions();
+
+ options.enable_blob_files = true;
+ options.min_blob_size = 1000;
+ options.blob_file_starting_level = 5;
+ options.create_if_missing = true;
+
+ // Open DB with fixed-prefix sst-partitioner so that compaction will cut
+ // new table file when encountering a new key whose 1-byte prefix changes.
+ constexpr size_t key_len = 1;
+ options.sst_partitioner_factory =
+ NewSstPartitionerFixedPrefixFactory(key_len);
+
+ ASSERT_OK(TryReopen(options));
+
+ constexpr size_t blob_size = 3000;
+
+ constexpr char first_key[] = "a";
+ const std::string first_blob(blob_size, 'a');
+ ASSERT_OK(Put(first_key, first_blob));
+
+ constexpr char second_key[] = "b";
+ const std::string second_blob(2 * blob_size, 'b');
+ ASSERT_OK(Put(second_key, second_blob));
+
+ constexpr char third_key[] = "d";
+ const std::string third_blob(blob_size, 'd');
+ ASSERT_OK(Put(third_key, third_blob));
+
+ ASSERT_OK(Flush());
+
+ constexpr char fourth_key[] = "c";
+ const std::string fourth_blob(blob_size, 'c');
+ ASSERT_OK(Put(fourth_key, fourth_blob));
+
+ ASSERT_OK(Flush());
+
+ ASSERT_EQ(0, GetBlobFileNumbers().size());
+ ASSERT_EQ(2, NumTableFilesAtLevel(/*level=*/0));
+ ASSERT_EQ(0, NumTableFilesAtLevel(/*level=*/1));
+
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
+ /*end=*/nullptr));
+
+ // No blob file should be created since blob_file_starting_level is 5.
+ ASSERT_EQ(0, GetBlobFileNumbers().size());
+ ASSERT_EQ(0, NumTableFilesAtLevel(/*level=*/0));
+ ASSERT_EQ(4, NumTableFilesAtLevel(/*level=*/1));
+
+ {
+ options.blob_file_starting_level = 1;
+ DestroyAndReopen(options);
+
+ ASSERT_OK(Put(first_key, first_blob));
+ ASSERT_OK(Put(second_key, second_blob));
+ ASSERT_OK(Put(third_key, third_blob));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Put(fourth_key, fourth_blob));
+ ASSERT_OK(Flush());
+
+ ASSERT_EQ(0, GetBlobFileNumbers().size());
+ ASSERT_EQ(2, NumTableFilesAtLevel(/*level=*/0));
+ ASSERT_EQ(0, NumTableFilesAtLevel(/*level=*/1));
+
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
+ /*end=*/nullptr));
+ // The compaction's output level is equal to blob_file_starting_level.
+ ASSERT_EQ(1, GetBlobFileNumbers().size());
+ ASSERT_EQ(0, NumTableFilesAtLevel(/*level=*/0));
+ ASSERT_EQ(4, NumTableFilesAtLevel(/*level=*/1));
+ }
+
+ Close();
+}
+
TEST_F(DBBlobCompactionTest, BlindWriteFilter) {
Options options = GetDefaultOptions();
options.enable_blob_files = true;
snapshots.empty() ? 0 : snapshots.back(), snapshot_checker);
std::unique_ptr<BlobFileBuilder> blob_file_builder(
- (mutable_cf_options.enable_blob_files && blob_file_additions)
+ (mutable_cf_options.enable_blob_files &&
+ tboptions.level_at_creation >=
+ mutable_cf_options.blob_file_starting_level &&
+ blob_file_additions)
? new BlobFileBuilder(
versions, fs, &ioptions, &mutable_cf_options, &file_options,
job_id, tboptions.column_family_id,
return opt->rep.blob_compaction_readahead_size;
}
+void rocksdb_options_set_blob_file_starting_level(rocksdb_options_t* opt,
+ int val) {
+ opt->rep.blob_file_starting_level = val;
+}
+
+int rocksdb_options_get_blob_file_starting_level(rocksdb_options_t* opt) {
+ return opt->rep.blob_file_starting_level;
+}
+
void rocksdb_options_set_num_levels(rocksdb_options_t* opt, int n) {
opt->rep.num_levels = n;
}
CheckCondition(262144 ==
rocksdb_options_get_blob_compaction_readahead_size(o));
+ rocksdb_options_set_blob_file_starting_level(o, 5);
+ CheckCondition(5 == rocksdb_options_get_blob_file_starting_level(o));
+
// Create a copy that should be equal to the original.
rocksdb_options_t* copy;
copy = rocksdb_options_create_copy(o);
std::vector<std::string> blob_file_paths;
std::unique_ptr<BlobFileBuilder> blob_file_builder(
- mutable_cf_options->enable_blob_files
+ (mutable_cf_options->enable_blob_files &&
+ sub_compact->compaction->output_level() >=
+ mutable_cf_options->blob_file_starting_level)
? new BlobFileBuilder(
versions_, fs_.get(),
sub_compact->compaction->immutable_options(),
DECLARE_double(blob_garbage_collection_age_cutoff);
DECLARE_double(blob_garbage_collection_force_threshold);
DECLARE_uint64(blob_compaction_readahead_size);
+DECLARE_int32(blob_file_starting_level);
DECLARE_int32(approximate_size_one_in);
DECLARE_bool(sync_fault_injection);
"24 bytes. If not specified, it will be evenly distributed");
DEFINE_int32(key_window_scale_factor, 10,
- "This value will be multiplied by 100 to come up with a window "
- "size for varying the key length");
+ "This value will be multiplied by 100 to come up with a window "
+ "size for varying the key length");
DEFINE_int32(column_families, 10, "Number of column families");
.blob_compaction_readahead_size,
"[Integrated BlobDB] Compaction readahead for blob files.");
+DEFINE_int32(
+ blob_file_starting_level,
+ ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().blob_file_starting_level,
+ "[Integrated BlobDB] Enable writing blob files during flushes and "
+ "compactions starting from the specified level.");
+
static const bool FLAGS_subcompactions_dummy __attribute__((__unused__)) =
RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range);
" random key ranges.");
DEFINE_int32(read_fault_one_in, 1000,
- "On non-zero, enables fault injection on read");
+ "On non-zero, enables fault injection on read");
DEFINE_int32(get_property_one_in, 1000,
"If non-zero, then DB::GetProperty() will be called to get various"
std::vector<std::string>{"0.5", "0.75", "1.0"});
options_tbl.emplace("blob_compaction_readahead_size",
std::vector<std::string>{"0", "1M", "4M"});
+ options_tbl.emplace("blob_file_starting_level",
+ std::vector<std::string>{"0", "1", "2"});
}
options_table_ = std::move(options_tbl);
"Integrated BlobDB: blob files enabled %d, min blob size %" PRIu64
", blob file size %" PRIu64
", blob compression type %s, blob GC enabled %d, cutoff %f, force "
- "threshold %f, blob compaction readahead size %" PRIu64 "\n",
+ "threshold %f, blob compaction readahead size %" PRIu64
+ ", blob file starting level %d\n",
options_.enable_blob_files, options_.min_blob_size,
options_.blob_file_size,
CompressionTypeToString(options_.blob_compression_type).c_str(),
options_.enable_blob_garbage_collection,
options_.blob_garbage_collection_age_cutoff,
options_.blob_garbage_collection_force_threshold,
- options_.blob_compaction_readahead_size);
+ options_.blob_compaction_readahead_size,
+ options_.blob_file_starting_level);
fprintf(stdout, "DB path: [%s]\n", FLAGS_db.c_str());
options.blob_garbage_collection_force_threshold =
FLAGS_blob_garbage_collection_force_threshold;
options.blob_compaction_readahead_size = FLAGS_blob_compaction_readahead_size;
+ options.blob_file_starting_level = FLAGS_blob_file_starting_level;
options.wal_compression =
StringToCompressionType(FLAGS_wal_compression.c_str());
// Dynamically changeable through the SetOptions() API
uint64_t blob_compaction_readahead_size = 0;
+ // Enable blob files starting from a certain LSM tree level.
+ //
+ // For certain use cases that have a mix of short-lived and long-lived values,
+ // it might make sense to support extracting large values only during
+ // compactions whose output level is greater than or equal to a specified LSM
+ // tree level (e.g. compactions into L1/L2/... or above). This could reduce
+ // the space amplification caused by large values that are turned into garbage
+ // shortly after being written at the price of some write amplification
+ // incurred by long-lived values whose extraction to blob files is delayed.
+ //
+ // Default: 0
+ //
+ // Dynamically changeable through the SetOptions() API
+ int blob_file_starting_level = 0;
+
// Create ColumnFamilyOptions with default values for all fields
AdvancedColumnFamilyOptions();
// Create ColumnFamilyOptions from Options
extern ROCKSDB_LIBRARY_API uint64_t
rocksdb_options_get_blob_compaction_readahead_size(rocksdb_options_t* opt);
+extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_file_starting_level(
+ rocksdb_options_t* opt, int val);
+extern ROCKSDB_LIBRARY_API int rocksdb_options_get_blob_file_starting_level(
+ rocksdb_options_t* opt);
+
/* returns a pointer to a malloc()-ed, null terminated string */
extern ROCKSDB_LIBRARY_API char* rocksdb_options_statistics_get_string(
rocksdb_options_t* opt);
static const std::string ARG_BLOB_GARBAGE_COLLECTION_AGE_CUTOFF;
static const std::string ARG_BLOB_GARBAGE_COLLECTION_FORCE_THRESHOLD;
static const std::string ARG_BLOB_COMPACTION_READAHEAD_SIZE;
+ static const std::string ARG_BLOB_FILE_STARTING_LEVEL;
static const std::string ARG_DECODE_BLOB_INDEX;
static const std::string ARG_DUMP_UNCOMPRESSED_BLOBS;
return static_cast<jlong>(opts->blob_compaction_readahead_size);
}
+/*
+ * Class: org_rocksdb_Options
+ * Method: setBlobFileStartingLevel
+ * Signature: (JI)V
+ *
+ * Stores jblob_file_starting_level in the blob_file_starting_level field of
+ * the Options object that jhandle is cast to.
+ */
+void Java_org_rocksdb_Options_setBlobFileStartingLevel(
+ JNIEnv*, jobject, jlong jhandle, jint jblob_file_starting_level) {
+ auto* opts = reinterpret_cast<ROCKSDB_NAMESPACE::Options*>(jhandle);
+ opts->blob_file_starting_level = jblob_file_starting_level;
+}
+
+/*
+ * Class: org_rocksdb_Options
+ * Method: blobFileStartingLevel
+ * Signature: (J)I
+ *
+ * Returns the blob_file_starting_level field of the Options object that
+ * jhandle is cast to, converted to jint.
+ */
+jint Java_org_rocksdb_Options_blobFileStartingLevel(JNIEnv*, jobject,
+ jlong jhandle) {
+ auto* opts = reinterpret_cast<ROCKSDB_NAMESPACE::Options*>(jhandle);
+ return static_cast<jint>(opts->blob_file_starting_level);
+}
+
//////////////////////////////////////////////////////////////////////////////
// ROCKSDB_NAMESPACE::ColumnFamilyOptions
*/
long blobCompactionReadaheadSize();
+ /**
+ * Set the LSM tree level starting from which blob files are enabled.
+ *
+ * Default: 0
+ *
+ * Dynamically changeable through
+ * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}.
+ *
+ * @param blobFileStartingLevel the starting level to enable blob files
+ *
+ * @return the reference to the current options.
+ */
+ T setBlobFileStartingLevel(final int blobFileStartingLevel);
+
+ /**
+ * Get the starting LSM tree level to enable blob files.
+ *
+ * Default: 0
+ *
+ * @return the current LSM tree level to enable blob files.
+ */
+ int blobFileStartingLevel();
+
//
// END options for blobs (integrated BlobDB)
//
return blobCompactionReadaheadSize(nativeHandle_);
}
+ /**
+ * Set the LSM tree level starting from which blob files are enabled.
+ *
+ * Default: 0
+ *
+ * Dynamically changeable through
+ * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}.
+ *
+ * @param blobFileStartingLevel the starting level to enable blob files
+ *
+ * @return the reference to the current options.
+ */
+ @Override
+ public ColumnFamilyOptions setBlobFileStartingLevel(final int blobFileStartingLevel) {
+ setBlobFileStartingLevel(nativeHandle_, blobFileStartingLevel);
+ return this;
+ }
+
+ /**
+ * Get the starting LSM tree level to enable blob files.
+ *
+ * Default: 0
+ *
+ * @return the current LSM tree level to enable blob files.
+ */
+ @Override
+ public int blobFileStartingLevel() {
+ return blobFileStartingLevel(nativeHandle_);
+ }
+
//
// END options for blobs (integrated BlobDB)
//
private native void setBlobCompactionReadaheadSize(
final long nativeHandle_, final long blobCompactionReadaheadSize);
private native long blobCompactionReadaheadSize(final long nativeHandle_);
+ private native void setBlobFileStartingLevel(
+ final long nativeHandle_, final int blobFileStartingLevel);
+ private native int blobFileStartingLevel(final long nativeHandle_);
// instance variables
// NOTE: If you add new member variables, please update the copy constructor above!
enable_blob_garbage_collection(ValueType.BOOLEAN),
blob_garbage_collection_age_cutoff(ValueType.DOUBLE),
blob_garbage_collection_force_threshold(ValueType.DOUBLE),
- blob_compaction_readahead_size(ValueType.LONG);
+ blob_compaction_readahead_size(ValueType.LONG),
+ blob_file_starting_level(ValueType.INT);
private final ValueType valueType;
BlobOption(final ValueType valueType) {
public long blobCompactionReadaheadSize() {
return getLong(BlobOption.blob_compaction_readahead_size);
}
+
+ @Override
+ public MutableColumnFamilyOptionsBuilder setBlobFileStartingLevel(
+ final int blobFileStartingLevel) {
+ return setInt(BlobOption.blob_file_starting_level, blobFileStartingLevel);
+ }
+
+ @Override
+ public int blobFileStartingLevel() {
+ return getInt(BlobOption.blob_file_starting_level);
+ }
}
}
return blobCompactionReadaheadSize(nativeHandle_);
}
+ @Override
+ public Options setBlobFileStartingLevel(final int blobFileStartingLevel) {
+ setBlobFileStartingLevel(nativeHandle_, blobFileStartingLevel);
+ return this;
+ }
+
+ @Override
+ public int blobFileStartingLevel() {
+ return blobFileStartingLevel(nativeHandle_);
+ }
+
//
// END options for blobs (integrated BlobDB)
//
private native void setBlobCompactionReadaheadSize(
final long nativeHandle_, final long blobCompactionReadaheadSize);
private native long blobCompactionReadaheadSize(final long nativeHandle_);
+ private native void setBlobFileStartingLevel(
+ final long nativeHandle_, final int blobFileStartingLevel);
+ private native int blobFileStartingLevel(final long nativeHandle_);
// instance variables
// NOTE: If you add new member variables, please update the copy constructor above!
assertThat(options.setBlobGarbageCollectionAgeCutoff(0.89)).isEqualTo(options);
assertThat(options.setBlobGarbageCollectionForceThreshold(0.80)).isEqualTo(options);
assertThat(options.setBlobCompactionReadaheadSize(262144L)).isEqualTo(options);
+ assertThat(options.setBlobFileStartingLevel(0)).isEqualTo(options);
assertThat(options.enableBlobFiles()).isEqualTo(true);
assertThat(options.minBlobSize()).isEqualTo(132768L);
assertThat(options.blobGarbageCollectionAgeCutoff()).isEqualTo(0.89);
assertThat(options.blobGarbageCollectionForceThreshold()).isEqualTo(0.80);
assertThat(options.blobCompactionReadaheadSize()).isEqualTo(262144L);
+ assertThat(options.blobFileStartingLevel()).isEqualTo(0);
}
}
.isEqualTo(columnFamilyOptions);
assertThat(columnFamilyOptions.setBlobCompactionReadaheadSize(262144L))
.isEqualTo(columnFamilyOptions);
+ assertThat(columnFamilyOptions.setBlobFileStartingLevel(0)).isEqualTo(columnFamilyOptions);
assertThat(columnFamilyOptions.enableBlobFiles()).isEqualTo(true);
assertThat(columnFamilyOptions.minBlobSize()).isEqualTo(132768L);
assertThat(columnFamilyOptions.blobGarbageCollectionAgeCutoff()).isEqualTo(0.89);
assertThat(columnFamilyOptions.blobGarbageCollectionForceThreshold()).isEqualTo(0.80);
assertThat(columnFamilyOptions.blobCompactionReadaheadSize()).isEqualTo(262144L);
+ assertThat(columnFamilyOptions.blobFileStartingLevel()).isEqualTo(0);
}
}
.setEnableBlobGarbageCollection(true)
.setBlobGarbageCollectionAgeCutoff(0.89)
.setBlobGarbageCollectionForceThreshold(0.80)
- .setBlobCompactionReadaheadSize(262144);
+ .setBlobCompactionReadaheadSize(262144)
+ .setBlobFileStartingLevel(1);
assertThat(builder.enableBlobFiles()).isEqualTo(true);
assertThat(builder.minBlobSize()).isEqualTo(1024);
assertThat(builder.blobGarbageCollectionAgeCutoff()).isEqualTo(0.89);
assertThat(builder.blobGarbageCollectionForceThreshold()).isEqualTo(0.80);
assertThat(builder.blobCompactionReadaheadSize()).isEqualTo(262144);
+ assertThat(builder.blobFileStartingLevel()).isEqualTo(1);
builder.setEnableBlobFiles(false)
.setMinBlobSize(4096)
.setEnableBlobGarbageCollection(false)
.setBlobGarbageCollectionAgeCutoff(0.91)
.setBlobGarbageCollectionForceThreshold(0.96)
- .setBlobCompactionReadaheadSize(1024);
+ .setBlobCompactionReadaheadSize(1024)
+ .setBlobFileStartingLevel(0);
assertThat(builder.enableBlobFiles()).isEqualTo(false);
assertThat(builder.minBlobSize()).isEqualTo(4096);
assertThat(builder.blobGarbageCollectionAgeCutoff()).isEqualTo(0.91);
assertThat(builder.blobGarbageCollectionForceThreshold()).isEqualTo(0.96);
assertThat(builder.blobCompactionReadaheadSize()).isEqualTo(1024);
+ assertThat(builder.blobFileStartingLevel()).isEqualTo(0);
final MutableColumnFamilyOptions options = builder.build();
assertThat(options.getKeys())
.isEqualTo(new String[] {"enable_blob_files", "min_blob_size", "blob_file_size",
"blob_compression_type", "enable_blob_garbage_collection",
"blob_garbage_collection_age_cutoff", "blob_garbage_collection_force_threshold",
- "blob_compaction_readahead_size"});
+ "blob_compaction_readahead_size", "blob_file_starting_level"});
+ // One expected value per key asserted above, in the same order; the trailing
+ // "0" is the blob_file_starting_level last set on the builder.
assertThat(options.getValues())
.isEqualTo(new String[] {
- "false", "4096", "2048", "LZ4_COMPRESSION", "false", "0.91", "0.96", "1024"});
+ "false", "4096", "2048", "LZ4_COMPRESSION", "false", "0.91", "0.96", "1024", "0"});
final String optionsString =
"bottommost_compression=kDisableCompressionOption; sample_for_compression=0; "
+ "blob_garbage_collection_age_cutoff=0.250000; blob_garbage_collection_force_threshold=0.800000; arena_block_size=1048576; enable_blob_garbage_collection=false; "
- + "level0_stop_writes_trigger=36; min_blob_size=65536; blob_compaction_readahead_size=262144; "
+ + "level0_stop_writes_trigger=36; min_blob_size=65536; blob_compaction_readahead_size=262144; blob_file_starting_level=5; "
+ "compaction_options_universal={allow_trivial_move=false;stop_style=kCompactionStopStyleTotalSize;min_merge_width=2;"
+ "compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;size_ratio=1;}; "
+ "target_file_size_base=67108864; max_bytes_for_level_base=268435456; memtable_whole_key_filtering=false; "
assertThat(cf.level0StopWritesTrigger()).isEqualTo(36);
assertThat(cf.minBlobSize()).isEqualTo(65536);
assertThat(cf.blobCompactionReadaheadSize()).isEqualTo(262144);
+ assertThat(cf.blobFileStartingLevel()).isEqualTo(5);
assertThat(cf.targetFileSizeBase()).isEqualTo(67108864);
assertThat(cf.maxBytesForLevelBase()).isEqualTo(268435456);
assertThat(cf.softPendingCompactionBytesLimit()).isEqualTo(68719476736L);
.setBlobGarbageCollectionAgeCutoff(0.25)
.setBlobGarbageCollectionForceThreshold(0.80)
.setBlobCompactionReadaheadSize(262144)
+ .setBlobFileStartingLevel(2)
.setArenaBlockSize(42)
.setMemtablePrefixBloomSizeRatio(0.17)
.setMemtableWholeKeyFiltering(false)
assertThat(builder1.blobGarbageCollectionAgeCutoff()).isEqualTo(0.25);
assertThat(builder1.blobGarbageCollectionForceThreshold()).isEqualTo(0.80);
assertThat(builder1.blobCompactionReadaheadSize()).isEqualTo(262144);
+ assertThat(builder1.blobFileStartingLevel()).isEqualTo(2);
assertThat(builder1.minBlobSize()).isEqualTo(minBlobSize);
assertThat(builder1.arenaBlockSize()).isEqualTo(42);
assertThat(builder1.memtablePrefixBloomSizeRatio()).isEqualTo(0.17);
.setBlobGarbageCollectionAgeCutoff(0.25)
.setBlobGarbageCollectionForceThreshold(0.80)
.setBlobCompactionReadaheadSize(262144)
+ .setBlobFileStartingLevel(3)
.setArenaBlockSize(42)
.setMemtablePrefixBloomSizeRatio(0.17)
.setMemtableWholeKeyFiltering(false)
assertThat(builder1.blobGarbageCollectionAgeCutoff()).isEqualTo(0.25);
assertThat(builder1.blobGarbageCollectionForceThreshold()).isEqualTo(0.80);
assertThat(builder1.blobCompactionReadaheadSize()).isEqualTo(262144);
+ assertThat(builder1.blobFileStartingLevel()).isEqualTo(3);
assertThat(builder1.minBlobSize()).isEqualTo(minBlobSize);
assertThat(builder1.arenaBlockSize()).isEqualTo(42);
assertThat(builder1.memtablePrefixBloomSizeRatio()).isEqualTo(0.17);
.setBlobGarbageCollectionAgeCutoff(0.25)
.setBlobGarbageCollectionForceThreshold(0.80)
.setBlobCompactionReadaheadSize(131072)
+ .setBlobFileStartingLevel(4)
.setArenaBlockSize(42)
.setMemtablePrefixBloomSizeRatio(0.17)
.setMemtableWholeKeyFiltering(false)
assertThat(builder1.blobGarbageCollectionAgeCutoff()).isEqualTo(0.25);
assertThat(builder1.blobGarbageCollectionForceThreshold()).isEqualTo(0.80);
assertThat(builder1.blobCompactionReadaheadSize()).isEqualTo(131072);
+ assertThat(builder1.blobFileStartingLevel()).isEqualTo(4);
assertThat(builder1.minBlobSize()).isEqualTo(minBlobSize);
assertThat(builder1.arenaBlockSize()).isEqualTo(42);
assertThat(builder1.memtablePrefixBloomSizeRatio()).isEqualTo(0.17);
{offsetof(struct MutableCFOptions, blob_compaction_readahead_size),
OptionType::kUInt64T, OptionVerificationType::kNormal,
OptionTypeFlags::kMutable}},
+ {"blob_file_starting_level",
+ {offsetof(struct MutableCFOptions, blob_file_starting_level),
+ OptionType::kInt, OptionVerificationType::kNormal,
+ OptionTypeFlags::kMutable}},
{"sample_for_compression",
{offsetof(struct MutableCFOptions, sample_for_compression),
OptionType::kUInt64T, OptionVerificationType::kNormal,
blob_garbage_collection_force_threshold);
ROCKS_LOG_INFO(log, " blob_compaction_readahead_size: %" PRIu64,
blob_compaction_readahead_size);
+ ROCKS_LOG_INFO(log, " blob_file_starting_level: %d",
+ blob_file_starting_level);
ROCKS_LOG_INFO(log, " bottommost_temperature: %d",
static_cast<int>(bottommost_temperature));
blob_garbage_collection_force_threshold(
options.blob_garbage_collection_force_threshold),
blob_compaction_readahead_size(options.blob_compaction_readahead_size),
+ blob_file_starting_level(options.blob_file_starting_level),
max_sequential_skip_in_iterations(
options.max_sequential_skip_in_iterations),
check_flush_compaction_key_order(
blob_garbage_collection_age_cutoff(0.0),
blob_garbage_collection_force_threshold(0.0),
blob_compaction_readahead_size(0),
+ blob_file_starting_level(0),
max_sequential_skip_in_iterations(0),
check_flush_compaction_key_order(true),
paranoid_file_checks(false),
double blob_garbage_collection_age_cutoff;
double blob_garbage_collection_force_threshold;
uint64_t blob_compaction_readahead_size;
+ int blob_file_starting_level;
// Misc options
uint64_t max_sequential_skip_in_iterations;
options.blob_garbage_collection_age_cutoff),
blob_garbage_collection_force_threshold(
options.blob_garbage_collection_force_threshold),
- blob_compaction_readahead_size(options.blob_compaction_readahead_size) {
+ blob_compaction_readahead_size(options.blob_compaction_readahead_size),
+ blob_file_starting_level(options.blob_file_starting_level) {
assert(memtable_factory.get() != nullptr);
if (max_bytes_for_level_multiplier_additional.size() <
static_cast<unsigned int>(num_levels)) {
ROCKS_LOG_HEADER(
log, " Options.blob_compaction_readahead_size: %" PRIu64,
blob_compaction_readahead_size);
+ ROCKS_LOG_HEADER(log, " Options.blob_file_starting_level: %d",
+ blob_file_starting_level);
} // ColumnFamilyOptions::Dump
void Options::Dump(Logger* log) const {
moptions.blob_garbage_collection_force_threshold;
cf_opts->blob_compaction_readahead_size =
moptions.blob_compaction_readahead_size;
+ cf_opts->blob_file_starting_level = moptions.blob_file_starting_level;
// Misc options
cf_opts->max_sequential_skip_in_iterations =
"blob_garbage_collection_age_cutoff=0.5;"
"blob_garbage_collection_force_threshold=0.75;"
"blob_compaction_readahead_size=262144;"
+ "blob_file_starting_level=1;"
"bottommost_temperature=kWarm;"
"compaction_options_fifo={max_table_files_size=3;allow_"
"compaction=false;age_for_warm=1;};",
{"blob_garbage_collection_age_cutoff", "0.5"},
{"blob_garbage_collection_force_threshold", "0.75"},
{"blob_compaction_readahead_size", "256K"},
+ {"blob_file_starting_level", "1"},
{"bottommost_temperature", "kWarm"},
};
ASSERT_EQ(new_cf_opt.blob_garbage_collection_age_cutoff, 0.5);
ASSERT_EQ(new_cf_opt.blob_garbage_collection_force_threshold, 0.75);
ASSERT_EQ(new_cf_opt.blob_compaction_readahead_size, 262144);
+ ASSERT_EQ(new_cf_opt.blob_file_starting_level, 1);
ASSERT_EQ(new_cf_opt.bottommost_temperature, Temperature::kWarm);
cf_options_map["write_buffer_size"] = "hello";
{"blob_garbage_collection_age_cutoff", "0.5"},
{"blob_garbage_collection_force_threshold", "0.75"},
{"blob_compaction_readahead_size", "256K"},
+ {"blob_file_starting_level", "1"},
{"bottommost_temperature", "kWarm"},
};
ASSERT_EQ(new_cf_opt.blob_garbage_collection_age_cutoff, 0.5);
ASSERT_EQ(new_cf_opt.blob_garbage_collection_force_threshold, 0.75);
ASSERT_EQ(new_cf_opt.blob_compaction_readahead_size, 262144);
+ ASSERT_EQ(new_cf_opt.blob_file_starting_level, 1);
ASSERT_EQ(new_cf_opt.bottommost_temperature, Temperature::kWarm);
cf_options_map["write_buffer_size"] = "hello";
.blob_compaction_readahead_size,
"[Integrated BlobDB] Compaction readahead for blob files.");
+DEFINE_int32(
+ blob_file_starting_level,
+ ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().blob_file_starting_level,
+ "[Integrated BlobDB] The starting level for blob files.");
+
#ifndef ROCKSDB_LITE
// Secondary DB instance Options
FLAGS_blob_garbage_collection_force_threshold;
options.blob_compaction_readahead_size =
FLAGS_blob_compaction_readahead_size;
+ options.blob_file_starting_level = FLAGS_blob_file_starting_level;
#ifndef ROCKSDB_LITE
if (FLAGS_readonly && FLAGS_transaction_db) {
blob_garbage_collection_age_cutoff=0.5
blob_garbage_collection_force_threshold=0.75
blob_compaction_readahead_size=262144
+ blob_file_starting_level=0
[TableOptions/BlockBasedTable "default"]
format_version=0
"blob_garbage_collection_age_cutoff": lambda: random.choice([0.0, 0.25, 0.5, 0.75, 1.0]),
"blob_garbage_collection_force_threshold": lambda: random.choice([0.5, 0.75, 1.0]),
"blob_compaction_readahead_size": lambda: random.choice([0, 1048576, 4194304]),
+ "blob_file_starting_level": lambda: random.choice([0] * 4 + [1] * 3 + [2] * 2 + [3]),
}
ts_params = {
"blob_garbage_collection_force_threshold";
const std::string LDBCommand::ARG_BLOB_COMPACTION_READAHEAD_SIZE =
"blob_compaction_readahead_size";
+const std::string LDBCommand::ARG_BLOB_FILE_STARTING_LEVEL =
+ "blob_file_starting_level";
const std::string LDBCommand::ARG_DECODE_BLOB_INDEX = "decode_blob_index";
const std::string LDBCommand::ARG_DUMP_UNCOMPRESSED_BLOBS =
"dump_uncompressed_blobs";
ARG_BLOB_GARBAGE_COLLECTION_AGE_CUTOFF,
ARG_BLOB_GARBAGE_COLLECTION_FORCE_THRESHOLD,
ARG_BLOB_COMPACTION_READAHEAD_SIZE,
+ ARG_BLOB_FILE_STARTING_LEVEL,
ARG_IGNORE_UNKNOWN_OPTIONS,
ARG_CF_NAME};
ret.insert(ret.end(), options.begin(), options.end());
}
}
+ int blob_file_starting_level;
+ if (ParseIntOption(option_map_, ARG_BLOB_FILE_STARTING_LEVEL,
+ blob_file_starting_level, exec_state_)) {
+ if (blob_file_starting_level >= 0) {
+ cf_opts->blob_file_starting_level = blob_file_starting_level;
+ } else {
+ exec_state_ = LDBCommandExecuteResult::Failed(
+ ARG_BLOB_FILE_STARTING_LEVEL + " must be >= 0.");
+ }
+ }
+
auto itr = option_map_.find(ARG_AUTO_COMPACTION);
if (itr != option_map_.end()) {
cf_opts->disable_auto_compactions = !StringToBool(itr->second);
blob_files = self.getBlobFiles(dbPath)
self.assertTrue(len(blob_files) >= 1)
+ def testBlobStartingLevel(self):
+ print("Running testBlobStartingLevel...")
+
+ dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
+ self.assertRunOK("put --create_if_missing --enable_blob_files --blob_file_starting_level=10 x1 y1", "OK")
+ self.assertRunOK("get x1", "y1")
+
+ blob_files = self.getBlobFiles(dbPath)
+ self.assertTrue(len(blob_files) == 0)
+
+ self.assertRunOK("put --enable_blob_files --blob_file_starting_level=0 x2 y2", "OK")
+ self.assertRunOK("get x1", "y1")
+ self.assertRunOK("get x2", "y2")
+ self.assertRunFAIL("get x3")
+
+ blob_files = self.getBlobFiles(dbPath)
+ self.assertTrue(len(blob_files) >= 1)
+
def testCountDelimDump(self):
print("Running testCountDelimDump...")
self.assertRunOK("batchput x.1 x1 --create_if_missing", "OK")
echo -e "\tBLOB_GC_AGE_CUTOFF\t\tBlob garbage collection age cutoff (default: 0.25)"
echo -e "\tBLOB_GC_FORCE_THRESHOLD\t\tThreshold for forcing garbage collection of the oldest blob files (default: 1.0)"
echo -e "\tBLOB_COMPACTION_READAHEAD_SIZE\tBlob compaction readahead size (default: 0)"
+ echo -e "\tBLOB_FILE_STARTING_LEVEL\t\tBlob file starting level (default: 0)"
echo -e "\tTARGET_FILE_SIZE_BASE\t\tTarget SST file size for compactions (default: write buffer size, scaled down if blob files are enabled)"
echo -e "\tMAX_BYTES_FOR_LEVEL_BASE\tMaximum size for the base level (default: 8 * target SST file size)"
}
blob_garbage_collection_age_cutoff=${BLOB_GC_AGE_CUTOFF:-0.25}
blob_garbage_collection_force_threshold=${BLOB_GC_FORCE_THRESHOLD:-1.0}
blob_compaction_readahead_size=${BLOB_COMPACTION_READAHEAD_SIZE:-0}
+blob_file_starting_level=${BLOB_FILE_STARTING_LEVEL:-0}
if [ "$enable_blob_files" == "1" ]; then
target_file_size_base=${TARGET_FILE_SIZE_BASE:-$((32 * write_buffer_size / value_size))}
echo -e "Blob GC age cutoff:\t\t\t$blob_garbage_collection_age_cutoff"
echo -e "Blob GC force threshold:\t\t$blob_garbage_collection_force_threshold"
echo -e "Blob compaction readahead size:\t\t$blob_compaction_readahead_size"
+echo -e "Blob file starting level:\t\t$blob_file_starting_level"
echo -e "Target SST file size:\t\t\t$target_file_size_base"
echo -e "Maximum size of base level:\t\t$max_bytes_for_level_base"
echo "================================================================="
--min_blob_size=$min_blob_size \
--blob_file_size=$blob_file_size \
--blob_compression_type=$blob_compression_type \
+ --blob_file_starting_level=$blob_file_starting_level \
--write_buffer_size=$write_buffer_size \
--target_file_size_base=$target_file_size_base \
--max_bytes_for_level_base=$max_bytes_for_level_base"