#ifndef ROCKSDB_LITE
-#include "rocksdb/utilities/backupable_db.h"
-#include "file/filename.h"
-#include "logging/logging.h"
-#include "port/port.h"
-#include "rocksdb/rate_limiter.h"
-#include "rocksdb/transaction_log.h"
-#include "test_util/sync_point.h"
-#include "util/channel.h"
-#include "util/coding.h"
-#include "util/crc32c.h"
-#include "util/file_reader_writer.h"
-#include "util/string_util.h"
-#include "utilities/checkpoint/checkpoint_impl.h"
-
-#include <cinttypes>
#include <stdlib.h>
#include <algorithm>
#include <atomic>
+#include <cinttypes>
#include <functional>
#include <future>
#include <limits>
#include <unordered_set>
#include <vector>
+#include "file/filename.h"
+#include "logging/logging.h"
+#include "port/port.h"
+#include "rocksdb/rate_limiter.h"
+#include "rocksdb/transaction_log.h"
+#include "rocksdb/utilities/backupable_db.h"
+#include "test_util/sync_point.h"
+#include "util/channel.h"
+#include "util/coding.h"
+#include "util/crc32c.h"
+#include "util/file_reader_writer.h"
+#include "util/string_util.h"
+#include "utilities/checkpoint/checkpoint_impl.h"
+
namespace rocksdb {
void BackupStatistics::IncrementNumberSuccessBackup() {
private:
void DeleteChildren(const std::string& dir, uint32_t file_type_filter = 0);
+ Status DeleteBackupInternal(BackupID backup_id);
// Extends the "result" map with pathname->size mappings for the contents of
// "dir" in "env". Pathnames are prefixed with "dir".
std::mutex byte_report_mutex_;
channel<CopyOrCreateWorkItem> files_to_copy_or_create_;
std::vector<port::Thread> threads_;
+ // Certain operations like PurgeOldBackups and DeleteBackup will trigger
+ // automatic GarbageCollect (true) unless we've already done one in this
+ // session and have not failed to delete backup files since then (false).
+ bool might_need_garbage_collect_ = true;
// Adds a file to the backup work queue to be copied or created if it doesn't
// already exist.
options_.Dump(options_.info_log);
if (!read_only_) {
+ // we might need to clean up from previous crash or I/O errors
+ might_need_garbage_collect_ = true;
+
// gather the list of directories that we need to create
std::vector<std::pair<std::string, std::unique_ptr<Directory>*>>
directories;
ROCKS_LOG_INFO(options_.info_log, "Backup Statistics %s\n",
backup_statistics_.ToString().c_str());
// delete files that we might have already written
+ might_need_garbage_collect_ = true;
DeleteBackup(new_backup_id);
- GarbageCollect();
return s;
}
+// Deletes all but the most recent `num_backups_to_keep` backups. Deletion is
+// best effort: an individual failure is recorded in overall_status but does
+// not stop the loop. Afterwards, if any deletion (in this session or,
+// presumably, an earlier one) may have been incomplete, a follow-up
+// GarbageCollect() runs and its error is reported only when no deletion
+// error occurred first.
Status BackupEngineImpl::PurgeOldBackups(uint32_t num_backups_to_keep) {
  assert(initialized_);
  assert(!read_only_);
+
+  // Best effort deletion even with errors
+  Status overall_status = Status::OK();
+
  ROCKS_LOG_INFO(options_.info_log, "Purging old backups, keeping %u",
                 num_backups_to_keep);
  std::vector<BackupID> to_delete;
    itr++;
  }
  for (auto backup_id : to_delete) {
-    auto s = DeleteBackup(backup_id);
+    auto s = DeleteBackupInternal(backup_id);
    if (!s.ok()) {
-      return s;
+      overall_status = s;
    }
  }
-  return Status::OK();
+  // Clean up after any incomplete backup deletion, potentially from
+  // earlier session.
+  if (might_need_garbage_collect_) {
+    auto s = GarbageCollect();
+    if (!s.ok() && overall_status.ok()) {
+      overall_status = s;
+    }
+  }
+  return overall_status;
}
+// Public entry point: deletes backup `backup_id` via DeleteBackupInternal,
+// then runs a follow-up GarbageCollect() if any deletion — this one or an
+// incomplete one from an earlier session — may have left files behind
+// (tracked by might_need_garbage_collect_). The deletion error, if any,
+// takes precedence over the GC status in the returned value.
Status BackupEngineImpl::DeleteBackup(BackupID backup_id) {
+  auto s1 = DeleteBackupInternal(backup_id);
+  auto s2 = Status::OK();
+
+  // Clean up after any incomplete backup deletion, potentially from
+  // earlier session.
+  if (might_need_garbage_collect_) {
+    s2 = GarbageCollect();
+  }
+
+  if (!s1.ok()) {
+    return s1;
+  } else {
+    return s2;
+  }
+}
+
+// Does not auto-GarbageCollect
+Status BackupEngineImpl::DeleteBackupInternal(BackupID backup_id) {
assert(initialized_);
assert(!read_only_);
+
ROCKS_LOG_INFO(options_.info_log, "Deleting backup %u", backup_id);
auto backup = backups_.find(backup_id);
if (backup != backups_.end()) {
corrupt_backups_.erase(corrupt);
}
+ // After removing meta file, best effort deletion even with errors.
+ // (Don't delete other files if we can't delete the meta file right
+ // now.)
+
if (options_.max_valid_backups_to_open == port::kMaxInt32) {
std::vector<std::string> to_delete;
for (auto& itr : backuped_file_infos_) {
ROCKS_LOG_INFO(options_.info_log, "Deleting %s -- %s",
itr.first.c_str(), s.ToString().c_str());
to_delete.push_back(itr.first);
+ if (!s.ok()) {
+ // Trying again later might work
+ might_need_garbage_collect_ = true;
+ }
}
}
for (auto& td : to_delete) {
Status s = backup_env_->DeleteDir(GetAbsolutePath(private_dir));
ROCKS_LOG_INFO(options_.info_log, "Deleting private dir %s -- %s",
private_dir.c_str(), s.ToString().c_str());
+ if (!s.ok()) {
+ // Full gc or trying again later might work
+ might_need_garbage_collect_ = true;
+ }
return Status::OK();
}
+// Best-effort cleanup of the backup directory: scans shared and private
+// children and deletes files/dirs that appear unreferenced (e.g. left
+// behind by an incomplete DeleteBackup). Optimistically clears
+// might_need_garbage_collect_ up front; any failure to list or delete
+// re-arms it (so a later call retries) and is folded into the returned
+// overall_status rather than aborting the scan.
Status BackupEngineImpl::GarbageCollect() {
  assert(!read_only_);
+
+  // We will make a best effort to remove all garbage even in the presence
+  // of inconsistencies or I/O failures that inhibit finding garbage.
+  Status overall_status = Status::OK();
+  // If all goes well, we don't need another auto-GC this session
+  might_need_garbage_collect_ = false;
+
  ROCKS_LOG_INFO(options_.info_log, "Starting garbage collection");
-  if (options_.max_valid_backups_to_open == port::kMaxInt32) {
+  if (options_.max_valid_backups_to_open != port::kMaxInt32) {
    ROCKS_LOG_WARN(
        options_.info_log,
        "Garbage collection is limited since `max_valid_backups_to_open` "
      s = Status::OK();
    }
    if (!s.ok()) {
-      return s;
+      overall_status = s;
+      // Trying again later might work
+      might_need_garbage_collect_ = true;
    }
  }
  for (auto& child : shared_children) {
      ROCKS_LOG_INFO(options_.info_log, "Deleting %s -- %s",
                     rel_fname.c_str(), s.ToString().c_str());
      backuped_file_infos_.erase(rel_fname);
+      if (!s.ok()) {
+        // Trying again later might work
+        might_need_garbage_collect_ = true;
+      }
    }
  }
}
  auto s = backup_env_->GetChildren(GetAbsolutePath(GetPrivateDirRel()),
                                    &private_children);
  if (!s.ok()) {
-    return s;
+    overall_status = s;
+    // Trying again later might work
+    might_need_garbage_collect_ = true;
  }
}
  for (auto& child : private_children) {
      ROCKS_LOG_INFO(options_.info_log, "Deleting %s -- %s",
                     (full_private_path + subchild).c_str(),
                     s.ToString().c_str());
+      if (!s.ok()) {
+        // Trying again later might work
+        might_need_garbage_collect_ = true;
+      }
    }
    // finally delete the private dir
    Status s = backup_env_->DeleteDir(full_private_path);
    ROCKS_LOG_INFO(options_.info_log, "Deleting dir %s -- %s",
                   full_private_path.c_str(), s.ToString().c_str());
+  if (!s.ok()) {
+    // Trying again later might work
+    might_need_garbage_collect_ = true;
+  }
  }
-  return Status::OK();
+  // Invariant: a non-OK result implies another GC attempt is scheduled.
+  assert(overall_status.ok() || might_need_garbage_collect_);
+  return overall_status;
}
// ------- BackupMeta class --------
::testing::Bool());
// this will make sure that backup does not copy the same file twice
-TEST_F(BackupableDBTest, NoDoubleCopy) {
+TEST_F(BackupableDBTest, NoDoubleCopy_And_AutoGC) {
OpenDBAndBackupEngine(true, true);
// should write 5 DB files + one meta file
AppendPath(backupdir_, should_have_written);
test_backup_env_->AssertWrittenFiles(should_have_written);
- // should write 4 new DB files + one meta file
- // should not write/copy 00010.sst, since it's already there!
- test_backup_env_->SetLimitWrittenFiles(6);
- test_backup_env_->ClearWrittenFiles();
+ char db_number = '1';
- dummy_db_->live_files_ = {"/00010.sst", "/00015.sst", "/CURRENT",
- "/MANIFEST-01"};
- dummy_db_->wal_files_ = {{"/00011.log", true}, {"/00012.log", false}};
- test_db_env_->SetFilenamesForMockedAttrs(dummy_db_->live_files_);
- ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false));
- // should not open 00010.sst - it's already there
+ for (std::string other_sst : {"00015.sst", "00017.sst", "00019.sst"}) {
+ // should write 4 new DB files + one meta file
+ // should not write/copy 00010.sst, since it's already there!
+ test_backup_env_->SetLimitWrittenFiles(6);
+ test_backup_env_->ClearWrittenFiles();
- should_have_written = {"/shared/.00015.sst.tmp", "/private/2/CURRENT",
- "/private/2/MANIFEST-01", "/private/2/00011.log",
- "/meta/.2.tmp"};
- AppendPath(backupdir_, should_have_written);
- test_backup_env_->AssertWrittenFiles(should_have_written);
+ dummy_db_->live_files_ = {"/00010.sst", "/" + other_sst, "/CURRENT",
+ "/MANIFEST-01"};
+ dummy_db_->wal_files_ = {{"/00011.log", true}, {"/00012.log", false}};
+ test_db_env_->SetFilenamesForMockedAttrs(dummy_db_->live_files_);
+ ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false));
+ // should not open 00010.sst - it's already there
+
+ ++db_number;
+ std::string private_dir = std::string("/private/") + db_number;
+ should_have_written = {
+ "/shared/." + other_sst + ".tmp", private_dir + "/CURRENT",
+ private_dir + "/MANIFEST-01", private_dir + "/00011.log",
+ std::string("/meta/.") + db_number + ".tmp"};
+ AppendPath(backupdir_, should_have_written);
+ test_backup_env_->AssertWrittenFiles(should_have_written);
+ }
ASSERT_OK(backup_engine_->DeleteBackup(1));
ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00010.sst"));
test_backup_env_->GetFileSize(backupdir_ + "/shared/00015.sst", &size);
ASSERT_EQ(200UL, size);
+ CloseBackupEngine();
+
+ //
+ // Now simulate incomplete delete by removing just meta
+ //
+ ASSERT_OK(test_backup_env_->DeleteFile(backupdir_ + "/meta/2"));
+
+ OpenBackupEngine();
+
+ // 1 appears to be removed, so
+ // 2 non-corrupt and 0 corrupt seen
+ std::vector<BackupInfo> backup_info;
+ std::vector<BackupID> corrupt_backup_ids;
+ backup_engine_->GetBackupInfo(&backup_info);
+ backup_engine_->GetCorruptedBackups(&corrupt_backup_ids);
+ ASSERT_EQ(2UL, backup_info.size());
+ ASSERT_EQ(0UL, corrupt_backup_ids.size());
+
+ // Keep the two we see, but this should suffice to purge unreferenced
+ // shared files from incomplete delete.
+ ASSERT_OK(backup_engine_->PurgeOldBackups(2));
+
+ // Make sure dangling sst file has been removed (somewhere along this
+ // process). GarbageCollect should not be needed.
+ ASSERT_EQ(Status::NotFound(),
+ test_backup_env_->FileExists(backupdir_ + "/shared/00015.sst"));
+ ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00017.sst"));
+ ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00019.sst"));
+
+ // Now actually purge a good one
+ ASSERT_OK(backup_engine_->PurgeOldBackups(1));
+
+ ASSERT_EQ(Status::NotFound(),
+ test_backup_env_->FileExists(backupdir_ + "/shared/00017.sst"));
+ ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00019.sst"));
+
CloseDBAndBackupEngine();
}
ASSERT_OK(backup_engine_->DeleteBackup(4));
ASSERT_OK(backup_engine_->DeleteBackup(3));
ASSERT_OK(backup_engine_->DeleteBackup(2));
- (void)backup_engine_->GarbageCollect();
+ // Should not be needed anymore with auto-GC on DeleteBackup
+ //(void)backup_engine_->GarbageCollect();
ASSERT_EQ(Status::NotFound(),
file_manager_->FileExists(backupdir_ + "/meta/5"));
ASSERT_EQ(Status::NotFound(),
}
TEST_F(BackupableDBTest, DeleteTmpFiles) {
- for (bool shared_checksum : {false, true}) {
- if (shared_checksum) {
+ for (int cleanup_fn : {1, 2, 3}) {
+ for (bool shared_checksum : {false, true}) {
OpenDBAndBackupEngineShareWithChecksum(
false /* destroy_old_data */, false /* dummy */,
- true /* share_table_files */, true /* share_with_checksums */);
- } else {
- OpenDBAndBackupEngine();
- }
- CloseDBAndBackupEngine();
- std::string shared_tmp = backupdir_;
- if (shared_checksum) {
- shared_tmp += "/shared_checksum";
- } else {
- shared_tmp += "/shared";
- }
- shared_tmp += "/.00006.sst.tmp";
- std::string private_tmp_dir = backupdir_ + "/private/10";
- std::string private_tmp_file = private_tmp_dir + "/00003.sst";
- file_manager_->WriteToFile(shared_tmp, "tmp");
- file_manager_->CreateDir(private_tmp_dir);
- file_manager_->WriteToFile(private_tmp_file, "tmp");
- ASSERT_OK(file_manager_->FileExists(private_tmp_dir));
- if (shared_checksum) {
- OpenDBAndBackupEngineShareWithChecksum(
- false /* destroy_old_data */, false /* dummy */,
- true /* share_table_files */, true /* share_with_checksums */);
- } else {
- OpenDBAndBackupEngine();
+ true /* share_table_files */, shared_checksum);
+ CloseDBAndBackupEngine();
+ std::string shared_tmp = backupdir_;
+ if (shared_checksum) {
+ shared_tmp += "/shared_checksum";
+ } else {
+ shared_tmp += "/shared";
+ }
+ shared_tmp += "/.00006.sst.tmp";
+ std::string private_tmp_dir = backupdir_ + "/private/10";
+ std::string private_tmp_file = private_tmp_dir + "/00003.sst";
+ file_manager_->WriteToFile(shared_tmp, "tmp");
+ file_manager_->CreateDir(private_tmp_dir);
+ file_manager_->WriteToFile(private_tmp_file, "tmp");
+ ASSERT_OK(file_manager_->FileExists(private_tmp_dir));
+ if (shared_checksum) {
+ OpenDBAndBackupEngineShareWithChecksum(
+ false /* destroy_old_data */, false /* dummy */,
+ true /* share_table_files */, true /* share_with_checksums */);
+ } else {
+ OpenDBAndBackupEngine();
+ }
+ // Need to call one of these explicitly to delete tmp files
+ switch (cleanup_fn) {
+ case 1:
+ (void)backup_engine_->GarbageCollect();
+ break;
+ case 2:
+ (void)backup_engine_->DeleteBackup(1);
+ break;
+ case 3:
+ (void)backup_engine_->PurgeOldBackups(1);
+ break;
+ default:
+ assert(false);
+ }
+ CloseDBAndBackupEngine();
+ ASSERT_EQ(Status::NotFound(), file_manager_->FileExists(shared_tmp));
+ ASSERT_EQ(Status::NotFound(),
+ file_manager_->FileExists(private_tmp_file));
+ ASSERT_EQ(Status::NotFound(), file_manager_->FileExists(private_tmp_dir));
}
- // Need to call this explicitly to delete tmp files
- (void)backup_engine_->GarbageCollect();
- CloseDBAndBackupEngine();
- ASSERT_EQ(Status::NotFound(), file_manager_->FileExists(shared_tmp));
- ASSERT_EQ(Status::NotFound(), file_manager_->FileExists(private_tmp_file));
- ASSERT_EQ(Status::NotFound(), file_manager_->FileExists(private_tmp_dir));
}
}