From 747c85320396e5d8aa1ebdb880888fe1b4494404 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Fri, 20 Apr 2018 17:23:34 -0700 Subject: [PATCH] Avoid directory renames in BackupEngine Summary: We used to name private directories like "1.tmp" while BackupEngine populated them, and then rename without the ".tmp" suffix (i.e., rename "1.tmp" to "1") after all files were copied. On glusterfs, directory renames like this require operations across many hosts, and partial failures have caused operational problems. Fortunately we don't need to rename private directories. We already have a meta-file that uses the tempfile-rename pattern to commit a backup atomically after all its files have been successfully copied. So we can copy private files directly to their final location, so now there's no directory rename. Closes https://github.com/facebook/rocksdb/pull/3749 Differential Revision: D7705610 Pulled By: ajkr fbshipit-source-id: fd724a28dd2bf993ce323a5f2cb7e7d6980cc346 --- utilities/backupable/backupable_db.cc | 78 +++++++++++----------- utilities/backupable/backupable_db_test.cc | 15 ++--- 2 files changed, 47 insertions(+), 46 deletions(-) diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index bccb7efd2..7198c1072 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -747,6 +747,19 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata( BackupID new_backup_id = latest_backup_id_ + 1; assert(backups_.find(new_backup_id) == backups_.end()); + + auto private_dir = GetAbsolutePath(GetPrivateFileRel(new_backup_id)); + Status s = backup_env_->FileExists(private_dir); + if (s.ok()) { + // maybe last backup failed and left partial state behind, clean it up. + // need to do this before updating backups_ such that a private dir + // named after new_backup_id will be cleaned up + s = GarbageCollect(); + } else if (s.IsNotFound()) { + // normal case, the new backup's private dir doesn't exist yet + s = Status::OK(); + } + auto ret = backups_.insert(std::make_pair( new_backup_id, unique_ptr(new BackupMeta( GetBackupMetaFile(new_backup_id, false /* tmp */), @@ -757,23 +770,13 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata( new_backup->RecordTimestamp(); new_backup->SetAppMetadata(app_metadata); - auto start_backup = backup_env_-> NowMicros(); + auto start_backup = backup_env_->NowMicros(); ROCKS_LOG_INFO(options_.info_log, "Started the backup process -- creating backup %u", new_backup_id); - - auto private_tmp_dir = GetAbsolutePath(GetPrivateFileRel(new_backup_id, true)); - Status s = backup_env_->FileExists(private_tmp_dir); if (s.ok()) { - // maybe last backup failed and left partial state behind, clean it up - s = GarbageCollect(); - } else if (s.IsNotFound()) { - // normal case, the new backup's private dir doesn't exist yet - s = Status::OK(); - } - if (s.ok()) { - s = backup_env_->CreateDir(private_tmp_dir); + s = backup_env_->CreateDir(private_dir); } RateLimiter* rate_limiter = options_.backup_rate_limiter.get(); @@ -859,18 +862,6 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata( // we copied all the files, enable file deletions db->EnableFileDeletions(false); - if (s.ok()) { - // move tmp private backup to real backup folder - ROCKS_LOG_INFO( - options_.info_log, - "Moving tmp backup directory to the real one: %s -> %s\n", - GetAbsolutePath(GetPrivateFileRel(new_backup_id, true)).c_str(), - GetAbsolutePath(GetPrivateFileRel(new_backup_id, false)).c_str()); - s = backup_env_->RenameFile( - GetAbsolutePath(GetPrivateFileRel(new_backup_id, true)), // tmp - GetAbsolutePath(GetPrivateFileRel(new_backup_id, false))); - } - auto backup_time = backup_env_->NowMicros() - start_backup; if (s.ok()) { @@ -1307,22 +1298,33 @@ Status BackupEngineImpl::AddBackupFileWorkItem( dst_relative_tmp = GetSharedFileRel(dst_relative, true); dst_relative = GetSharedFileRel(dst_relative, false); } else { - dst_relative_tmp = GetPrivateFileRel(backup_id, true, dst_relative); dst_relative = GetPrivateFileRel(backup_id, false, dst_relative); } - std::string dst_path = GetAbsolutePath(dst_relative); - std::string dst_path_tmp = GetAbsolutePath(dst_relative_tmp); + + // We copy into `temp_dest_path` and, once finished, rename it to + // `final_dest_path`. This allows files to atomically appear at + // `final_dest_path`. We can copy directly to the final path when atomicity + // is unnecessary, like for files in private backup directories. + const std::string* copy_dest_path; + std::string temp_dest_path; + std::string final_dest_path = GetAbsolutePath(dst_relative); + if (!dst_relative_tmp.empty()) { + temp_dest_path = GetAbsolutePath(dst_relative_tmp); + copy_dest_path = &temp_dest_path; + } else { + copy_dest_path = &final_dest_path; + } // if it's shared, we also need to check if it exists -- if it does, no need // to copy it again. bool need_to_copy = true; - // true if dst_path is the same path as another live file + // true if final_dest_path is the same path as another live file const bool same_path = - live_dst_paths.find(dst_path) != live_dst_paths.end(); + live_dst_paths.find(final_dest_path) != live_dst_paths.end(); bool file_exists = false; if (shared && !same_path) { - Status exist = backup_env_->FileExists(dst_path); + Status exist = backup_env_->FileExists(final_dest_path); if (exist.ok()) { file_exists = true; } else if (exist.IsNotFound()) { @@ -1351,7 +1353,7 @@ Status BackupEngineImpl::AddBackupFileWorkItem( "overwrite the file.", fname.c_str()); need_to_copy = true; - backup_env_->DeleteFile(dst_path); + backup_env_->DeleteFile(final_dest_path); } else { // the file is present and referenced by a backup ROCKS_LOG_INFO(options_.info_log, @@ -1360,25 +1362,25 @@ Status BackupEngineImpl::AddBackupFileWorkItem( &checksum_value); } } - live_dst_paths.insert(dst_path); + live_dst_paths.insert(final_dest_path); if (!contents.empty() || need_to_copy) { ROCKS_LOG_INFO(options_.info_log, "Copying %s to %s", fname.c_str(), - dst_path_tmp.c_str()); + copy_dest_path->c_str()); CopyOrCreateWorkItem copy_or_create_work_item( - src_dir.empty() ? "" : src_dir + fname, dst_path_tmp, contents, db_env_, - backup_env_, options_.sync, rate_limiter, size_limit, + src_dir.empty() ? "" : src_dir + fname, *copy_dest_path, contents, + db_env_, backup_env_, options_.sync, rate_limiter, size_limit, progress_callback); BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item( copy_or_create_work_item.result.get_future(), shared, need_to_copy, - backup_env_, dst_path_tmp, dst_path, dst_relative); + backup_env_, temp_dest_path, final_dest_path, dst_relative); files_to_copy_or_create_.write(std::move(copy_or_create_work_item)); backup_items_to_finish.push_back(std::move(after_copy_or_create_work_item)); } else { std::promise promise_result; BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item( promise_result.get_future(), shared, need_to_copy, backup_env_, - dst_path_tmp, dst_path, dst_relative); + temp_dest_path, final_dest_path, dst_relative); backup_items_to_finish.push_back(std::move(after_copy_or_create_work_item)); CopyOrCreateResult result; result.status = s; @@ -1533,7 +1535,7 @@ Status BackupEngineImpl::GarbageCollect() { } // here we have to delete the dir and all its children std::string full_private_path = - GetAbsolutePath(GetPrivateFileRel(backup_id, tmp_dir)); + GetAbsolutePath(GetPrivateFileRel(backup_id)); std::vector subchildren; backup_env_->GetChildren(full_private_path, &subchildren); for (auto& subchild : subchildren) { diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index f2657be4a..5f252dcd2 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -811,9 +811,8 @@ TEST_F(BackupableDBTest, NoDoubleCopy) { test_db_env_->SetFilenamesForMockedAttrs(dummy_db_->live_files_); ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false)); std::vector should_have_written = { - "/shared/.00010.sst.tmp", "/shared/.00011.sst.tmp", - "/private/1.tmp/CURRENT", "/private/1.tmp/MANIFEST-01", - "/private/1.tmp/00011.log", "/meta/.1.tmp"}; + "/shared/.00010.sst.tmp", "/shared/.00011.sst.tmp", "/private/1/CURRENT", + "/private/1/MANIFEST-01", "/private/1/00011.log", "/meta/.1.tmp"}; AppendPath(backupdir_, should_have_written); test_backup_env_->AssertWrittenFiles(should_have_written); @@ -829,9 +828,9 @@ TEST_F(BackupableDBTest, NoDoubleCopy) { ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false)); // should not open 00010.sst - it's already there - should_have_written = {"/shared/.00015.sst.tmp", "/private/2.tmp/CURRENT", - "/private/2.tmp/MANIFEST-01", - "/private/2.tmp/00011.log", "/meta/.2.tmp"}; + should_have_written = {"/shared/.00015.sst.tmp", "/private/2/CURRENT", + "/private/2/MANIFEST-01", "/private/2/00011.log", + "/meta/.2.tmp"}; AppendPath(backupdir_, should_have_written); test_backup_env_->AssertWrittenFiles(should_have_written); @@ -976,7 +975,7 @@ TEST_F(BackupableDBTest, InterruptCreationTest) { backup_engine_->CreateNewBackup(db_.get(), !!(rnd.Next() % 2)).ok()); CloseDBAndBackupEngine(); // should also fail cleanup so the tmp directory stays behind - ASSERT_OK(backup_chroot_env_->FileExists(backupdir_ + "/private/1.tmp/")); + ASSERT_OK(backup_chroot_env_->FileExists(backupdir_ + "/private/1/")); OpenDBAndBackupEngine(false /* destroy_old_data */); test_backup_env_->SetLimitWrittenFiles(1000000); @@ -1171,7 +1170,7 @@ TEST_F(BackupableDBTest, DeleteTmpFiles) { shared_tmp += "/shared"; } shared_tmp += "/.00006.sst.tmp"; - std::string private_tmp_dir = backupdir_ + "/private/10.tmp"; + std::string private_tmp_dir = backupdir_ + "/private/10"; std::string private_tmp_file = private_tmp_dir + "/00003.sst"; file_manager_->WriteToFile(shared_tmp, "tmp"); file_manager_->CreateDir(private_tmp_dir); -- 2.47.3