From: Patrick Donnelly Date: Thu, 10 Nov 2022 13:22:35 +0000 (-0500) Subject: mds: revert standby-replay trimming changes X-Git-Tag: v19.0.0~29^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=589e59af11e40164695ca13f4ce4f2bc140b18b8;p=ceph-ci.git mds: revert standby-replay trimming changes Revert "mds: do not trim the inodes from the lru list in standby_replay" Revert "mds: trim cache during standby replay" This reverts commit 79bb44c1b9f1715378a9550a81984e949e454ff4. This reverts commit c0fe25bb2a87856c1281eddcb4da2efe0d7fbf75. standby-replay daemons were changed to keep minimal metadata from the journal in cache but the original intent of standby-replay was to have a cache that's as warm as the rank itself. This reverts the two commits which changed that behavior. Part of the reason for this is that the new rapid cache trimming behavior was not correct at all. The trimming loop would break when it runs into a dentry with non-null linkage. This would nearly always be the case. It was thought that this was a problem introduced by [2] as MDCache::standby_trim_segment has a different trim check [4] but the original issue (tracker 48673) is as old as [1], indicating the problem predates [2]. So, this commit reverts all of that. I have lingering suspicions that the standby-replay daemon is not pinning some dentries properly which causes [5] but this did not show up unless the MDS was rapidly evicting some dentries. More research needs to be done there. 
[1] c0fe25bb2a87856c1281eddcb4da2efe0d7fbf75 [2] 79bb44c1b9f1715378a9550a81984e949e454ff4 [3] https://github.com/ceph/ceph/blob/84fba097049ec4f72549588eaacc64f30c7a88a8/src/mds/MDCache.cc#L6816-L6820 [4] https://github.com/ceph/ceph/blob/84fba097049ec4f72549588eaacc64f30c7a88a8/src/mds/MDCache.cc#L7476-L7481 [5] https://tracker.ceph.com/issues/50246 Fixes: https://tracker.ceph.com/issues/48673 Signed-off-by: Patrick Donnelly --- diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 59a8f0739e2..d44ea8187c1 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -6821,19 +6821,17 @@ std::pair MDCache::trim_lru(uint64_t count, expiremap& expiremap << dendl; // trim dentries from the LRU until count is reached - // if mds is in standby_replay and skip trimming the inodes - while (!throttled && (cache_toofull() || count > 0 || is_standby_replay)) { + while (!throttled && (cache_toofull() || count > 0)) { throttled |= trim_counter_start+trimmed >= trim_threshold; if (throttled) break; CDentry *dn = static_cast(lru.lru_expire()); if (!dn) { break; } - if (is_standby_replay && dn->get_linkage()->inode) { - // we move the inodes that need to be trimmed to the end of the lru queue. 
- // refer to MDCache::standby_trim_segment - lru.lru_insert_bot(dn); - break; + if ((is_standby_replay && dn->get_linkage()->inode && + dn->get_linkage()->inode->item_open_file.is_on_list())) { + dout(20) << "unexpirable: " << *dn << dendl; + unexpirables.push_back(dn); } else if (trim_dentry(dn, expiremap)) { unexpirables.push_back(dn); } else { @@ -7479,69 +7477,42 @@ void MDCache::try_trim_non_auth_subtree(CDir *dir) void MDCache::standby_trim_segment(LogSegment *ls) { - auto try_trim_inode = [this](CInode *in) { - if (in->get_num_ref() == 0 && - !in->item_open_file.is_on_list() && - in->parent != NULL && - in->parent->get_num_ref() == 0){ - touch_dentry_bottom(in->parent); - } - }; - - auto try_trim_dentry = [this](CDentry *dn) { - if (dn->get_num_ref() > 0) - return; - auto in = dn->get_linkage()->inode; - if(in && in->item_open_file.is_on_list()) - return; - touch_dentry_bottom(dn); - }; - ls->new_dirfrags.clear_list(); ls->open_files.clear_list(); while (!ls->dirty_dirfrags.empty()) { CDir *dir = ls->dirty_dirfrags.front(); dir->mark_clean(); - if (dir->inode) - try_trim_inode(dir->inode); } while (!ls->dirty_inodes.empty()) { CInode *in = ls->dirty_inodes.front(); in->mark_clean(); - try_trim_inode(in); } while (!ls->dirty_dentries.empty()) { CDentry *dn = ls->dirty_dentries.front(); dn->mark_clean(); - try_trim_dentry(dn); } while (!ls->dirty_parent_inodes.empty()) { CInode *in = ls->dirty_parent_inodes.front(); in->clear_dirty_parent(); - try_trim_inode(in); } while (!ls->dirty_dirfrag_dir.empty()) { CInode *in = ls->dirty_dirfrag_dir.front(); in->filelock.remove_dirty(); - try_trim_inode(in); } while (!ls->dirty_dirfrag_nest.empty()) { CInode *in = ls->dirty_dirfrag_nest.front(); in->nestlock.remove_dirty(); - try_trim_inode(in); } while (!ls->dirty_dirfrag_dirfragtree.empty()) { CInode *in = ls->dirty_dirfrag_dirfragtree.front(); in->dirfragtreelock.remove_dirty(); - try_trim_inode(in); } while (!ls->truncating_inodes.empty()) { auto it = 
ls->truncating_inodes.begin(); CInode *in = *it; ls->truncating_inodes.erase(it); in->put(CInode::PIN_TRUNCATING); - try_trim_inode(in); } }