From 7e4424a821f1f0162cc1f2bfddb1eb501e321702 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 6 Apr 2022 08:12:26 +0800 Subject: [PATCH] ceph-fuse: add dedicated snap stag map for each directory This will fix the fino colliding bug, which is caused when the snapid is later than 0xffff. From mds 'mds_max_snaps_per_dir' option, we can see that the max snapshots for each directory is 4_K, and in ceph-fuse we have around 64_K, which is from 0xffff - 2, stags could be used to make the fake fuse inode numbers for each directory. Fixes: https://tracker.ceph.com/issues/54653 Signed-off-by: Xiubo Li --- src/client/fuse_ll.cc | 123 ++++++++++++++++++++++++++++-------------- 1 file changed, 82 insertions(+), 41 deletions(-) diff --git a/src/client/fuse_ll.cc b/src/client/fuse_ll.cc index f878636a923a7..b725a20e1af7f 100644 --- a/src/client/fuse_ll.cc +++ b/src/client/fuse_ll.cc @@ -52,6 +52,8 @@ #define FINO_STAG(x) ((x) >> 48) #define MAKE_FINO(i,s) ((i) | ((int64_t)(s) << 48)) #define STAG_MASK 0xffff +#define G_NOSNAP_STAG 0 // for all CEPH_NOSNAP +#define G_SNAPDIR_STAG 1 // for all CEPH_SNAPDIR #define MINORBITS 20 #define MINORMASK ((1U << MINORBITS) - 1) @@ -68,6 +70,18 @@ #define _CEPH_CLIENT_ID "ceph.client_id" #endif +/* + * The dedicated struct for snapid <-> stag map for each ceph + * inode, and the stag is a number in range [2, 0xffff], and + * the stag number 0 is reserved for CEPH_NOSNAP and 1 is + * reserved for CEPH_SNAPDIR. + */ +struct ceph_fuse_fake_inode_stag { + ceph::unordered_map snap_stag_map; // + ceph::unordered_map stag_snap_map; // + int last_stag = 1; +}; + using namespace std; static const ceph::unordered_map cephfs_errno_to_system_errno = { @@ -166,10 +180,9 @@ public: #endif ceph::mutex stag_lock = ceph::make_mutex("fuse_ll.cc stag_lock"); - int last_stag; - ceph::unordered_map snap_stag_map; - ceph::unordered_map stag_snap_map; + // a map of + ceph::unordered_map g_fino_maps; pthread_key_t fuse_req_key = 0; void set_fuse_req(fuse_req_t); @@ -1361,18 +1374,9 @@ CephFuse::Handle::Handle(Client *c, int fd) : se(NULL), #if FUSE_VERSION < FUSE_MAKE_VERSION(3, 0) ch(NULL), - mountpoint(NULL), + mountpoint(NULL) #endif - last_stag(0) { - /* - * stag 0 and 1 are always reserved for the - * inodes of CEPH_NOSNAP and CEPH_SNAPDIR - */ - snap_stag_map[CEPH_NOSNAP] = 0; - snap_stag_map[CEPH_SNAPDIR] = 1; - stag_snap_map[0] = CEPH_NOSNAP; - stag_snap_map[1] = CEPH_SNAPDIR; memset(&args, 0, sizeof(args)); #if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) memset(&opts, 0, sizeof(opts)); @@ -1632,9 +1636,25 @@ uint64_t CephFuse::Handle::fino_snap(uint64_t fino) } else { std::lock_guard l(stag_lock); uint64_t stag = FINO_STAG(fino); - if (!stag_snap_map.count(stag)) + if (stag == 0) + return CEPH_NOSNAP; + else if (stag == 1) + return CEPH_SNAPDIR; + + inodeno_t ino = FINO_INO(fino); + + // does the fino_maps for the ino exist ? + if (!g_fino_maps.count(ino)) return CEPH_MAXSNAP; - return stag_snap_map[stag]; + + auto &fino_maps = g_fino_maps[ino]; + + // does the stagid <--> snapid map exist ? + if (!fino_maps.stag_snap_map.count(stag)) + return CEPH_MAXSNAP; + + // get the snapid + return fino_maps.stag_snap_map[stag]; } } @@ -1656,7 +1676,7 @@ Inode * CephFuse::Handle::iget(fuse_ino_t fino) void CephFuse::Handle::iput(Inode *in) { - client->ll_put(in); + client->ll_put(in); } uint64_t CephFuse::Handle::make_fake_ino(inodeno_t ino, snapid_t snapid) @@ -1671,36 +1691,57 @@ uint64_t CephFuse::Handle::make_fake_ino(inodeno_t ino, snapid_t snapid) if (snapid == CEPH_NOSNAP && ino == client->get_root_ino()) return FUSE_ROOT_ID; - std::lock_guard l(stag_lock); - auto p = snap_stag_map.find(snapid); - if (p != snap_stag_map.end()) { - inodeno_t fino = MAKE_FINO(ino, p->second); - return fino; - } + int stag; + if (snapid == CEPH_NOSNAP) { + stag = G_NOSNAP_STAG; + } else if (snapid == CEPH_SNAPDIR) { + stag = G_SNAPDIR_STAG; + } else { + std::lock_guard l(stag_lock); + auto &fino_maps = g_fino_maps[ino]; // will insert it anyway if not exists - int first = last_stag & STAG_MASK; - int stag = (++last_stag) & STAG_MASK; - for (; stag != first; stag = (++last_stag) & STAG_MASK) { - // two reserved stags: 0 for CEPH_NOSNAP and 1 for CEPH_SNAPDIR - if (stag == 0 || stag == 1) - continue; - - auto p = stag_snap_map.find(stag); - if (p == stag_snap_map.end()) { - snap_stag_map[snapid] = stag; - stag_snap_map[stag] = snapid; - break; + // already exist ? + if (fino_maps.snap_stag_map.count(snapid)) { + inodeno_t fino = MAKE_FINO(ino, fino_maps.snap_stag_map[snapid]); + return fino; } - if (!client->ll_get_snap_ref(p->second)) { - snap_stag_map.erase(p->second); - snap_stag_map[snapid] = stag; - p->second = snapid; - break; + // create a new snapid <--> stagid map + int first = fino_maps.last_stag & STAG_MASK; + stag = (++fino_maps.last_stag) & STAG_MASK; + for (; stag != first; stag = (++fino_maps.last_stag) & STAG_MASK) { + // stag 0 is reserved for CEPH_NOSNAP and 1 for CEPH_SNAPDIR + if (stag == 0 || stag == 1) + continue; + + // the new stag is not used ? + if (!fino_maps.stag_snap_map.count(stag)) { + fino_maps.snap_stag_map[snapid] = stag; + fino_maps.stag_snap_map[stag] = snapid; + break; + } + + // the stag is already used by a snpaid, + // try to free it + auto _snapid = fino_maps.stag_snap_map[stag]; + if (!client->ll_get_snap_ref(_snapid)) { + fino_maps.snap_stag_map.erase(_snapid); + fino_maps.snap_stag_map[snapid] = stag; + fino_maps.stag_snap_map[stag] = snapid; + break; + } + } + if (stag == first) { + /* + * It shouldn't be here because the max snapshots for each + * directory is 4_K, and here we have around 64_K, which is + * from 0xffff - 2, stags could be used for each directory. + * + * More detail please see mds 'mds_max_snaps_per_dir' option. + */ + ceph_abort_msg("run out of stag"); } } - if (stag == first) - ceph_abort_msg("run out of stag"); inodeno_t fino = MAKE_FINO(ino, stag); //cout << "make_fake_ino " << ino << "." << snapid << " -> " << fino << std::endl; -- 2.39.5