From: Yan, Zheng Date: Wed, 21 Nov 2018 12:22:25 +0000 (+0800) Subject: mds: track if client has writeable range in Capability X-Git-Tag: v14.1.0~293^2~6 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=370ae1cb3e5dc07867d80e998082bc514e8fccfd;p=ceph.git mds: track if client has writeable range in Capability Signed-off-by: "Yan, Zheng" --- diff --git a/src/mds/Capability.h b/src/mds/Capability.h index cf6abb343edf..44c8f98ce9a2 100644 --- a/src/mds/Capability.h +++ b/src/mds/Capability.h @@ -110,11 +110,10 @@ public: static void generate_test_instances(list& ls); }; - const static unsigned STATE_NEW = (1<<1); const static unsigned STATE_IMPORTING = (1<<2); const static unsigned STATE_NEEDSNAPFLUSH = (1<<3); - + const static unsigned STATE_CLIENTWRITEABLE = (1<<4); Capability(CInode *i=nullptr, Session *s=nullptr, uint64_t id=0); Capability(const Capability& other) = delete; @@ -236,6 +235,18 @@ public: void mark_needsnapflush() { state |= STATE_NEEDSNAPFLUSH; } void clear_needsnapflush() { state &= ~STATE_NEEDSNAPFLUSH; } + bool is_clientwriteable() const { return state & STATE_CLIENTWRITEABLE; } + void mark_clientwriteable() { + if (!is_clientwriteable()) { + state |= STATE_CLIENTWRITEABLE; + } + } + void clear_clientwriteable() { + if (is_clientwriteable()) { + state &= ~STATE_CLIENTWRITEABLE; + } + } + CInode *get_inode() const { return inode; } Session *get_session() const { return session; } client_t get_client() const; diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 95615344aa7c..b902b737f7c6 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -1901,10 +1901,9 @@ Capability* Locker::issue_new_caps(CInode *in, bool is_new; // if replay, try to reconnect cap, and otherwise do nothing. - if (is_replay) { - mds->mdcache->try_reconnect_cap(in, session); - return 0; - } + if (is_replay) + return mds->mdcache->try_reconnect_cap(in, session); + // my needs ceph_assert(session->info.inst.name.is_client()); @@ -2293,13 +2292,13 @@ uint64_t Locker::calc_new_max_size(CInode::mempool_inode *pi, uint64_t size) return round_up_to(new_max, pi->get_layout_size_increment()); } -void Locker::calc_new_client_ranges(CInode *in, uint64_t size, +void Locker::calc_new_client_ranges(CInode *in, uint64_t size, bool update, CInode::mempool_inode::client_range_map *new_ranges, bool *max_increased) { auto latest = in->get_projected_inode(); uint64_t ms; - if(latest->has_layout()) { + if (latest->has_layout()) { ms = calc_new_max_size(latest, size); } else { // Layout-less directories like ~mds0/, have zero size @@ -2308,7 +2307,7 @@ void Locker::calc_new_client_ranges(CInode *in, uint64_t size, // increase ranges as appropriate. // shrink to 0 if no WR|BUFFER caps issued. - for (const auto &p : in->get_client_caps()) { + for (auto &p : in->client_caps) { if ((p.second.issued() | p.second.wanted()) & (CEPH_CAP_FILE_WR|CEPH_CAP_FILE_BUFFER)) { client_writeable_range_t& nr = (*new_ranges)[p.first]; nr.range.first = 0; @@ -2323,6 +2322,11 @@ void Locker::calc_new_client_ranges(CInode *in, uint64_t size, nr.range.last = ms; nr.follows = in->first - 1; } + if (update) + p.second.mark_clientwriteable(); + } else { + if (update) + p.second.clear_clientwriteable(); } } } @@ -2348,7 +2352,23 @@ bool Locker::check_inode_max_size(CInode *in, bool force_wrlock, update_size = false; } - calc_new_client_ranges(in, std::max(new_max_size, size), &new_ranges, &max_increased); + int can_update = 1; + if (in->is_frozen()) { + can_update = -1; + } else if (!force_wrlock && !in->filelock.can_wrlock(in->get_loner())) { + // lock? + if (in->filelock.is_stable()) { + if (in->get_target_loner() >= 0) + file_excl(&in->filelock); + else + simple_lock(&in->filelock); + } + if (!in->filelock.can_wrlock(in->get_loner())) + can_update = -2; + } + + calc_new_client_ranges(in, std::max(new_max_size, size), can_update > 0, + &new_ranges, &max_increased); if (max_increased || latest->client_ranges != new_ranges) update_max = true; @@ -2362,34 +2382,16 @@ bool Locker::check_inode_max_size(CInode *in, bool force_wrlock, << " update_size " << update_size << " on " << *in << dendl; - if (in->is_frozen()) { - dout(10) << "check_inode_max_size frozen, waiting on " << *in << dendl; - C_MDL_CheckMaxSize *cms = new C_MDL_CheckMaxSize(this, in, - new_max_size, - new_size, - new_mtime); - in->add_waiter(CInode::WAIT_UNFREEZE, cms); - return false; - } - if (!force_wrlock && !in->filelock.can_wrlock(in->get_loner())) { - // lock? - if (in->filelock.is_stable()) { - if (in->get_target_loner() >= 0) - file_excl(&in->filelock); - else - simple_lock(&in->filelock); - } - if (!in->filelock.can_wrlock(in->get_loner())) { - // try again later - C_MDL_CheckMaxSize *cms = new C_MDL_CheckMaxSize(this, in, - new_max_size, - new_size, - new_mtime); - + if (can_update < 0) { + auto cms = new C_MDL_CheckMaxSize(this, in, new_max_size, new_size, new_mtime); + if (can_update == -1) { + dout(10) << "check_inode_max_size frozen, waiting on " << *in << dendl; + in->add_waiter(CInode::WAIT_UNFREEZE, cms); + } else { in->filelock.add_waiter(SimpleLock::WAIT_STABLE, cms); dout(10) << "check_inode_max_size can't wrlock, waiting on " << *in << dendl; - return false; } + return false; } MutationRef mut(new MutationImpl()); @@ -3405,8 +3407,13 @@ bool Locker::_do_cap_update(CInode *in, Capability *cap, cr.range.first = 0; cr.range.last = new_max; cr.follows = in->first - 1; - } else + if (cap) + cap->mark_clientwriteable(); + } else { pi.inode.client_ranges.erase(client); + if (cap) + cap->clear_clientwriteable(); + } } if (change_max || (dirty & (CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 38133be2f094..015a87993e7e 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -258,7 +258,7 @@ protected: private: uint64_t calc_new_max_size(CInode::mempool_inode *pi, uint64_t size); public: - void calc_new_client_ranges(CInode *in, uint64_t size, + void calc_new_client_ranges(CInode *in, uint64_t size, bool update, CInode::mempool_inode::client_range_map* new_ranges, bool *max_increased); bool check_inode_max_size(CInode *in, bool force_wrlock=false, diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 5005bd8acbd5..02221180e85b 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -5733,12 +5733,13 @@ void MDCache::export_remaining_imported_caps() } } -void MDCache::try_reconnect_cap(CInode *in, Session *session) +Capability* MDCache::try_reconnect_cap(CInode *in, Session *session) { client_t client = session->info.get_client(); + Capability *cap = nullptr; const cap_reconnect_t *rc = get_replay_cap_reconnect(in->ino(), client); if (rc) { - in->reconnect_cap(client, *rc, session); + cap = in->reconnect_cap(client, *rc, session); dout(10) << "try_reconnect_cap client." << client << " reconnect wanted " << ccap_string(rc->capinfo.wanted) << " issue " << ccap_string(rc->capinfo.issued) @@ -5766,6 +5767,7 @@ void MDCache::try_reconnect_cap(CInode *in, Session *session) cap_reconnect_waiters.erase(it); } } + return cap; } @@ -6280,7 +6282,9 @@ void MDCache::identify_files_to_recover() p != in->inode.client_ranges.end(); ++p) { Capability *cap = in->get_client_cap(p->first); - if (!cap) { + if (cap) { + cap->mark_clientwriteable(); + } else { dout(10) << " client." << p->first << " has range " << p->second << " but no cap on " << *in << dendl; recover = true; break; diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index e161b539fa95..96eb95a22ff1 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -680,7 +680,7 @@ public: Capability* rejoin_import_cap(CInode *in, client_t client, const cap_reconnect_t& icr, mds_rank_t frommds); void finish_snaprealm_reconnect(client_t client, SnapRealm *realm, snapid_t seq, map& updates); - void try_reconnect_cap(CInode *in, Session *session); + Capability* try_reconnect_cap(CInode *in, Session *session); void export_remaining_imported_caps(); // realm inodes diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 0171b3b2eb33..b864505a9ca5 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -4031,20 +4031,26 @@ void Server::handle_client_openc(MDRequestRef& mdr) if (layout.pool_id != mdcache->default_file_layout.pool_id) in->inode.add_old_pool(mdcache->default_file_layout.pool_id); in->inode.update_backtrace(); + in->inode.rstat.rfiles = 1; SnapRealm *realm = diri->find_snaprealm(); snapid_t follows = mdcache->get_global_snaprealm()->get_newest_seq(); ceph_assert(follows >= realm->get_newest_seq()); - if (cmode & CEPH_FILE_MODE_WR) { + ceph_assert(dn->first == follows+1); + in->first = dn->first; + + // do the open + Capability *cap = mds->locker->issue_new_caps(in, cmode, mdr->session, realm, req->is_replay()); + in->authlock.set_state(LOCK_EXCL); + in->xattrlock.set_state(LOCK_EXCL); + + if (cap && (cmode & CEPH_FILE_MODE_WR)) { in->inode.client_ranges[client].range.first = 0; in->inode.client_ranges[client].range.last = in->inode.get_layout_size_increment(); in->inode.client_ranges[client].follows = follows; + cap->mark_clientwriteable(); } - in->inode.rstat.rfiles = 1; - - ceph_assert(dn->first == follows+1); - in->first = dn->first; // prepare finisher mdr->ls = mdlog->get_current_segment(); @@ -4055,11 +4061,6 @@ void Server::handle_client_openc(MDRequestRef& mdr) mdcache->predirty_journal_parents(mdr, &le->metablob, in, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); le->metablob.add_primary_dentry(dn, in, true, true, true); - // do the open - mds->locker->issue_new_caps(in, cmode, mdr->session, realm, req->is_replay()); - in->authlock.set_state(LOCK_EXCL); - in->xattrlock.set_state(LOCK_EXCL); - // make sure this inode gets into the journal le->metablob.add_opened_ino(in->ino()); @@ -4636,7 +4637,7 @@ void Server::handle_client_setattr(MDRequestRef& mdr) // adjust client's max_size? CInode::mempool_inode::client_range_map new_ranges; bool max_increased = false; - mds->locker->calc_new_client_ranges(cur, pi.inode.size, &new_ranges, &max_increased); + mds->locker->calc_new_client_ranges(cur, pi.inode.size, true, &new_ranges, &max_increased); if (pi.inode.client_ranges != new_ranges) { dout(10) << " client_ranges " << pi.inode.client_ranges << " -> " << new_ranges << dendl; pi.inode.client_ranges = new_ranges; @@ -4674,7 +4675,7 @@ void Server::do_open_truncate(MDRequestRef& mdr, int cmode) dout(10) << "do_open_truncate " << *in << dendl; SnapRealm *realm = in->find_snaprealm(); - mds->locker->issue_new_caps(in, cmode, mdr->session, realm, mdr->client_request->is_replay()); + Capability *cap = mds->locker->issue_new_caps(in, cmode, mdr->session, realm, mdr->client_request->is_replay()); mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "open_truncate"); @@ -4695,11 +4696,12 @@ void Server::do_open_truncate(MDRequestRef& mdr, int cmode) } bool changed_ranges = false; - if (cmode & CEPH_FILE_MODE_WR) { + if (cap && (cmode & CEPH_FILE_MODE_WR)) { pi.inode.client_ranges[client].range.first = 0; pi.inode.client_ranges[client].range.last = pi.inode.get_layout_size_increment(); pi.inode.client_ranges[client].follows = realm->get_newest_seq(); changed_ranges = true; + cap->mark_clientwriteable(); } le->metablob.add_client_req(mdr->reqid, mdr->client_request->get_oldest_client_tid()); @@ -5606,11 +5608,6 @@ void Server::handle_client_mknod(MDRequestRef& mdr) // if the client created a _regular_ file via MKNOD, it's highly likely they'll // want to write to it (e.g., if they are reexporting NFS) if (S_ISREG(newi->inode.mode)) { - dout(15) << " setting a client_range too, since this is a regular file" << dendl; - newi->inode.client_ranges[client].range.first = 0; - newi->inode.client_ranges[client].range.last = newi->inode.get_layout_size_increment(); - newi->inode.client_ranges[client].follows = follows; - // issue a cap on the file int cmode = CEPH_FILE_MODE_RDWR; Capability *cap = mds->locker->issue_new_caps(newi, cmode, mdr->session, realm, req->is_replay()); @@ -5621,6 +5618,12 @@ void Server::handle_client_mknod(MDRequestRef& mdr) newi->filelock.set_state(LOCK_EXCL); newi->authlock.set_state(LOCK_EXCL); newi->xattrlock.set_state(LOCK_EXCL); + + dout(15) << " setting a client_range too, since this is a regular file" << dendl; + newi->inode.client_ranges[client].range.first = 0; + newi->inode.client_ranges[client].range.last = newi->inode.get_layout_size_increment(); + newi->inode.client_ranges[client].follows = follows; + cap->mark_clientwriteable(); } }