From f62efd0536ab25274b7a43e549aefb15ef6fda7b Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 21 Nov 2018 20:22:25 +0800 Subject: [PATCH] mds: track if client has writeable range in Capability Signed-off-by: "Yan, Zheng" (cherry picked from commit 370ae1cb3e5dc07867d80e998082bc514e8fccfd) Conflicts: src/mds/Capability.h src/mds/Locker.cc src/mds/MDCache.h src/mds/Server.cc --- src/mds/Capability.h | 15 +++++++-- src/mds/Locker.cc | 73 ++++++++++++++++++++++++-------------------- src/mds/Locker.h | 2 +- src/mds/MDCache.cc | 10 ++++-- src/mds/MDCache.h | 2 +- src/mds/Server.cc | 46 +++++++++++++++------------- 6 files changed, 87 insertions(+), 61 deletions(-) diff --git a/src/mds/Capability.h b/src/mds/Capability.h index 1d06cd8d06157..af7ca015a4664 100644 --- a/src/mds/Capability.h +++ b/src/mds/Capability.h @@ -110,10 +110,9 @@ public: static void generate_test_instances(list& ls); }; - const static unsigned STATE_NEW = (1<<1); const static unsigned STATE_IMPORTING = (1<<2); - + const static unsigned STATE_CLIENTWRITEABLE = (1<<4); Capability(CInode *i=nullptr, Session *s=nullptr, uint64_t id=0); Capability(const Capability& other); // no copying @@ -232,6 +231,18 @@ public: void mark_importing() { state |= STATE_IMPORTING; } void clear_importing() { state &= ~STATE_IMPORTING; } + bool is_clientwriteable() const { return state & STATE_CLIENTWRITEABLE; } + void mark_clientwriteable() { + if (!is_clientwriteable()) { + state |= STATE_CLIENTWRITEABLE; + } + } + void clear_clientwriteable() { + if (is_clientwriteable()) { + state &= ~STATE_CLIENTWRITEABLE; + } + } + CInode *get_inode() const { return inode; } Session *get_session() const { return session; } client_t get_client() const; diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 69d7ffe76f736..e89a1aae5ae35 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -1914,10 +1914,9 @@ Capability* Locker::issue_new_caps(CInode *in, bool is_new; // if replay, try to reconnect cap, and otherwise do nothing. - if (is_replay) { - mds->mdcache->try_reconnect_cap(in, session); - return 0; - } + if (is_replay) + return mds->mdcache->try_reconnect_cap(in, session); + // my needs assert(session->info.inst.name.is_client()); @@ -2306,13 +2305,13 @@ uint64_t Locker::calc_new_max_size(CInode::mempool_inode *pi, uint64_t size) return ROUND_UP_TO(new_max, pi->get_layout_size_increment()); } -void Locker::calc_new_client_ranges(CInode *in, uint64_t size, +void Locker::calc_new_client_ranges(CInode *in, uint64_t size, bool update, CInode::mempool_inode::client_range_map *new_ranges, bool *max_increased) { auto latest = in->get_projected_inode(); uint64_t ms; - if(latest->has_layout()) { + if (latest->has_layout()) { ms = calc_new_max_size(latest, size); } else { // Layout-less directories like ~mds0/, have zero size @@ -2338,6 +2337,11 @@ void Locker::calc_new_client_ranges(CInode *in, uint64_t size, nr.range.last = ms; nr.follows = in->first - 1; } + if (update) + p->second->mark_clientwriteable(); + } else { + if (update) + p->second->clear_clientwriteable(); } } } @@ -2363,7 +2367,23 @@ bool Locker::check_inode_max_size(CInode *in, bool force_wrlock, update_size = false; } - calc_new_client_ranges(in, max(new_max_size, size), &new_ranges, &max_increased); + int can_update = 1; + if (in->is_frozen()) { + can_update = -1; + } else if (!force_wrlock && !in->filelock.can_wrlock(in->get_loner())) { + // lock? + if (in->filelock.is_stable()) { + if (in->get_target_loner() >= 0) + file_excl(&in->filelock); + else + simple_lock(&in->filelock); + } + if (!in->filelock.can_wrlock(in->get_loner())) + can_update = -2; + } + + calc_new_client_ranges(in, std::max(new_max_size, size), can_update > 0, + &new_ranges, &max_increased); if (max_increased || latest->client_ranges != new_ranges) update_max = true; @@ -2377,34 +2397,16 @@ bool Locker::check_inode_max_size(CInode *in, bool force_wrlock, << " update_size " << update_size << " on " << *in << dendl; - if (in->is_frozen()) { - dout(10) << "check_inode_max_size frozen, waiting on " << *in << dendl; - C_MDL_CheckMaxSize *cms = new C_MDL_CheckMaxSize(this, in, - new_max_size, - new_size, - new_mtime); - in->add_waiter(CInode::WAIT_UNFREEZE, cms); - return false; - } - if (!force_wrlock && !in->filelock.can_wrlock(in->get_loner())) { - // lock? - if (in->filelock.is_stable()) { - if (in->get_target_loner() >= 0) - file_excl(&in->filelock); - else - simple_lock(&in->filelock); - } - if (!in->filelock.can_wrlock(in->get_loner())) { - // try again later - C_MDL_CheckMaxSize *cms = new C_MDL_CheckMaxSize(this, in, - new_max_size, - new_size, - new_mtime); - + if (can_update < 0) { + auto cms = new C_MDL_CheckMaxSize(this, in, new_max_size, new_size, new_mtime); + if (can_update == -1) { + dout(10) << "check_inode_max_size frozen, waiting on " << *in << dendl; + in->add_waiter(CInode::WAIT_UNFREEZE, cms); + } else { in->filelock.add_waiter(SimpleLock::WAIT_STABLE, cms); dout(10) << "check_inode_max_size can't wrlock, waiting on " << *in << dendl; - return false; } + return false; } MutationRef mut(new MutationImpl()); @@ -3377,8 +3379,13 @@ bool Locker::_do_cap_update(CInode *in, Capability *cap, cr.range.first = 0; cr.range.last = new_max; cr.follows = in->first - 1; - } else + if (cap) + cap->mark_clientwriteable(); + } else { pi.inode.client_ranges.erase(client); + if (cap) + cap->clear_clientwriteable(); + } } if (change_max || (dirty & (CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) diff --git a/src/mds/Locker.h b/src/mds/Locker.h index f0a9a4ce40c09..ddbd321f2a760 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -258,7 +258,7 @@ protected: private: uint64_t calc_new_max_size(CInode::mempool_inode *pi, uint64_t size); public: - void calc_new_client_ranges(CInode *in, uint64_t size, + void calc_new_client_ranges(CInode *in, uint64_t size, bool update, CInode::mempool_inode::client_range_map* new_ranges, bool *max_increased); bool check_inode_max_size(CInode *in, bool force_wrlock=false, diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index d7b40a3126f65..2e02fd650dc86 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -5688,12 +5688,13 @@ void MDCache::export_remaining_imported_caps() } } -void MDCache::try_reconnect_cap(CInode *in, Session *session) +Capability* MDCache::try_reconnect_cap(CInode *in, Session *session) { client_t client = session->info.get_client(); + Capability *cap = nullptr; const cap_reconnect_t *rc = get_replay_cap_reconnect(in->ino(), client); if (rc) { - in->reconnect_cap(client, *rc, session); + cap = in->reconnect_cap(client, *rc, session); dout(10) << "try_reconnect_cap client." << client << " reconnect wanted " << ccap_string(rc->capinfo.wanted) << " issue " << ccap_string(rc->capinfo.issued) @@ -5721,6 +5722,7 @@ void MDCache::try_reconnect_cap(CInode *in, Session *session) cap_reconnect_waiters.erase(it); } } + return cap; } @@ -6228,7 +6230,9 @@ void MDCache::identify_files_to_recover() p != in->inode.client_ranges.end(); ++p) { Capability *cap = in->get_client_cap(p->first); - if (!cap) { + if (cap) { + cap->mark_clientwriteable(); + } else { dout(10) << " client." << p->first << " has range " << p->second << " but no cap on " << *in << dendl; recover = true; break; diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 49d8fc738c72d..12e75e43edbbf 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -651,7 +651,7 @@ public: void send_snaps(map& splits); Capability* rejoin_import_cap(CInode *in, client_t client, const cap_reconnect_t& icr, mds_rank_t frommds); void finish_snaprealm_reconnect(client_t client, SnapRealm *realm, snapid_t seq); - void try_reconnect_cap(CInode *in, Session *session); + Capability* try_reconnect_cap(CInode *in, Session *session); void export_remaining_imported_caps(); // cap imports. delayed snap parent opens. diff --git a/src/mds/Server.cc b/src/mds/Server.cc index e3fe19aa321d9..3a85815251d4e 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -3676,9 +3676,6 @@ void Server::handle_client_openc(MDRequestRef& mdr) } // create inode. - SnapRealm *realm = diri->find_snaprealm(); // use directory's realm; inode isn't attached yet. - snapid_t follows = realm->get_newest_seq(); - CInode *in = prepare_new_inode(mdr, dn->get_dir(), inodeno_t(req->head.ino), req->head.args.open.mode | S_IFREG, &layout); assert(in); @@ -3690,15 +3687,25 @@ void Server::handle_client_openc(MDRequestRef& mdr) if (layout.pool_id != mdcache->default_file_layout.pool_id) in->inode.add_old_pool(mdcache->default_file_layout.pool_id); in->inode.update_backtrace(); - if (cmode & CEPH_FILE_MODE_WR) { + in->inode.rstat.rfiles = 1; + + SnapRealm *realm = diri->find_snaprealm(); + snapid_t follows = realm->get_newest_seq(); + + ceph_assert(dn->first == follows+1); + in->first = dn->first; + + // do the open + Capability *cap = mds->locker->issue_new_caps(in, cmode, mdr->session, realm, req->is_replay()); + in->authlock.set_state(LOCK_EXCL); + in->xattrlock.set_state(LOCK_EXCL); + + if (cap && (cmode & CEPH_FILE_MODE_WR)) { in->inode.client_ranges[client].range.first = 0; in->inode.client_ranges[client].range.last = in->inode.get_layout_size_increment(); in->inode.client_ranges[client].follows = follows; + cap->mark_clientwriteable(); } - in->inode.rstat.rfiles = 1; - - assert(dn->first == follows+1); - in->first = dn->first; // prepare finisher mdr->ls = mdlog->get_current_segment(); @@ -3709,11 +3716,6 @@ void Server::handle_client_openc(MDRequestRef& mdr) mdcache->predirty_journal_parents(mdr, &le->metablob, in, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); le->metablob.add_primary_dentry(dn, in, true, true, true); - // do the open - mds->locker->issue_new_caps(in, cmode, mdr->session, realm, req->is_replay()); - in->authlock.set_state(LOCK_EXCL); - in->xattrlock.set_state(LOCK_EXCL); - // make sure this inode gets into the journal le->metablob.add_opened_ino(in->ino()); LogSegment *ls = mds->mdlog->get_current_segment(); @@ -4278,7 +4280,7 @@ void Server::handle_client_setattr(MDRequestRef& mdr) // adjust client's max_size? CInode::mempool_inode::client_range_map new_ranges; bool max_increased = false; - mds->locker->calc_new_client_ranges(cur, pi.inode.size, &new_ranges, &max_increased); + mds->locker->calc_new_client_ranges(cur, pi.inode.size, true, &new_ranges, &max_increased); if (pi.inode.client_ranges != new_ranges) { dout(10) << " client_ranges " << pi.inode.client_ranges << " -> " << new_ranges << dendl; pi.inode.client_ranges = new_ranges; @@ -4316,7 +4318,7 @@ void Server::do_open_truncate(MDRequestRef& mdr, int cmode) dout(10) << "do_open_truncate " << *in << dendl; SnapRealm *realm = in->find_snaprealm(); - mds->locker->issue_new_caps(in, cmode, mdr->session, realm, mdr->client_request->is_replay()); + Capability *cap = mds->locker->issue_new_caps(in, cmode, mdr->session, realm, mdr->client_request->is_replay()); mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "open_truncate"); @@ -4337,11 +4339,12 @@ void Server::do_open_truncate(MDRequestRef& mdr, int cmode) } bool changed_ranges = false; - if (cmode & CEPH_FILE_MODE_WR) { + if (cap && (cmode & CEPH_FILE_MODE_WR)) { pi.inode.client_ranges[client].range.first = 0; pi.inode.client_ranges[client].range.last = pi.inode.get_layout_size_increment(); pi.inode.client_ranges[client].follows = in->find_snaprealm()->get_newest_seq(); changed_ranges = true; + cap->mark_clientwriteable(); } le->metablob.add_client_req(mdr->reqid, mdr->client_request->get_oldest_client_tid()); @@ -5223,11 +5226,6 @@ void Server::handle_client_mknod(MDRequestRef& mdr) // if the client created a _regular_ file via MKNOD, it's highly likely they'll // want to write to it (e.g., if they are reexporting NFS) if (S_ISREG(newi->inode.mode)) { - dout(15) << " setting a client_range too, since this is a regular file" << dendl; - newi->inode.client_ranges[client].range.first = 0; - newi->inode.client_ranges[client].range.last = newi->inode.get_layout_size_increment(); - newi->inode.client_ranges[client].follows = follows; - // issue a cap on the file int cmode = CEPH_FILE_MODE_RDWR; Capability *cap = mds->locker->issue_new_caps(newi, cmode, mdr->session, realm, req->is_replay()); @@ -5238,6 +5236,12 @@ void Server::handle_client_mknod(MDRequestRef& mdr) newi->filelock.set_state(LOCK_EXCL); newi->authlock.set_state(LOCK_EXCL); newi->xattrlock.set_state(LOCK_EXCL); + + dout(15) << " setting a client_range too, since this is a regular file" << dendl; + newi->inode.client_ranges[client].range.first = 0; + newi->inode.client_ranges[client].range.last = newi->inode.get_layout_size_increment(); + newi->inode.client_ranges[client].follows = follows; + cap->mark_clientwriteable(); } } -- 2.39.5