From: John Spray Date: Thu, 17 Jul 2014 23:44:38 +0000 (+0100) Subject: mds: separate inode recovery queue from MDCache X-Git-Tag: v0.84~40^2~6 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6be80873c363bbc82889a08f84f9e2b260753902;p=ceph.git mds: separate inode recovery queue from MDCache Refactor to: * have somewhere to put some logic for doing background recovery in future. * trim a few lines from the oversized MDCache.cc wherever we can. Signed-off-by: John Spray --- diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 49b3ff0315a7..d651816ef6fa 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -128,6 +128,7 @@ set<int> SimpleLock::empty_gather_set; MDCache::MDCache(MDS *m) : + recovery_queue(m), delayed_eval_stray(member_offset(CDentry, item_stray)) { mds = m; @@ -5803,6 +5804,7 @@ struct C_MDC_QueuedCow : public Context { } }; + void MDCache::queue_file_recover(CInode *in) { dout(10) << "queue_file_recover " << *in << dendl; @@ -5832,7 +5834,7 @@ void MDCache::queue_file_recover(CInode *in) CInode *cow_inode = 0; journal_cow_inode(mut, &le->metablob, in, snapid-1, &cow_inode); assert(cow_inode); - _queue_file_recover(cow_inode); + recovery_queue.enqueue(cow_inode); s.erase(*s.begin()); } @@ -5842,7 +5844,7 @@ void MDCache::queue_file_recover(CInode *in) mds->mdlog->flush(); } - _queue_file_recover(in); + recovery_queue.enqueue(in); } void MDCache::_queued_file_recover_cow(CInode *in, MutationRef& mut) @@ -5853,25 +5855,6 @@ void MDCache::_queued_file_recover_cow(CInode *in, MutationRef& mut) mut->cleanup(); } -void MDCache::_queue_file_recover(CInode *in) -{ - dout(15) << "_queue_file_recover " << *in << dendl; - assert(in->is_auth()); - in->state_clear(CInode::STATE_NEEDSRECOVER); - if (!in->state_test(CInode::STATE_RECOVERING)) { - in->state_set(CInode::STATE_RECOVERING); - in->auth_pin(this); - } - file_recover_queue.insert(in); -} - -void MDCache::unqueue_file_recover(CInode *in) -{ - dout(15) << 
"unqueue_file_recover " << *in << dendl; - in->state_clear(CInode::STATE_RECOVERING); - in->auth_unpin(this); - file_recover_queue.erase(in); -} /* * called after recovery to recover file sizes for previously opened (for write) @@ -5925,83 +5908,9 @@ void MDCache::start_files_to_recover(vector& recover_q, vector } } -struct C_MDC_Recover : public Context { - MDCache *mdc; - CInode *in; - uint64_t size; - utime_t mtime; - C_MDC_Recover(MDCache *m, CInode *i) : mdc(m), in(i), size(0) {} - void finish(int r) { - mdc->_recovered(in, r, size, mtime); - } -}; - void MDCache::do_file_recover() { - dout(10) << "do_file_recover " << file_recover_queue.size() << " queued, " - << file_recovering.size() << " recovering" << dendl; - - while (file_recovering.size() < 5 && - !file_recover_queue.empty()) { - CInode *in = *file_recover_queue.begin(); - file_recover_queue.erase(in); - - inode_t *pi = in->get_projected_inode(); - - // blech - if (pi->client_ranges.size() && !pi->get_max_size()) { - mds->clog.warn() << "bad client_range " << pi->client_ranges - << " on ino " << pi->ino << "\n"; - } - - if (pi->client_ranges.size() && pi->get_max_size()) { - dout(10) << "do_file_recover starting " << in->inode.size << " " << pi->client_ranges - << " " << *in << dendl; - file_recovering.insert(in); - - C_MDC_Recover *fin = new C_MDC_Recover(this, in); - mds->filer->probe(in->inode.ino, &in->inode.layout, in->last, - pi->get_max_size(), &fin->size, &fin->mtime, false, - 0, fin); - } else { - dout(10) << "do_file_recover skipping " << in->inode.size - << " " << *in << dendl; - in->state_clear(CInode::STATE_RECOVERING); - mds->locker->eval(in, CEPH_LOCK_IFILE); - in->auth_unpin(this); - } - } -} - -void MDCache::_recovered(CInode *in, int r, uint64_t size, utime_t mtime) -{ - dout(10) << "_recovered r=" << r << " size=" << size << " mtime=" << mtime - << " for " << *in << dendl; - - if (r != 0) { - dout(0) << "recovery error! 
" << r << dendl; - if (r == -EBLACKLISTED) { - mds->suicide(); - return; - } - assert(0 == "unexpected error from osd during recovery"); - } - - file_recovering.erase(in); - in->state_clear(CInode::STATE_RECOVERING); - - if (!in->get_parent_dn() && !in->get_projected_parent_dn()) { - dout(10) << " inode has no parents, killing it off" << dendl; - in->auth_unpin(this); - remove_inode(in); - } else { - // journal - mds->locker->check_inode_max_size(in, true, true, size, false, 0, mtime); - mds->locker->eval(in, CEPH_LOCK_IFILE); - in->auth_unpin(this); - } - - do_file_recover(); + recovery_queue.advance(); } void MDCache::purge_prealloc_ino(inodeno_t ino, Context *fin) @@ -6014,13 +5923,9 @@ void MDCache::purge_prealloc_ino(inodeno_t ino, Context *fin) mds->objecter->remove(oid, oloc, snapc, ceph_clock_now(g_ceph_context), 0, 0, fin); } - - - // =============================================================================== - // ---------------------------- // truncate diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 17b038ebb488..43d83093155d 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -26,6 +26,7 @@ #include "CDir.h" #include "include/Context.h" #include "events/EMetaBlob.h" +#include "RecoveryQueue.h" #include "messages/MClientRequest.h" #include "messages/MMDSSlaveRequest.h" @@ -526,26 +527,18 @@ public: friend class MDBalancer; - // file size recovery - set<CInode*> file_recover_queue; - set<CInode*> file_recovering; - - void queue_file_recover(CInode *in); - void unqueue_file_recover(CInode *in); - void _queued_file_recover_cow(CInode *in, MutationRef& mut); - void _queue_file_recover(CInode *in); + // File size recovery +private: + RecoveryQueue recovery_queue; void identify_files_to_recover(vector<CInode*>& recover_q, vector<CInode*>& check_q); void start_files_to_recover(vector<CInode*>& recover_q, vector<CInode*>& check_q); - +public: void do_file_recover(); - void _recovered(CInode *in, int r, uint64_t size, utime_t mtime); + void queue_file_recover(CInode *in); + void 
_queued_file_recover_cow(CInode *in, MutationRef& mut); void purge_prealloc_ino(inodeno_t ino, Context *fin); - - - public: - // subsystems Migrator *migrator; diff --git a/src/mds/Makefile.am b/src/mds/Makefile.am index 806f42117264..9cb725cbabcb 100644 --- a/src/mds/Makefile.am +++ b/src/mds/Makefile.am @@ -7,6 +7,7 @@ libmds_la_SOURCES = \ mds/Server.cc \ mds/Mutation.cc \ mds/MDCache.cc \ + mds/RecoveryQueue.cc \ mds/Locker.cc \ mds/Migrator.cc \ mds/MDBalancer.cc \ @@ -45,6 +46,7 @@ noinst_HEADERS += \ mds/LogSegment.h \ mds/MDBalancer.h \ mds/MDCache.h \ + mds/RecoveryQueue.h \ mds/MDLog.h \ mds/MDS.h \ mds/MDSMap.h \ diff --git a/src/mds/RecoveryQueue.cc b/src/mds/RecoveryQueue.cc new file mode 100644 index 000000000000..caf5ef6e3fe0 --- /dev/null +++ b/src/mds/RecoveryQueue.cc @@ -0,0 +1,132 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "CInode.h" +#include "MDCache.h" +#include "MDS.h" +#include "Locker.h" +#include "osdc/Filer.h" + +#include "RecoveryQueue.h" + + +#define dout_subsys ceph_subsys_mds + + +struct C_MDC_Recover : public Context { + RecoveryQueue *rq; + CInode *in; + uint64_t size; + utime_t mtime; + C_MDC_Recover(RecoveryQueue *rq_, CInode *i) : rq(rq_), in(i), size(0) {} + void finish(int r) { + rq->_recovered(in, r, size, mtime); + } +}; + + +/** + * Progress the queue. Call this after enqueuing something or on + * completion of something. 
+ */ +void RecoveryQueue::advance() +{ + dout(10) << "RecoveryQueue::advance " << file_recover_queue.size() << " queued, " + << file_recovering.size() << " recovering" << dendl; + + while (file_recovering.size() < 5 && + !file_recover_queue.empty()) { + CInode *in = *file_recover_queue.begin(); + file_recover_queue.erase(in); + + inode_t *pi = in->get_projected_inode(); + + // blech + if (pi->client_ranges.size() && !pi->get_max_size()) { + mds->clog.warn() << "bad client_range " << pi->client_ranges + << " on ino " << pi->ino << "\n"; + } + + if (pi->client_ranges.size() && pi->get_max_size()) { + dout(10) << "do_file_recover starting " << in->inode.size << " " << pi->client_ranges + << " " << *in << dendl; + file_recovering.insert(in); + + C_MDC_Recover *fin = new C_MDC_Recover(this, in); + mds->filer->probe(in->inode.ino, &in->inode.layout, in->last, + pi->get_max_size(), &fin->size, &fin->mtime, false, + 0, fin); + } else { + dout(10) << "do_file_recover skipping " << in->inode.size + << " " << *in << dendl; + in->state_clear(CInode::STATE_RECOVERING); + mds->locker->eval(in, CEPH_LOCK_IFILE); + in->auth_unpin(this); + } + } +} + + +/** + * Given an authoritative inode which is in the cache, + * enqueue it for recovery. + */ +void RecoveryQueue::enqueue(CInode *in) +{ + dout(15) << "RecoveryQueue::enqueue " << *in << dendl; + assert(in->is_auth()); + + in->state_clear(CInode::STATE_NEEDSRECOVER); + if (!in->state_test(CInode::STATE_RECOVERING)) { + in->state_set(CInode::STATE_RECOVERING); + in->auth_pin(this); + } + file_recover_queue.insert(in); +} + + +/** + * Call back on completion of Filer probe on an inode. + */ +void RecoveryQueue::_recovered(CInode *in, int r, uint64_t size, utime_t mtime) +{ + dout(10) << "_recovered r=" << r << " size=" << size << " mtime=" << mtime + << " for " << *in << dendl; + + if (r != 0) { + dout(0) << "recovery error! 
" << r << dendl; + if (r == -EBLACKLISTED) { + mds->suicide(); + return; + } + assert(0 == "unexpected error from osd during recovery"); + } + + file_recovering.erase(in); + in->state_clear(CInode::STATE_RECOVERING); + + if (!in->get_parent_dn() && !in->get_projected_parent_dn()) { + dout(10) << " inode has no parents, killing it off" << dendl; + in->auth_unpin(this); + mds->mdcache->remove_inode(in); + } else { + // journal + mds->locker->check_inode_max_size(in, true, true, size, false, 0, mtime); + mds->locker->eval(in, CEPH_LOCK_IFILE); + in->auth_unpin(this); + } + + advance(); +} + diff --git a/src/mds/RecoveryQueue.h b/src/mds/RecoveryQueue.h new file mode 100644 index 000000000000..72d94acabaf3 --- /dev/null +++ b/src/mds/RecoveryQueue.h @@ -0,0 +1,40 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +//class C_MDC_Recover; +// +#ifndef RECOVERY_QUEUE_H +#define RECOVERY_QUEUE_H + +#include <set> + +class CInode; +class MDS; + +class RecoveryQueue { + public: + void enqueue(CInode *in); + void advance(); + RecoveryQueue(MDS *mds_) : mds(mds_) {} + + private: + std::set<CInode*> file_recover_queue; + std::set<CInode*> file_recovering; + void _recovered(CInode *in, int r, uint64_t size, utime_t mtime); + MDS *mds; + + friend class C_MDC_Recover; +}; + +#endif // RECOVERY_QUEUE_H