From cf98b543ef14100c9957d10b91fdb09e84bd793c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 31 Jan 2008 09:25:22 -0800 Subject: [PATCH] mds+kernel: more file_max work; factored out intarithh --- src/ebofs/FileJournal.cc | 6 +- src/ebofs/types.h | 12 +-- src/include/intarith.h | 34 +++++++ src/include/types.h | 10 +- src/kernel/inode.c | 2 +- src/mds/CInode.cc | 3 + src/mds/CInode.h | 12 ++- src/mds/Locker.cc | 203 +++++++++++++++++++++++++-------------- src/mds/Locker.h | 5 + src/mds/Server.cc | 2 +- 10 files changed, 192 insertions(+), 97 deletions(-) create mode 100644 src/include/intarith.h diff --git a/src/ebofs/FileJournal.cc b/src/ebofs/FileJournal.cc index fab49274682d3..35d16e9de69ec 100644 --- a/src/ebofs/FileJournal.cc +++ b/src/ebofs/FileJournal.cc @@ -345,7 +345,7 @@ bool FileJournal::prepare_single_dio_write(bufferlist& bl) bufferlist &ebl = writeq.front().second; off64_t size = 2*sizeof(entry_header_t) + ebl.length(); - size = ROUND_UP_2(size, header.alignment); + size = ROUND_UP_TO(size, header.alignment); check_for_wrap(epoch, write_pos, size); if (full) return false; @@ -419,7 +419,7 @@ void FileJournal::do_write(bufferlist& bl) writing = false; if (memcmp(&old_header, &header, sizeof(header)) == 0) { write_pos += bl.length(); - write_pos = ROUND_UP_2(write_pos, header.alignment); + write_pos = ROUND_UP_TO(write_pos, header.alignment); ebofs->queue_finishers(writingq); } else { dout(10) << "do_write finished write but header changed? not moving write_pos." 
<< dendl; @@ -615,7 +615,7 @@ bool FileJournal::read_entry(bufferlist& bl, epoch_t& epoch) epoch = h.epoch; read_pos += 2*sizeof(entry_header_t) + h.len; - read_pos = ROUND_UP_2(read_pos, header.alignment); + read_pos = ROUND_UP_TO(read_pos, header.alignment); return true; } diff --git a/src/ebofs/types.h b/src/ebofs/types.h index b5d9c1fce9c42..34cd0db9e70aa 100644 --- a/src/ebofs/types.h +++ b/src/ebofs/types.h @@ -34,17 +34,7 @@ using namespace __gnu_cxx; #include "csum.h" -#ifndef MIN -# define MIN(a,b) ((a)<=(b) ? (a):(b)) -#endif -#ifndef MAX -# define MAX(a,b) ((a)>=(b) ? (a):(b)) -#endif - -#ifndef DIV_ROUND_UP -# define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) -#endif -#define ROUND_UP_2(n, d) (((n)+(d)-1) & ~((d)-1)) +#include "include/intarith.h" // disk typedef uint64_t block_t; // disk location/sector/block diff --git a/src/include/intarith.h b/src/include/intarith.h new file mode 100644 index 0000000000000..d305b054b5f4d --- /dev/null +++ b/src/include/intarith.h @@ -0,0 +1,34 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef __CEPH_INTARITH_H +#define __CEPH_INTARITH_H + +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? (a):(b)) +#endif + +#ifndef MAX +# define MAX(a,b) ((a) > (b) ? 
(a):(b)) +#endif + +#ifndef DIV_ROUND_UP +# define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#endif + +#ifndef ROUND_UP_TO +# define ROUND_UP_TO(n, d) ((n)%(d) ? ((n)+(d)-(n)%(d)) : (n)) /* round n (>= 0) up to a multiple of d (> 0); d need not be a power of two */ +#endif + +#endif diff --git a/src/include/types.h b/src/include/types.h index 7e397179a4d20..de051046d14b3 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -42,15 +42,7 @@ using namespace __gnu_cxx; #include "object.h" #include "utime.h" - - -#ifndef MIN -# define MIN(a,b) ((a) < (b) ? (a):(b)) -#endif -#ifndef MAX -# define MAX(a,b) ((a) > (b) ? (a):(b)) -#endif - +#include "intarith.h" // -- stl crap -- diff --git a/src/kernel/inode.c b/src/kernel/inode.c index dc67030939c13..9ccf478054566 100644 --- a/src/kernel/inode.c +++ b/src/kernel/inode.c @@ -465,7 +465,7 @@ int ceph_handle_cap_grant(struct inode *inode, struct ceph_mds_file_caps *grant, /* grant or no-op */ if (cap->caps == newcaps) { - dout(10, "no-op: %d -> %d\n", cap->caps, newcaps); + dout(10, "caps unchanged: %d -> %d\n", cap->caps, newcaps); } else { dout(10, "grant: %d -> %d\n", cap->caps, newcaps); cap->caps = newcaps; diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index d65a0a2c219bd..a5db68f70b2f3 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -75,6 +75,9 @@ ostream& operator<<(ostream& out, CInode& in) out << " " << in.filelock; out << " " << in.dirlock; + if (in.inode.max_size) + out << " size=" << in.inode.size << "/" << in.inode.max_size; + // hack: spit out crap on which clients have caps if (!in.get_client_caps().empty()) { out << " caps={"; diff --git a/src/mds/CInode.h b/src/mds/CInode.h index f78116203e85c..9c4ac07b15220 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -69,7 +69,6 @@ class CInode : public MDSCacheObject { static const int PIN_FREEZING = 13; static const int PIN_FROZEN = 14; static const int PIN_IMPORTINGCAPS = 15; - static const int PIN_FILEUPDATE = -16; const char *pin_name(int p) { switch (p) { @@ -87,7 +86,6 @@ class CInode : public MDSCacheObject { case 
PIN_FREEZING: return "freezing"; case PIN_FROZEN: return "frozen"; case PIN_IMPORTINGCAPS: return "importingcaps"; - case PIN_FILEUPDATE: return "fileupdate"; default: return generic_pin_name(p); } } @@ -151,6 +149,12 @@ class CInode : public MDSCacheObject { return projected_inode.back()->version; } + inode_t *get_projected_inode() { + if (projected_inode.empty()) + return &inode; + else + return projected_inode.back(); + } inode_t *project_inode(); void pop_and_dirty_projected_inode(LogSegment *ls); @@ -282,6 +286,10 @@ public: return ino() < ((CInode*)r)->ino(); } + int64_t get_layout_size_increment() { + return inode.layout.fl_object_size * inode.layout.fl_stripe_count; + } + // -- misc -- void make_path_string(string& s); void make_path(filepath& s); diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 56e6b63ab528b..c6d8514fc99db 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -445,6 +445,20 @@ version_t Locker::issue_file_data_version(CInode *in) return in->inode.file_data_version; } +struct C_Locker_FileUpdate_finish : public Context { + Locker *locker; + CInode *in; + LogSegment *ls; + bool share; + C_Locker_FileUpdate_finish(Locker *l, CInode *i, LogSegment *s, bool e=false) : + locker(l), in(i), ls(s), share(e) {} + void finish(int r) { + in->pop_and_dirty_projected_inode(ls); + in->put(CInode::PIN_PTRWAITER); + if (share && in->is_auth() && in->filelock.is_stable()) + locker->share_new_file_max(in); + } +}; Capability* Locker::issue_new_caps(CInode *in, int mode, @@ -491,15 +505,28 @@ Capability* Locker::issue_new_caps(CInode *in, // re-issue whatever we can cap->issue(cap->pending()); cap->set_last_open(); - - int now = cap->pending(); - if (before != now && - (before & CEPH_CAP_WR) == 0 && - (now & CEPH_CAP_WR)) { - // FIXME FIXME FIXME + + // increase max_size? 
+ inode_t *latest = in->get_projected_inode(); + if (latest->max_size == 0 && + !in->is_base() && + (mode & FILE_MODE_W)) { + int64_t inc = in->get_layout_size_increment(); + int64_t new_max = ROUND_UP_TO(latest->size + inc/2, inc); + dout(10) << "hey, wr caps wanted, max_size 0 -> " << new_max << dendl; + inode_t *pi = in->project_inode(); + pi->version = in->pre_dirty(); + pi->max_size = new_max; + EUpdate *le = new EUpdate(mds->mdlog, "max_size increase on open"); + le->metablob.add_dir_context(in->get_parent_dir()); + le->metablob.add_primary_dentry(in->parent, true, 0, pi); + LogSegment *ls = mds->mdlog->get_current_segment(); + in->get(CInode::PIN_PTRWAITER); + mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, ls, true)); } // twiddle file_data_version? + int now = cap->pending(); if ((before & CEPH_CAP_WRBUFFER) == 0 && (now & CEPH_CAP_WRBUFFER)) { in->inode.file_data_version++; @@ -706,17 +733,104 @@ void Locker::handle_inode_file_caps(MInodeFileCaps *m) -struct C_Locker_FileUpdate_finish : public Context { - Locker *locker; - CInode *in; - LogSegment *ls; - C_Locker_FileUpdate_finish(Locker *l, CInode *i, LogSegment *s) : - locker(l), in(i), ls(s) {} - void finish(int r) { - in->pop_and_dirty_projected_inode(ls); - in->put(CInode::PIN_FILEUPDATE); + + + +void Locker::maybe_journal_inode_update(CInode *in, bool had_or_has_wr, + int64_t size, utime_t mtime, utime_t atime) +{ + inode_t *latest = in->get_projected_inode(); + + // no more writers? + int wanted = in->get_caps_wanted(); + bool no_wr = false; + if (latest->max_size && (wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) == 0) + no_wr = true; + + // atime|mtime|size? + bool dirty = false; + if (atime > latest->atime) + dirty = true; + if (had_or_has_wr) { + if (mtime > latest->mtime) + dirty = true; + if (size > latest->size) + dirty = true; + } + + + // increase max_size? 
+ bool increase_max = false; + int64_t inc = in->get_layout_size_increment(); + if ((wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER|CEPH_CAP_WREXTEND)) && + size > latest->max_size + inc) { + dout(10) << "hey, wr caps wanted, and size " << size + << " > max " << latest->max_size << " *2, increasing" << dendl; + increase_max = true; } -}; + + if ((dirty || no_wr || increase_max) && + !in->is_base()) { // FIXME.. what about root inode mtime/atime? + EUpdate *le = new EUpdate(mds->mdlog, "size|max_size|mtime|atime update"); + inode_t *pi = in->project_inode(); + pi->version = in->pre_dirty(); + if (no_wr) { + dout(7) << " last wr-wanted cap, max_size=0" << dendl; + pi->max_size = 0; + } else if (increase_max) { + int64_t inc = in->get_layout_size_increment(); + int64_t new_max = ROUND_UP_TO(latest->size + inc, inc); + dout(7) << " increasing max_size " << pi->max_size << " to " << new_max << dendl; + pi->max_size = new_max; + } + if (mtime > latest->mtime) { + dout(7) << " taking mtime " << mtime << " > " + << in->inode.mtime << " for " << *in << dendl; + pi->mtime = mtime; + } + if (size > latest->size) { + dout(7) << " taking size " << size << " > " + << in->inode.size << " for " << *in << dendl; + pi->size = size; + } + if (atime > latest->atime) { + dout(7) << " taking atime " << atime << " > " + << in->inode.atime << " for " << *in << dendl; + pi->atime = atime; + } + le->metablob.add_dir_context(in->get_parent_dir()); + le->metablob.add_primary_dentry(in->parent, true, 0, pi); + LogSegment *ls = mds->mdlog->get_current_segment(); + in->get(CInode::PIN_PTRWAITER); + mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, ls)); + } +} + +void Locker::share_new_file_max(CInode *in) +{ + /* + * only share if currently issued a WR cap. if client doesn't have it, + * file_max doesn't matter, and the client will get it if/when they get + * the cap later. 
+ */ + dout(10) << "share_new_file_max on " << *in << dendl; + for (map<int, Capability*>::iterator it = in->client_caps.begin(); + it != in->client_caps.end(); + it++) { + const int client = it->first; + Capability *cap = it->second; + if (cap->pending() & CEPH_CAP_WR) { + dout(10) << "share_new_file_max with client" << client << dendl; + mds->send_message_client(new MClientFileCaps(CEPH_CAP_OP_GRANT, + in->inode, + cap->get_last_seq(), + cap->pending(), + cap->wanted()), + client); + } + } +} + /* * note: we only get these from the client if @@ -744,7 +858,6 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) assert(cap); // filter wanted based on what we could ever give out (given auth/replica status) - int old_wanted = in->get_caps_wanted(); int wanted = m->get_wanted() & in->filelock.caps_allowed_ever(); dout(7) << "handle_client_file_caps seq " << m->get_seq() @@ -755,14 +868,11 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) << dendl; // confirm caps - int had2 = cap->issued(); int had = cap->confirm_receipt(m->get_seq(), m->get_caps()); int has = cap->confirmed(); - dout(10) << "had " << cap_string(had) << " " << cap_string(had2) << " has " << cap_string(has) << dendl; - had |= had2; + dout(10) << "client had " << cap_string(had) << ", has " << cap_string(has) << dendl; // update wanted - bool last_wr = false; // last write cap if (cap->wanted() != wanted) { if (m->get_seq() < cap->get_last_open()) { /* this is awkward. @@ -779,15 +889,6 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) dout(7) << " cap for client" << client << " is now null, removing from " << *in << dendl; in->remove_client_cap(client); - // last wr cap? 
- int new_wanted = in->get_caps_wanted(); - dout(10) << "old_wanted " << cap_string(old_wanted) - << " new_wanted " << cap_string(new_wanted) << dendl; - if ((old_wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER|CEPH_CAP_WREXTEND)) && - !(new_wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER|CEPH_CAP_WREXTEND))) { - last_wr = true; - } - if (!in->is_any_caps()) in->xlist_open_file.remove_myself(); // unpin logsegment if (!in->is_auth()) @@ -797,46 +898,8 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) } } - // atime|mtime|size? - bool dirty = false; - if (m->get_atime() > in->inode.atime) - dirty = true; - if ((has|had) & CEPH_CAP_WR) { - if (m->get_mtime() > in->inode.mtime) - dirty = true; - if (m->get_size() > in->inode.size) - dirty = true; - } - if ((dirty || last_wr) && - !in->is_base()) { // FIXME.. what about root inode mtime/atime? - EUpdate *le = new EUpdate(mds->mdlog, "size|max_size|mtime|atime update"); - inode_t *pi = in->project_inode(); - pi->version = in->pre_dirty(); - if (last_wr) { - dout(7) << " last wr-wanted cap, max_size=0" << dendl; - pi->max_size = 0; - } - if (m->get_mtime() > in->inode.mtime) { - dout(7) << " taking mtime " << m->get_mtime() << " > " - << in->inode.mtime << " for " << *in << dendl; - pi->mtime = m->get_mtime(); - } - if (m->get_size() > in->inode.size) { - dout(7) << " taking size " << m->get_size() << " > " - << in->inode.size << " for " << *in << dendl; - pi->size = m->get_size(); - } - if (m->get_atime() > in->inode.atime) { - dout(7) << " taking atime " << m->get_atime() << " > " - << in->inode.atime << " for " << *in << dendl; - pi->atime = m->get_atime(); - } - le->metablob.add_dir_context(in->get_parent_dir()); - le->metablob.add_primary_dentry(in->parent, true, 0, pi); - LogSegment *ls = mds->mdlog->get_current_segment(); - in->get(CInode::PIN_FILEUPDATE); - mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, ls)); - } + maybe_journal_inode_update(in, (had|has) & CEPH_CAP_WR, + m->get_size(), 
m->get_mtime(), m->get_atime()); // reevaluate, waiters if (!in->filelock.is_stable()) diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 5619245e5cf6e..b5c25aa4e29a6 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -190,7 +190,12 @@ protected: void request_inode_file_caps(CInode *in); void handle_inode_file_caps(class MInodeFileCaps *m); + void maybe_journal_inode_update(CInode *in, bool had_or_has_wr, + int64_t size, utime_t mtime, utime_t atime); + void share_new_file_max(CInode *in); + friend class C_MDL_RequestInodeFileCaps; + friend class C_Locker_FileUpdate_finish; }; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 1121d21d2f1a1..b957f3b776b1e 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -4099,7 +4099,7 @@ void Server::handle_client_openc(MDRequest *mdr) in->inode.mode = req->head.args.open.mode; in->inode.mode |= S_IFREG; in->inode.version = dn->pre_dirty() - 1; - in->inode.max_size = in->inode.layout.fl_object_size * in->inode.layout.fl_stripe_count; + in->inode.max_size = in->get_layout_size_increment(); // prepare finisher C_MDS_openc_finish *fin = new C_MDS_openc_finish(mds, mdr, dn, in); -- 2.39.5