From c8431884c5eb0d06bc9c6f1cda153289f7c43891 Mon Sep 17 00:00:00 2001 From: sageweil Date: Sat, 31 Mar 2007 15:25:12 +0000 Subject: [PATCH] * mds: more bits into MDSCacheObject * utime_t timestamp precision in inode git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1330 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/cephmds2/TODO | 2 + branches/sage/cephmds2/client/Client.cc | 37 +++-- branches/sage/cephmds2/client/Client.h | 8 +- branches/sage/cephmds2/common/Clock.h | 105 +------------- branches/sage/cephmds2/include/types.h | 8 +- branches/sage/cephmds2/include/utime.h | 132 ++++++++++++++++++ branches/sage/cephmds2/mds/CDentry.h | 18 +-- branches/sage/cephmds2/mds/CDir.cc | 1 - branches/sage/cephmds2/mds/CDir.h | 36 ++--- branches/sage/cephmds2/mds/CInode.cc | 7 - branches/sage/cephmds2/mds/CInode.h | 21 +-- branches/sage/cephmds2/mds/FileLock.h | 2 +- branches/sage/cephmds2/mds/MDCache.cc | 4 +- branches/sage/cephmds2/mds/Migrator.cc | 4 +- branches/sage/cephmds2/mds/Server.cc | 76 +++++----- branches/sage/cephmds2/mds/Server.h | 6 +- branches/sage/cephmds2/mds/SimpleLock.h | 2 +- branches/sage/cephmds2/mds/mdstypes.h | 37 ++++- .../sage/cephmds2/messages/MClientRequest.h | 10 +- branches/sage/cephmds2/messages/MInodeLink.h | 6 +- 20 files changed, 270 insertions(+), 252 deletions(-) create mode 100644 branches/sage/cephmds2/include/utime.h diff --git a/branches/sage/cephmds2/TODO b/branches/sage/cephmds2/TODO index 580248baa3f52..5be953a90737d 100644 --- a/branches/sage/cephmds2/TODO +++ b/branches/sage/cephmds2/TODO @@ -78,6 +78,8 @@ mds - remote rename - open(wr cap), open+create - file capabilities i/o +- filelock to control directory mtime, dentry changes + - hmm, may have to change lock ordering, and Server::rdlock_path_pin_ref() - dirfrag split/merge - client readdir for dirfrags - consistency points/snapshots diff --git a/branches/sage/cephmds2/client/Client.cc b/branches/sage/cephmds2/client/Client.cc index 8e145ce9b3457..66c123253ca24 100644 --- a/branches/sage/cephmds2/client/Client.cc +++ b/branches/sage/cephmds2/client/Client.cc @@ -386,7 +386,7 @@ void Client::update_inode_dist(Inode *in, InodeStat *st) Inode* Client::insert_trace(MClientReply *reply) { Inode *cur = root; - time_t now = time(NULL); + utime_t now = g_clock.now(); dout(10) << "insert_trace got " << reply->get_trace_in().size() << " inodes" << endl; @@ -420,8 +420,10 @@ Inode* Client::insert_trace(MClientReply *reply) update_inode_dist(cur, *pin); // set cache ttl - if (g_conf.client_cache_stat_ttl) - cur->valid_until = now + g_conf.client_cache_stat_ttl; + if (g_conf.client_cache_stat_ttl) { + cur->valid_until = now; + cur->valid_until += g_conf.client_cache_stat_ttl; + } } return cur; @@ -1061,7 +1063,7 @@ void Client::implemented_caps(MClientFileCaps *m, Inode *in) << ", acking to " << m->get_source() << endl; if (in->file_caps() == 0) { - in->file_wr_mtime = 0; + in->file_wr_mtime = utime_t(); in->file_wr_size = 0; } @@ -1095,7 +1097,7 @@ void Client::release_caps(Inode *in, } if (in->file_caps() == 0) { - in->file_wr_mtime = 0; + in->file_wr_mtime = utime_t(); in->file_wr_size = 0; } } @@ -1548,7 +1550,7 @@ int Client::_lstat(const char *path, int mask, Inode **in) Dentry *dn = lookup(fpath); inode_t inode; - time_t now = time(NULL); + utime_t now = g_clock.now(); if (dn && now <= dn->inode->valid_until && ((dn->inode->inode.mask & INODE_MASK_ALL_STAT) == INODE_MASK_ALL_STAT)) { @@ -1556,7 +1558,7 @@ int Client::_lstat(const char *path, int mask, Inode **in) dout(10) << "lstat cache hit w/ sufficient inode.mask, valid until " << dn->inode->valid_until << endl; if (g_conf.client_cache_stat_ttl == 0) - dn->inode->valid_until = 0; // only one stat allowed after each readdir + dn->inode->valid_until = utime_t(); // only one stat allowed after each readdir *in = dn->inode; } else { @@ -1786,7 +1788,10 @@ int Client::utime(const char *relpath, struct utimbuf *buf) MClientRequest *req = new MClientRequest(MDS_OP_UTIME, messenger->get_myinst()); req->set_path(path); - req->args.utime = *buf; + req->args.utime.mtime.tv_sec = buf->modtime; + req->args.utime.mtime.tv_usec = 0; + req->args.utime.atime.tv_sec = buf->actime; + req->args.utime.atime.tv_usec = 0; // FIXME where does FUSE maintain user information req->set_caller_uid(getuid()); @@ -1901,7 +1906,7 @@ int Client::getdir(const char *relpath, map& contents) // only open dir if we're actually adding stuff to it! Dir *dir = diri->open_dir(); assert(dir); - time_t now = time(NULL); + utime_t now = g_clock.now(); list::const_iterator pdn = reply->get_dir_dn().begin(); for (list::const_iterator pin = reply->get_dir_in().begin(); @@ -1917,10 +1922,14 @@ int Client::getdir(const char *relpath, map& contents) // put in cache Inode *in = this->insert_inode(dir, *pin, *pdn); - if (g_conf.client_cache_stat_ttl) - in->valid_until = now + g_conf.client_cache_stat_ttl; - else if (g_conf.client_cache_readdir_ttl) - in->valid_until = now + g_conf.client_cache_readdir_ttl; + if (g_conf.client_cache_stat_ttl) { + in->valid_until = now; + in->valid_until += g_conf.client_cache_stat_ttl; + } + else if (g_conf.client_cache_readdir_ttl) { + in->valid_until = now; + in->valid_until += g_conf.client_cache_readdir_ttl; + } // contents to caller too! contents[*pdn] = in->inode; @@ -2608,7 +2617,7 @@ int Client::write(fh_t fh, const char *buf, off_t size, off_t offset) } // mtime - in->file_wr_mtime = in->inode.mtime = g_clock.gettime(); + in->file_wr_mtime = in->inode.mtime = g_clock.now(); // ok! client_lock.Unlock(); diff --git a/branches/sage/cephmds2/client/Client.h b/branches/sage/cephmds2/client/Client.h index 7726818046100..1bcfbee9134d4 100644 --- a/branches/sage/cephmds2/client/Client.h +++ b/branches/sage/cephmds2/client/Client.h @@ -120,7 +120,7 @@ class InodeCap { class Inode { public: inode_t inode; // the actual inode - time_t valid_until; + utime_t valid_until; // about the dir (if this is one!) int dir_auth; @@ -131,7 +131,7 @@ class Inode { map caps; // mds -> InodeCap map stale_caps; // mds -> cap .. stale - time_t file_wr_mtime; // [writers] time of last write + utime_t file_wr_mtime; // [writers] time of last write off_t file_wr_size; // [writers] largest offset we've written to int num_open_rd, num_open_wr, num_open_lazy; // num readers, writers @@ -163,9 +163,9 @@ class Inode { Inode(inode_t _inode, ObjectCacher *_oc) : inode(_inode), - valid_until(0), + valid_until(0, 0), dir_auth(-1), dir_hashed(false), dir_replicated(false), - file_wr_mtime(0), file_wr_size(0), + file_wr_mtime(0, 0), file_wr_size(0), num_open_rd(0), num_open_wr(0), num_open_lazy(0), ref(0), dir(0), dn(0), symlink(0), fc(_oc, _inode), diff --git a/branches/sage/cephmds2/common/Clock.h b/branches/sage/cephmds2/common/Clock.h index 92a2b2bddf6d0..e5f647e63cccc 100644 --- a/branches/sage/cephmds2/common/Clock.h +++ b/branches/sage/cephmds2/common/Clock.h @@ -21,113 +21,10 @@ #include #include -#include #include "Mutex.h" - -// -------- -// utime_t - -class utime_t { - private: - struct timeval tv; - - struct timeval& timeval() { return tv; } - friend class Clock; - - - public: - void normalize() { - if (tv.tv_usec > 1000*1000) { - tv.tv_sec += tv.tv_usec / (1000*1000); - tv.tv_usec %= 1000*1000; - } - } - - // cons - utime_t() { tv.tv_sec = 0; tv.tv_usec = 0; normalize(); } - utime_t(time_t s, int u) { tv.tv_sec = s; tv.tv_usec = u; normalize(); } - - // accessors - time_t sec() const { return tv.tv_sec; } - long usec() const { return tv.tv_usec; } - int nsec() const { return tv.tv_usec*1000; } - - // ref accessors/modifiers - time_t& sec_ref() { return tv.tv_sec; } - // FIXME: tv.tv_usec is a __darwin_suseconds_t on Darwin. - // is just casting it to long& OK? - long& usec_ref() { return (long&) tv.tv_usec; } - - // cast to double - operator double() { - return (double)sec() + ((double)usec() / 1000000.0L); - } -}; - -// arithmetic operators -inline utime_t operator+(const utime_t& l, const utime_t& r) { - return utime_t( l.sec() + r.sec() + (l.usec()+r.usec())/1000000L, - (l.usec()+r.usec())%1000000L ); -} -inline utime_t& operator+=(utime_t& l, const utime_t& r) { - l.sec_ref() += r.sec() + (l.usec()+r.usec())/1000000L; - l.usec_ref() += r.usec(); - l.usec_ref() %= 1000000L; - return l; -} -inline utime_t& operator+=(utime_t& l, double f) { - double fs = trunc(f); - double us = (f - fs) / (double)1000000.0; - l.sec_ref() += (long)fs; - l.usec_ref() += (long)us; - l.normalize(); - return l; -} - -inline utime_t operator-(const utime_t& l, const utime_t& r) { - return utime_t( l.sec() - r.sec() - (l.usec()= r.usec()) - l.usec_ref() -= r.usec(); - else { - l.usec_ref() += 1000000L - r.usec(); - l.sec_ref()--; - } - return l; -} -inline utime_t& operator-=(utime_t& l, double f) { - l += -f; - return l; -} - -inline bool operator>(const utime_t& a, const utime_t& b) -{ - return (a.sec() > b.sec()) || (a.sec() == b.sec() && a.usec() > b.usec()); -} -inline bool operator<(const utime_t& a, const utime_t& b) -{ - return (a.sec() < b.sec()) || (a.sec() == b.sec() && a.usec() < b.usec()); -} - -// ostream -inline std::ostream& operator<<(std::ostream& out, const utime_t& t) -{ - //return out << t.sec() << "." << t.usec(); - out << (long)t.sec() << "."; - out.setf(std::ios::right); - out.fill('0'); - out << std::setw(6) << t.usec(); - out.unsetf(std::ios::right); - return out; - - //return out << (long)t.sec << "." << ios::setf(ios::right) << ios::fill('0') << t.usec() << ios::usetf(); -} - +#include "include/utime.h" diff --git a/branches/sage/cephmds2/include/types.h b/branches/sage/cephmds2/include/types.h index 98a33062f0d22..cde39b3398615 100644 --- a/branches/sage/cephmds2/include/types.h +++ b/branches/sage/cephmds2/include/types.h @@ -35,6 +35,8 @@ using namespace __gnu_cxx; #include "object.h" +#include "utime.h" + #ifndef MIN # define MIN(a,b) ((a) < (b) ? (a):(b)) @@ -238,7 +240,7 @@ struct inode_t { FileLayout layout; // ?immutable? // affected by any inode change... - time_t ctime; // inode change time + utime_t ctime; // inode change time // nlink int nlink; @@ -251,8 +253,8 @@ struct inode_t { // file (data access) off_t size; - time_t mtime; // file data modify time. - time_t atime; // file data access time. + utime_t mtime; // file data modify time. + utime_t atime; // file data access time. int mask; diff --git a/branches/sage/cephmds2/include/utime.h b/branches/sage/cephmds2/include/utime.h new file mode 100644 index 0000000000000..88083b13dbf90 --- /dev/null +++ b/branches/sage/cephmds2/include/utime.h @@ -0,0 +1,132 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef __UTIME_H +#define __UTIME_H + +#include + + +// -------- +// utime_t + +typedef struct timeval _utime_t; + +class utime_t { + private: + struct timeval tv; + + struct timeval& timeval() { return tv; } + friend class Clock; + + + public: + void normalize() { + if (tv.tv_usec > 1000*1000) { + tv.tv_sec += tv.tv_usec / (1000*1000); + tv.tv_usec %= 1000*1000; + } + } + + // cons + utime_t() { tv.tv_sec = 0; tv.tv_usec = 0; normalize(); } + //utime_t(time_t s) { tv.tv_sec = s; tv.tv_usec = 0; } + utime_t(time_t s, int u) { tv.tv_sec = s; tv.tv_usec = u; normalize(); } + utime_t(const _utime_t &v) : tv(v) {} + /* + utime_t(double d) { + tv.tv_sec = (time_t)trunc(d); + tv.tv_usec = (__suseconds_t)((d - tv.tv_sec) / (double)1000000.0); + } + */ + + // accessors + time_t sec() const { return tv.tv_sec; } + long usec() const { return tv.tv_usec; } + int nsec() const { return tv.tv_usec*1000; } + + // ref accessors/modifiers + time_t& sec_ref() { return tv.tv_sec; } + // FIXME: tv.tv_usec is a __darwin_suseconds_t on Darwin. + // is just casting it to long& OK? + long& usec_ref() { return (long&) tv.tv_usec; } + + // cast to double + operator double() { + return (double)sec() + ((double)usec() / 1000000.0L); + } +}; + +// arithmetic operators +inline utime_t operator+(const utime_t& l, const utime_t& r) { + return utime_t( l.sec() + r.sec() + (l.usec()+r.usec())/1000000L, + (l.usec()+r.usec())%1000000L ); +} +inline utime_t& operator+=(utime_t& l, const utime_t& r) { + l.sec_ref() += r.sec() + (l.usec()+r.usec())/1000000L; + l.usec_ref() += r.usec(); + l.usec_ref() %= 1000000L; + return l; +} +inline utime_t& operator+=(utime_t& l, double f) { + double fs = trunc(f); + double us = (f - fs) / (double)1000000.0; + l.sec_ref() += (long)fs; + l.usec_ref() += (long)us; + l.normalize(); + return l; +} + +inline utime_t operator-(const utime_t& l, const utime_t& r) { + return utime_t( l.sec() - r.sec() - (l.usec()= r.usec()) + l.usec_ref() -= r.usec(); + else { + l.usec_ref() += 1000000L - r.usec(); + l.sec_ref()--; + } + return l; +} +inline utime_t& operator-=(utime_t& l, double f) { + l += -f; + return l; +} + +inline bool operator>(const utime_t& a, const utime_t& b) +{ + return (a.sec() > b.sec()) || (a.sec() == b.sec() && a.usec() > b.usec()); +} +inline bool operator<(const utime_t& a, const utime_t& b) +{ + return (a.sec() < b.sec()) || (a.sec() == b.sec() && a.usec() < b.usec()); +} + +// ostream +inline std::ostream& operator<<(std::ostream& out, const utime_t& t) +{ + //return out << t.sec() << "." << t.usec(); + out << (long)t.sec() << "."; + out.setf(std::ios::right); + out.fill('0'); + out << std::setw(6) << t.usec(); + out.unsetf(std::ios::right); + return out; + + //return out << (long)t.sec << "." << ios::setf(ios::right) << ios::fill('0') << t.usec() << ios::usetf(); +} + +#endif diff --git a/branches/sage/cephmds2/mds/CDentry.h b/branches/sage/cephmds2/mds/CDentry.h index be5c96abe6cda..42253e28aca16 100644 --- a/branches/sage/cephmds2/mds/CDentry.h +++ b/branches/sage/cephmds2/mds/CDentry.h @@ -45,25 +45,16 @@ bool operator<(const CDentry& l, const CDentry& r); class CDentry : public MDSCacheObject, public LRUObject { public: // state - static const int STATE_AUTH = (1<<0); + //static const int STATE_AUTH = (1<<0); static const int STATE_DIRTY = (1<<1); // pins - static const int PIN_INODEPIN = 0; // linked inode is pinned - static const int PIN_REPLICATED = 1; // replicated by another MDS - static const int PIN_DIRTY = 2; // - static const int PIN_PROXY = 3; // - static const int PIN_XLOCK = 4; - static const int PIN_REQUEST = -1; + static const int PIN_INODEPIN = 1; // linked inode is pinned + const char *pin_name(int p) { switch (p) { - case PIN_REQUEST: return "request"; case PIN_INODEPIN: return "inodepin"; - case PIN_REPLICATED: return "replicated"; - case PIN_DIRTY: return "dirty"; - case PIN_PROXY: return "proxy"; - case PIN_XLOCK: return "xlock"; - default: assert(0); + default: return generic_pin_name(p); } }; @@ -171,7 +162,6 @@ public: pair authority(); - bool is_auth() { return state & STATE_AUTH; } bool is_dirty() { return state & STATE_DIRTY; } bool is_clean() { return !is_dirty(); } diff --git a/branches/sage/cephmds2/mds/CDir.cc b/branches/sage/cephmds2/mds/CDir.cc index ecbefd8acee44..31b9c4149eb55 100644 --- a/branches/sage/cephmds2/mds/CDir.cc +++ b/branches/sage/cephmds2/mds/CDir.cc @@ -67,7 +67,6 @@ ostream& operator<<(ostream& out, CDir& dir) out << " ap=" << dir.get_auth_pins() << "+" << dir.get_nested_auth_pins(); out << " state=" << dir.get_state(); - if (dir.state_test(CDir::STATE_PROXY)) out << "|proxy"; if (dir.state_test(CDir::STATE_COMPLETE)) out << "|complete"; if (dir.state_test(CDir::STATE_FREEZINGTREE)) out << "|freezingtree"; if (dir.state_test(CDir::STATE_FROZENTREE)) out << "|frozentree"; diff --git a/branches/sage/cephmds2/mds/CDir.h b/branches/sage/cephmds2/mds/CDir.h index 2f3f2096cee24..c55acb4b527cc 100644 --- a/branches/sage/cephmds2/mds/CDir.h +++ b/branches/sage/cephmds2/mds/CDir.h @@ -67,49 +67,34 @@ typedef map CDir_map_t; class CDir : public MDSCacheObject { public: // -- pins -- - static const int PIN_CHILD = 0; - static const int PIN_OPENED = 1; // open by another node - static const int PIN_WAITER = 2; // waiter(s) - //static const int PIN_IMPORT = 3; + static const int PIN_CHILD = 2; + static const int PIN_WAITER = 3; // waiter(s) static const int PIN_EXPORT = 4; - //static const int PIN_FREEZE = 5; - // static const int PIN_FREEZELEAF = 6; - static const int PIN_PROXY = 7; // auth just changed. static const int PIN_AUTHPIN = 8; static const int PIN_IMPORTING = 9; static const int PIN_EXPORTING = 10; static const int PIN_IMPORTBOUND = 11; static const int PIN_EXPORTBOUND = 12; - static const int PIN_DIRTY = 15; - static const int PIN_REQUEST = 16; static const int PIN_LOGGINGEXPORTFINISH = 17; const char *pin_name(int p) { switch (p) { case PIN_CHILD: return "child"; - case PIN_OPENED: return "opened"; case PIN_WAITER: return "waiter"; - //case PIN_IMPORT: return "import"; case PIN_EXPORT: return "export"; case PIN_EXPORTING: return "exporting"; case PIN_IMPORTING: return "importing"; case PIN_IMPORTBOUND: return "importbound"; case PIN_EXPORTBOUND: return "exportbound"; - //case PIN_FREEZE: return "freeze"; - // case PIN_FREEZELEAF: return "freezeleaf"; - case PIN_PROXY: return "proxy"; case PIN_AUTHPIN: return "authpin"; - case PIN_DIRTY: return "dirty"; - case PIN_REQUEST: return "request"; case PIN_LOGGINGEXPORTFINISH: return "loggingexportfinish"; - default: assert(0); + default: return generic_pin_name(p); } } // -- state -- - static const unsigned STATE_AUTH = (1<< 0); // auth for this dir (hashing doesn't count) - static const unsigned STATE_PROXY = (1<< 1); // proxy auth + //static const unsigned STATE_AUTH = (1<< 0); // auth for this dir (hashing doesn't count) + static const unsigned STATE_DIRTY = (1<< 1); // has been modified since last commit static const unsigned STATE_COMPLETE = (1<< 2); // the complete contents are in cache - static const unsigned STATE_DIRTY = (1<< 3); // has been modified since last commit static const unsigned STATE_FROZENTREE = (1<< 4); // root of tree (bounded by exports) static const unsigned STATE_FREEZINGTREE = (1<< 5); // in process of freezing static const unsigned STATE_FROZENDIR = (1<< 6); @@ -137,14 +122,14 @@ class CDir : public MDSCacheObject { STATE_EXPORT |STATE_IMPORTING |STATE_IMPORTBOUND|STATE_EXPORTBOUND - |STATE_FROZENTREE|STATE_PROXY; + |STATE_FROZENTREE; static const unsigned MASK_STATE_EXPORT_KEPT = STATE_EXPORTING |STATE_IMPORTBOUND|STATE_EXPORTBOUND |STATE_FROZENTREE |STATE_FROZENDIR - |STATE_EXPORT - |STATE_PROXY; + |STATE_EXPORT; + // -- rep spec -- static const int REP_NONE = 0; @@ -327,9 +312,6 @@ class CDir : public MDSCacheObject { bool is_clean() { return !state_test(STATE_DIRTY); } bool is_auth() { return state & STATE_AUTH; } - bool is_proxy() { return state & STATE_PROXY; } - //bool is_import() { return state & STATE_IMPORT; } - //bool is_export() { return state & STATE_EXPORT; } bool is_exporting() { return state & STATE_EXPORTING; } bool is_importing() { return state & STATE_IMPORTING; } @@ -591,7 +573,7 @@ class CDirExport { dir->replicas = replicas; dout(12) << "replicas in export is " << replicas << ", dir now " << dir->replicas << endl; if (!replicas.empty()) - dir->get(CDir::PIN_OPENED); + dir->get(CDir::PIN_REPLICATED); if (dir->is_dirty()) { dir->get(CDir::PIN_DIRTY); } diff --git a/branches/sage/cephmds2/mds/CInode.cc b/branches/sage/cephmds2/mds/CInode.cc index 9cef818b1a90c..e89736026bcbb 100644 --- a/branches/sage/cephmds2/mds/CInode.cc +++ b/branches/sage/cephmds2/mds/CInode.cc @@ -247,13 +247,6 @@ void CInode::close_dirfrags() } -void CInode::set_auth(bool a) -{ - if (a) state_set(STATE_AUTH); - else state_clear(STATE_AUTH); -} - - void CInode::make_path(string& s) { diff --git a/branches/sage/cephmds2/mds/CInode.h b/branches/sage/cephmds2/mds/CInode.h index 4f6a9cfbaa7c3..d0460bfe05030 100644 --- a/branches/sage/cephmds2/mds/CInode.h +++ b/branches/sage/cephmds2/mds/CInode.h @@ -52,47 +52,40 @@ ostream& operator<<(ostream& out, CInode& in); class CInode : public MDSCacheObject { public: // -- pins -- - static const int PIN_CACHED = 1; + //static const int PIN_REPLICATED = 1; static const int PIN_DIR = 2; - static const int PIN_DIRTY = 4; // must flush static const int PIN_PROXY = 5; // can't expire yet static const int PIN_WAITER = 6; // waiter static const int PIN_CAPS = 7; // local fh's static const int PIN_AUTHPIN = 8; static const int PIN_IMPORTING = -9; // importing - static const int PIN_REQUEST = -10; // request is logging, finishing static const int PIN_RENAMESRC = 11; // pinned on dest for foreign rename static const int PIN_ANCHORING = 12; static const int PIN_UNANCHORING = 13; static const int PIN_OPENINGDIR = 14; static const int PIN_REMOTEPARENT = 15; - static const int PIN_DENTRYLOCK = 16; const char *pin_name(int p) { switch (p) { - case PIN_CACHED: return "cached"; case PIN_DIR: return "dir"; - case PIN_DIRTY: return "dirty"; case PIN_PROXY: return "proxy"; case PIN_WAITER: return "waiter"; case PIN_CAPS: return "caps"; case PIN_AUTHPIN: return "authpin"; case PIN_IMPORTING: return "importing"; - case PIN_REQUEST: return "request"; case PIN_RENAMESRC: return "renamesrc"; case PIN_ANCHORING: return "anchoring"; case PIN_UNANCHORING: return "unanchoring"; case PIN_OPENINGDIR: return "openingdir"; case PIN_REMOTEPARENT: return "remoteparent"; - case PIN_DENTRYLOCK: return "dentrylock"; - default: assert(0); + default: return generic_pin_name(p); } } // -- state -- - static const int STATE_AUTH = (1<<0); - static const int STATE_ROOT = (1<<1); - static const int STATE_DIRTY = (1<<2); + //static const int STATE_AUTH = (1<<0); + static const int STATE_DIRTY = (1<<1); + static const int STATE_ROOT = (1<<2); //static const int STATE_UNSAFE = (1<<3); // not logged yet //static const int STATE_DANGLING = (1<<4); // delete me when i expire; i have no dentry static const int STATE_EXPORTING = (1<<6); // on nonauth bystander. @@ -205,8 +198,6 @@ protected: bool is_root() { return state & STATE_ROOT; } bool is_stray() { return MDS_INO_IS_STRAY(inode.ino); } - bool is_auth() { return state & STATE_AUTH; } - void set_auth(bool auth); inodeno_t ino() const { return inode.ino; } inode_t& get_inode() { return inode; } @@ -615,7 +606,7 @@ public: in->replicas = replicas; if (!replicas.empty()) - in->get(CInode::PIN_CACHED); + in->get(CInode::PIN_REPLICATED); int off = 0; in->hardlock._decode(hardlock, off); diff --git a/branches/sage/cephmds2/mds/FileLock.h b/branches/sage/cephmds2/mds/FileLock.h index 1073789a6f195..cf396e47522f6 100644 --- a/branches/sage/cephmds2/mds/FileLock.h +++ b/branches/sage/cephmds2/mds/FileLock.h @@ -208,7 +208,7 @@ class FileLock : public SimpleLock { inline ostream& operator<<(ostream& out, FileLock& l) { out << "(" << get_lock_type_name(l.get_type()) - << get_filelock_state_name(l.get_state()); + << " " << get_filelock_state_name(l.get_state()); if (!l.get_gather_set().empty()) out << " g=" << l.get_gather_set(); if (l.get_num_rdlock()) out << " r=" << l.get_num_rdlock(); diff --git a/branches/sage/cephmds2/mds/MDCache.cc b/branches/sage/cephmds2/mds/MDCache.cc index 249a627c1cf60..57008fa99c605 100644 --- a/branches/sage/cephmds2/mds/MDCache.cc +++ b/branches/sage/cephmds2/mds/MDCache.cc @@ -199,7 +199,7 @@ CInode *MDCache::create_root_inode() root->inode.mode = 0755 | INODE_MODE_DIR; root->inode.size = 0; root->inode.ctime = - root->inode.mtime = g_clock.gettime(); + root->inode.mtime = g_clock.now(); root->inode.nlink = 1; root->inode.layout = g_OSD_MDDirLayout; @@ -263,7 +263,7 @@ CInode *MDCache::create_stray_inode(int whose) stray->inode.mode = 0755 | INODE_MODE_DIR; stray->inode.size = 0; stray->inode.ctime = - stray->inode.mtime = g_clock.gettime(); + stray->inode.mtime = g_clock.now(); stray->inode.nlink = 1; stray->inode.layout = g_OSD_MDDirLayout; diff --git a/branches/sage/cephmds2/mds/Migrator.cc b/branches/sage/cephmds2/mds/Migrator.cc index 46e64cdefadbc..735d2798c8281 100644 --- a/branches/sage/cephmds2/mds/Migrator.cc +++ b/branches/sage/cephmds2/mds/Migrator.cc @@ -791,7 +791,7 @@ void Migrator::encode_export_inode(CInode *in, bufferlist& enc_state, int new_au // mark auth assert(in->is_auth()); - in->set_auth(false); + in->state_clear(CInode::STATE_AUTH); in->replica_nonce = CInode::EXPORT_NONCE; // *** other state too? @@ -1760,7 +1760,7 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist& bl, int& off, int ol in = new CInode(mds->mdcache); added = true; } else { - in->set_auth(true); + in->state_set(CInode::STATE_AUTH); } // state after link -- or not! -sage diff --git a/branches/sage/cephmds2/mds/Server.cc b/branches/sage/cephmds2/mds/Server.cc index 3c07af279766b..5377d7c7a858c 100644 --- a/branches/sage/cephmds2/mds/Server.cc +++ b/branches/sage/cephmds2/mds/Server.cc @@ -418,7 +418,7 @@ CInode* Server::prepare_new_inode(MClientRequest *req, CDir *dir) CInode *in = mdcache->create_inode(); in->inode.uid = req->get_caller_uid(); in->inode.gid = req->get_caller_gid(); - in->inode.ctime = in->inode.mtime = in->inode.atime = g_clock.gettime(); // now + in->inode.ctime = in->inode.mtime = in->inode.atime = g_clock.now(); // now dout(10) << "prepare_new_inode " << *in << endl; // bump modify pop @@ -501,10 +501,10 @@ CInode* Server::rdlock_path_pin_ref(MDRequest *mdr, bool want_auth) // open ref inode CInode *ref = 0; - if (mdr->trace.empty()) + if (trace.empty()) ref = mdcache->get_root(); else { - CDentry *dn = mdr->trace[mdr->trace.size()-1]; + CDentry *dn = trace[trace.size()-1]; // if no inode, fw to dentry auth? if (want_auth && @@ -526,6 +526,7 @@ CInode* Server::rdlock_path_pin_ref(MDRequest *mdr, bool want_auth) ref = mdcache->get_dentry_inode(dn, mdr); if (!ref) return 0; } + dout(10) << "ref is " << *ref << endl; // fw to inode auth? if (want_auth && !ref->is_auth()) { @@ -628,11 +629,8 @@ CDentry* Server::rdlock_path_xlock_dentry(MDRequest *mdr, bool okexist, bool mus set rdlocks; set xlocks; - for (unsigned i=0; ilock); - } - dout(10) << "will rd or x lock " << *dn << endl; if (dn->is_null()) xlocks.insert(&dn->lock); // new dn, xlock else @@ -651,8 +649,6 @@ CDentry* Server::rdlock_path_xlock_dentry(MDRequest *mdr, bool okexist, bool mus -// FIXME: this probably should go somewhere else. - CDir* Server::try_open_auth_dir(CInode *diri, frag_t fg, MDRequest *mdr) { CDir *dir = diri->get_dirfrag(fg); @@ -772,9 +768,9 @@ class C_MDS_utime_finish : public Context { MDRequest *mdr; CInode *in; version_t pv; - time_t mtime, atime; + utime_t mtime, atime; public: - C_MDS_utime_finish(MDS *m, MDRequest *r, CInode *i, version_t pdv, time_t mt, time_t at) : + C_MDS_utime_finish(MDS *m, MDRequest *r, CInode *i, version_t pdv, utime_t mt, utime_t at) : mds(m), mdr(r), in(i), pv(pdv), mtime(mt), atime(at) { } @@ -810,8 +806,8 @@ void Server::handle_client_utime(MDRequest *mdr) // prepare version_t pdv = cur->pre_dirty(); - time_t mtime = req->args.utime.modtime; - time_t atime = req->args.utime.actime; + utime_t mtime = req->args.utime.mtime; + utime_t atime = req->args.utime.atime; C_MDS_utime_finish *fin = new C_MDS_utime_finish(mds, mdr, cur, pdv, mtime, atime); @@ -822,7 +818,7 @@ void Server::handle_client_utime(MDRequest *mdr) inode_t *pi = le->metablob.add_dentry(cur->parent, true); pi->mtime = mtime; pi->atime = mtime; - pi->ctime = g_clock.gettime(); + pi->ctime = g_clock.now(); pi->version = pdv; mdlog->submit_entry(le); @@ -887,7 +883,7 @@ void Server::handle_client_chmod(MDRequest *mdr) inode_t *pi = le->metablob.add_dentry(cur->parent, true); pi->mode = mode; pi->version = pdv; - pi->ctime = g_clock.gettime(); + pi->ctime = g_clock.now(); mdlog->submit_entry(le); mdlog->wait_for_sync(fin); @@ -948,7 +944,7 @@ void Server::handle_client_chown(MDRequest *mdr) if (uid >= 0) pi->uid = uid; if (gid >= 0) pi->gid = gid; pi->version = pdv; - pi->ctime = g_clock.gettime(); + pi->ctime = g_clock.now(); mdlog->submit_entry(le); mdlog->wait_for_sync(fin); @@ -1316,10 +1312,10 @@ class C_MDS_link_local_finish : public Context { CDentry *dn; CInode *targeti; version_t dpv; - time_t tctime; - time_t tpv; + utime_t tctime; + version_t tpv; public: - C_MDS_link_local_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ti, time_t ct) : + C_MDS_link_local_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ti, utime_t ct) : mds(m), mdr(r), dn(d), targeti(ti), dpv(d->get_projected_version()), tctime(ct), @@ -1352,7 +1348,7 @@ void Server::_link_local(MDRequest *mdr, CDentry *dn, CInode *targeti) // update journaled target inode pi->nlink++; - pi->ctime = g_clock.gettime(); + pi->ctime = g_clock.now(); pi->version = tpdv; // finisher @@ -1364,7 +1360,7 @@ void Server::_link_local(MDRequest *mdr, CDentry *dn, CInode *targeti) } void Server::_link_local_finish(MDRequest *mdr, CDentry *dn, CInode *targeti, - version_t dpv, time_t tctime, version_t tpv) + version_t dpv, utime_t tctime, version_t tpv) { dout(10) << "_link_local_finish " << *dn << " to " << *targeti << endl; @@ -1562,11 +1558,11 @@ class C_MDS_unlink_local_finish : public Context { CDentry *dn; CDentry *straydn; version_t ipv; // referred inode - time_t ictime; + utime_t ictime; version_t dpv; // deleted dentry public: C_MDS_unlink_local_finish(MDS *m, MDRequest *r, CDentry *d, CDentry *sd, - version_t v, time_t ct) : + version_t v, utime_t ct) : mds(m), mdr(r), dn(d), straydn(sd), ipv(v), ictime(ct), dpv(d->get_projected_version()) { } @@ -1621,7 +1617,7 @@ void Server::_unlink_local(MDRequest *mdr, CDentry *dn) // update journaled target inode pi->nlink--; - pi->ctime = g_clock.gettime(); + pi->ctime = g_clock.now(); pi->version = ipv; // finisher @@ -1637,7 +1633,7 @@ void Server::_unlink_local(MDRequest *mdr, CDentry *dn) void Server::_unlink_local_finish(MDRequest *mdr, CDentry *dn, CDentry *straydn, - version_t ipv, time_t ictime, version_t dpv) + version_t ipv, utime_t ictime, version_t dpv) { dout(10) << "_unlink_local " << *dn << endl; @@ -1968,13 +1964,13 @@ class C_MDS_rename_local_finish : public Context { version_t straypv; version_t destpv; version_t srcpv; - time_t ictime; + utime_t ictime; public: version_t atid1; version_t atid2; C_MDS_rename_local_finish(MDS *m, MDRequest *r, CDentry *sdn, CDentry *ddn, CDentry *stdn, - version_t v, time_t ct) : + version_t v, utime_t ct) : mds(m), mdr(r), srcdn(sdn), destdn(ddn), straydn(stdn), ipv(v), @@ -2121,13 +2117,13 @@ void Server::_rename_local(MDRequest *mdr, if (pi) { // update journaled target inode pi->nlink--; - pi->ctime = g_clock.gettime(); + pi->ctime = g_clock.now(); pi->version = ipv; } C_MDS_rename_local_finish *fin = new C_MDS_rename_local_finish(mds, mdr, srcdn, destdn, straydn, - ipv, pi ? pi->ctime:0); + ipv, pi ? pi->ctime:utime_t()); if (anchorfin) { // doing anchor update prepare first @@ -2159,7 +2155,7 @@ void Server::_rename_local_reanchored(LogEvent *le, C_MDS_rename_local_finish *f void Server::_rename_local_finish(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn, version_t srcpv, version_t destpv, version_t straypv, version_t ipv, - time_t ictime, + utime_t ictime, version_t atid1, version_t atid2) { MClientRequest *req = mdr->client_request(); @@ -2427,9 +2423,9 @@ class C_MDS_truncate_purged : public Context { CInode *in; version_t pv; off_t size; - time_t ctime; + utime_t ctime; public: - C_MDS_truncate_purged(MDS *m, MDRequest *r, CInode *i, version_t pdv, off_t sz, time_t ct) : + C_MDS_truncate_purged(MDS *m, MDRequest *r, CInode *i, version_t pdv, off_t sz, utime_t ct) : mds(m), mdr(r), in(i), pv(pdv), size(sz), ctime(ct) { } @@ -2456,9 +2452,9 @@ class C_MDS_truncate_logged : public Context { CInode *in; version_t pv; off_t size; - time_t ctime; + utime_t ctime; public: - C_MDS_truncate_logged(MDS *m, MDRequest *r, CInode *i, version_t pdv, off_t sz, time_t ct) : + C_MDS_truncate_logged(MDS *m, MDRequest *r, CInode *i, version_t pdv, off_t sz, utime_t ct) : mds(m), mdr(r), in(i), pv(pdv), size(sz), ctime(ct) { } @@ -2492,7 +2488,7 @@ void Server::handle_client_truncate(MDRequest *mdr) // prepare version_t pdv = cur->pre_dirty(); - time_t ctime = g_clock.gettime(); + utime_t ctime = g_clock.now(); Context *fin = new C_MDS_truncate_logged(mds, mdr, cur, pdv, req->args.truncate.length, ctime); @@ -2590,9 +2586,9 @@ class C_MDS_open_truncate_purged : public Context { MDRequest *mdr; CInode *in; version_t pv; - time_t ctime; + utime_t ctime; public: - C_MDS_open_truncate_purged(MDS *m, MDRequest *r, CInode *i, version_t pdv, time_t ct) : + C_MDS_open_truncate_purged(MDS *m, MDRequest *r, CInode *i, version_t pdv, utime_t ct) : mds(m), mdr(r), in(i), pv(pdv), ctime(ct) { } @@ -2618,9 +2614,9 @@ class C_MDS_open_truncate_logged : public Context { MDRequest *mdr; CInode *in; version_t pv; - time_t ctime; + utime_t ctime; public: - C_MDS_open_truncate_logged(MDS *m, MDRequest *r, CInode *i, version_t pdv, time_t ct) : + C_MDS_open_truncate_logged(MDS *m, MDRequest *r, CInode *i, version_t pdv, utime_t ct) : mds(m), mdr(r), in(i), pv(pdv), ctime(ct) { } @@ -2642,7 +2638,7 @@ void Server::handle_client_opent(MDRequest *mdr) // prepare version_t pdv = cur->pre_dirty(); - time_t ctime = g_clock.gettime(); + utime_t ctime = g_clock.now(); Context *fin = new C_MDS_open_truncate_logged(mds, mdr, cur, pdv, ctime); diff --git a/branches/sage/cephmds2/mds/Server.h b/branches/sage/cephmds2/mds/Server.h index 0bc1deb9b96b0..22992ad84904b 100644 --- a/branches/sage/cephmds2/mds/Server.h +++ b/branches/sage/cephmds2/mds/Server.h @@ -83,7 +83,7 @@ public: void _link_local(MDRequest *mdr, CDentry *dn, CInode *targeti); void _link_local_finish(MDRequest *mdr, CDentry *dn, CInode *targeti, - version_t, time_t, version_t); + version_t, utime_t, version_t); void _link_remote(MDRequest *mdr, CDentry *dn, CInode *targeti); // unlink @@ -92,7 +92,7 @@ public: void _unlink_local(MDRequest *mdr, CDentry *dn); void _unlink_local_finish(MDRequest *mdr, CDentry *dn, CDentry *straydn, - version_t, time_t, version_t); + version_t, utime_t, version_t); void _unlink_remote(MDRequest *mdr, CDentry *dn); // rename @@ -109,7 +109,7 @@ public: void _rename_local_finish(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn, version_t srcpv, version_t destpv, version_t straypv, version_t ipv, - time_t ictime, + utime_t ictime, version_t atid1, version_t atid2); diff --git a/branches/sage/cephmds2/mds/SimpleLock.h b/branches/sage/cephmds2/mds/SimpleLock.h index 8787bd5167f43..63fc19599560e 100644 --- a/branches/sage/cephmds2/mds/SimpleLock.h +++ b/branches/sage/cephmds2/mds/SimpleLock.h @@ -216,7 +216,7 @@ public: inline ostream& operator<<(ostream& out, SimpleLock& l) { out << "(" << get_lock_type_name(l.get_type()) - << get_simplelock_state_name(l.get_state()); + << " " << get_simplelock_state_name(l.get_state()); if (!l.get_gather_set().empty()) out << " g=" << l.get_gather_set(); if (l.get_num_rdlock()) out << " r=" << l.get_num_rdlock(); diff --git a/branches/sage/cephmds2/mds/mdstypes.h b/branches/sage/cephmds2/mds/mdstypes.h index 1d228c959a9f3..5985ca768380c 100644 --- a/branches/sage/cephmds2/mds/mdstypes.h +++ b/branches/sage/cephmds2/mds/mdstypes.h @@ -236,14 +236,37 @@ inline mds_load_t operator/( mds_load_t& a, double d ) // ================================================================ -#define MDS_PIN_REPLICATED 1 -#define MDS_STATE_AUTH (1<<0) +//#define MDS_PIN_REPLICATED 1 +//#define MDS_STATE_AUTH (1<<0) class MLock; class Context; class SimpleLock; class MDSCacheObject { + public: + // -- pins -- + const static int PIN_REPLICATED = 1000; + static const int PIN_DIRTY = 1001; + const static int PIN_RDLOCK = -1002; + const static int PIN_XLOCK = 1003; + static const int PIN_REQUEST = -1004; + + const char *generic_pin_name(int p) { + switch (p) { + case PIN_REPLICATED: return "replicated"; + case PIN_DIRTY: return "dirty"; + case PIN_RDLOCK: return "rdlock"; + case PIN_XLOCK: return "xlock"; + case PIN_REQUEST: return "request"; + default: assert(0); + } + } + + // -- state -- + const static int STATE_AUTH = (1<<0); + static const int STATE_DIRTY = (1<<1); + protected: unsigned state; // state bits @@ -268,7 +291,7 @@ class MDSCacheObject { unsigned state_test(unsigned mask) { return state & mask; } void state_reset(unsigned s) { state = s; } - bool is_auth() { return state & MDS_STATE_AUTH; } + bool is_auth() { return state_test(STATE_AUTH); } // -------------------------------------------- // pins @@ -336,12 +359,12 @@ class MDSCacheObject { if (replicas.count(mds)) return ++replicas[mds]; // inc nonce if (replicas.empty()) - get(MDS_PIN_REPLICATED); + get(PIN_REPLICATED); return replicas[mds] = 1; } void add_replica(int mds, int nonce) { if (replicas.empty()) - get(MDS_PIN_REPLICATED); + get(PIN_REPLICATED); replicas[mds] = nonce; } int get_replica_nonce(int mds) { @@ -352,11 +375,11 @@ class MDSCacheObject { assert(replicas.count(mds)); replicas.erase(mds); if (replicas.empty()) - put(MDS_PIN_REPLICATED); + put(PIN_REPLICATED); } void clear_replicas() { if (!replicas.empty()) - put(MDS_PIN_REPLICATED); + put(PIN_REPLICATED); replicas.clear(); } map::iterator replicas_begin() { return replicas.begin(); } diff --git a/branches/sage/cephmds2/messages/MClientRequest.h b/branches/sage/cephmds2/messages/MClientRequest.h index bd70fd50a4d55..72ea8fbcee252 100644 --- a/branches/sage/cephmds2/messages/MClientRequest.h +++ b/branches/sage/cephmds2/messages/MClientRequest.h @@ -51,8 +51,8 @@ #define MDS_OP_LSTAT 101 #define MDS_OP_FSTAT 102 #define MDS_OP_UTIME 1102 -#define MDS_OP_CHMOD 1103 -#define MDS_OP_CHOWN 1104 +#define MDS_OP_CHMOD 1104 +#define MDS_OP_CHOWN 1105 #define MDS_OP_READDIR 200 #define MDS_OP_MKNOD 1201 @@ -103,8 +103,10 @@ class MClientRequest : public Message { struct { _frag_t frag; } readdir; - struct utimbuf utime; - struct timeval utimes; + struct { + _utime_t mtime; + _utime_t atime; + } utime; struct { mode_t mode; } chmod; diff --git a/branches/sage/cephmds2/messages/MInodeLink.h b/branches/sage/cephmds2/messages/MInodeLink.h index 1d03cdf7fc82b..3ca0ad6df5438 100644 --- a/branches/sage/cephmds2/messages/MInodeLink.h +++ b/branches/sage/cephmds2/messages/MInodeLink.h @@ -41,7 +41,7 @@ private: int op; // see above bool inc; // true == ++, false == -- - time_t ctime; + utime_t ctime; } st; public: @@ -50,8 +50,8 @@ public: int get_op() { return st.op; } bool get_inc() { return st.inc; } - time_t get_ctime() { return st.ctime; } - void set_ctime(time_t ct) { st.ctime = ct; } + utime_t get_ctime() { return st.ctime; } + void set_ctime(utime_t ct) { st.ctime = ct; } MInodeLink() {} MInodeLink(int op, inodeno_t ino, bool inc, metareqid_t ri) : -- 2.39.5