From: Sage Weil Date: Thu, 12 Jun 2008 04:00:11 +0000 (-0700) Subject: mds: move inode_t et al to mdstypes.h; remove inode_t from osdc/* X-Git-Tag: v0.3~136 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2615072ee8a9a2cf4473780ad1133c1c1c52fda5;p=ceph.git mds: move inode_t et al to mdstypes.h; remove inode_t from osdc/* --- diff --git a/src/client/Client.cc b/src/client/Client.cc index b4175e75266..83bf5b16bd7 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -3138,7 +3138,7 @@ int Client::_read(Fh *f, __s64 offset, __u64 size, bufferlist *bl) // object cache OFF -- non-atomic sync read from osd // do sync read - Objecter::OSDRead *rd = filer->prepare_read(in->inode, offset, size, bl, 0); + Objecter::OSDRead *rd = filer->prepare_read(in->inode.ino, &in->inode.layout, offset, size, bl, 0); if (in->hack_balance_reads || g_conf.client_hack_balance_reads) rd->flags |= CEPH_OSD_OP_BALANCE_READS; r = objecter->readx(rd, onfinish); @@ -3296,7 +3296,7 @@ int Client::_write(Fh *f, __s64 offset, __u64 size, const char *buf) unsafe_sync_write++; in->get_cap_ref(CEPH_CAP_WRBUFFER); - filer->write(in->inode, offset, size, bl, 0, onfinish, onsafe); + filer->write(in->inode.ino, &in->inode.layout, offset, size, bl, 0, onfinish, onsafe); while (!done) cond.Wait(client_lock); diff --git a/src/client/SyntheticClient.cc b/src/client/SyntheticClient.cc index b1c7343a73d..5d08af20b4f 100644 --- a/src/client/SyntheticClient.cc +++ b/src/client/SyntheticClient.cc @@ -3147,7 +3147,7 @@ int SyntheticClient::chunk_file(string &filename) lock.Lock(); Context *onfinish = new C_SafeCond(&lock, &cond, &done); - filer->read(inode, pos, get, &bl, 0, onfinish); + filer->read(inode.ino, &inode.layout, pos, get, &bl, 0, onfinish); while (!done) cond.Wait(lock); lock.Unlock(); diff --git a/src/dumpjournal.cc b/src/dumpjournal.cc index c9c00bf972e..86e7358fa2c 100644 --- a/src/dumpjournal.cc +++ b/src/dumpjournal.cc @@ -95,7 +95,7 @@ int main(int argc, const char **argv, const char *envp[]) log_inode.layout = g_default_mds_log_layout; objecter = new Objecter(messenger, &monmap, &osdmap, lock); - journaler = new Journaler(log_inode, objecter, 0, &lock); + journaler = new Journaler(log_inode.ino, &log_inode.layout, objecter, 0, &lock); objecter->set_client_incarnation(0); @@ -113,7 +113,7 @@ int main(int argc, const char **argv, const char *envp[]) Filer filer(objecter); bufferlist bl; - filer.read(log_inode, start, len, &bl, 0, new C_SafeCond(&lock, &cond, &done)); + filer.read(log_inode.ino, &log_inode.layout, start, len, &bl, 0, new C_SafeCond(&lock, &cond, &done)); lock.Lock(); while (!done) cond.Wait(lock); diff --git a/src/ebofs/BlockDevice.cc b/src/ebofs/BlockDevice.cc index d0a296d7168..ee02cf2ca12 100644 --- a/src/ebofs/BlockDevice.cc +++ b/src/ebofs/BlockDevice.cc @@ -18,7 +18,7 @@ #include #include -#include +//#include #include #include #include diff --git a/src/include/types.h b/src/include/types.h index 2186085872f..34b011a0a2e 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -140,7 +140,7 @@ typedef __u64 coll_t; // -------------------------------------- -// inode +// ino typedef __u64 _inodeno_t; @@ -171,6 +171,8 @@ namespace __gnu_cxx { } +// file modes + static inline bool file_mode_is_readonly(int mode) { return (mode & CEPH_FILE_MODE_WR) == 0; } @@ -178,212 +180,11 @@ static inline bool file_mode_is_readonly(int mode) { inline int DT_TO_MODE(int dt) { return dt << 12; } + inline unsigned char MODE_TO_DT(int mode) { return mode >> 12; } -struct FileLayout { - /* file -> object mapping */ - __u32 fl_stripe_unit; /* stripe unit, in bytes. must be multiple of page size. */ - __u32 fl_stripe_count; /* over this many objects */ - __u32 fl_object_size; /* until objects are this big, then move to new objects */ - __u32 fl_cas_hash; /* 0 = none; 1 = sha256 */ - - /* pg -> disk layout */ - __u32 fl_object_stripe_unit; /* for per-object parity, if any */ - - /* object -> pg layout */ - __s32 fl_pg_preferred; /* preferred primary for pg, if any (-1 = none) */ - __u8 fl_pg_type; /* pg type; see PG_TYPE_* */ - __u8 fl_pg_size; /* pg size (num replicas, raid stripe width, etc. */ - __u8 fl_pg_pool; /* implies crush ruleset AND object namespace */ -}; - - -struct frag_info_t { - version_t version; - - // this frag - utime_t mtime; - __u64 nfiles; // files - __u64 nsubdirs; // subdirs - __u64 size() const { return nfiles + nsubdirs; } - - // this frag + children - utime_t rctime; - __u64 rbytes; - __u64 rfiles; - __u64 rsubdirs; - __u64 rsize() const { return rfiles + rsubdirs; } - __u64 ranchors; // for dirstat, includes inode's anchored flag. - - void take_diff(const frag_info_t &cur, frag_info_t &acc) { - if (cur.mtime > mtime) - rctime = mtime = cur.mtime; - nfiles += cur.nfiles - acc.nfiles; - nsubdirs += cur.nsubdirs - acc.nsubdirs; - - if (cur.rctime > rctime) - rctime = cur.rctime; - rbytes += cur.rbytes - acc.rbytes; - rfiles += cur.rfiles - acc.rfiles; - rsubdirs += cur.rsubdirs - acc.rsubdirs; - ranchors += cur.ranchors - acc.ranchors; - acc = cur; - acc.version = version; - } - - void encode(bufferlist &bl) const { - ::encode(version, bl); - ::encode(mtime, bl); - ::encode(nfiles, bl); - ::encode(nsubdirs, bl); - ::encode(rbytes, bl); - ::encode(rfiles, bl); - ::encode(rsubdirs, bl); - ::encode(ranchors, bl); - ::encode(rctime, bl); - } - void decode(bufferlist::iterator &bl) { - ::decode(version, bl); - ::decode(mtime, bl); - ::decode(nfiles, bl); - ::decode(nsubdirs, bl); - ::decode(rbytes, bl); - ::decode(rfiles, bl); - ::decode(rsubdirs, bl); - ::decode(ranchors, bl); - ::decode(rctime, bl); - } -}; -WRITE_CLASS_ENCODER(frag_info_t) - -inline bool operator==(const frag_info_t &l, const frag_info_t &r) { - return memcmp(&l, &r, sizeof(l)) == 0; -} - -inline ostream& operator<<(ostream &out, const frag_info_t &f) { - return out << "f(v" << f.version - << " m" << f.mtime - << " " << f.size() << "=" << f.nfiles << "+" << f.nsubdirs - << " rc" << f.rctime - << " b" << f.rbytes - << " a" << f.ranchors - << " " << f.rsize() << "=" << f.rfiles << "+" << f.rsubdirs - << ")"; -} - -struct inode_t { - // base (immutable) - inodeno_t ino; - ceph_file_layout layout; // ?immutable? - uint32_t rdev; // if special file - - // affected by any inode change... - utime_t ctime; // inode change time - - // perm (namespace permissions) - uint32_t mode; - uid_t uid; - gid_t gid; - - // nlink - int32_t nlink; - bool anchored; // auth only? - - // file (data access) - uint64_t size; // on directory, # dentries - uint64_t max_size; // client(s) are auth to write this much... - utime_t mtime; // file data modify time. - utime_t atime; // file data access time. - uint64_t time_warp_seq; // count of (potential) mtime/atime timewarps (i.e., utimes()) - - // dirfrag, recursive accounting - frag_info_t dirstat; - frag_info_t accounted_dirstat; // what dirfrag has seen - - // special stuff - version_t version; // auth only - version_t file_data_version; // auth only - - // file type - bool is_symlink() const { return (mode & S_IFMT) == S_IFLNK; } - bool is_dir() const { return (mode & S_IFMT) == S_IFDIR; } - bool is_file() const { return (mode & S_IFMT) == S_IFREG; } - - void encode(bufferlist &bl) const { - ::encode(ino, bl); - ::encode(layout, bl); - ::encode(rdev, bl); - ::encode(ctime, bl); - - ::encode(mode, bl); - ::encode(uid, bl); - ::encode(gid, bl); - - ::encode(nlink, bl); - ::encode(anchored, bl); - - ::encode(size, bl); - ::encode(max_size, bl); - ::encode(mtime, bl); - ::encode(atime, bl); - ::encode(time_warp_seq, bl); - - ::encode(dirstat, bl); - ::encode(accounted_dirstat, bl); - - ::encode(version, bl); - ::encode(file_data_version, bl); - } - void decode(bufferlist::iterator &p) { - ::decode(ino, p); - ::decode(layout, p); - ::decode(rdev, p); - ::decode(ctime, p); - - ::decode(mode, p); - ::decode(uid, p); - ::decode(gid, p); - - ::decode(nlink, p); - ::decode(anchored, p); - - ::decode(size, p); - ::decode(max_size, p); - ::decode(mtime, p); - ::decode(atime, p); - ::decode(time_warp_seq, p); - - ::decode(dirstat, p); - ::decode(accounted_dirstat, p); - - ::decode(version, p); - ::decode(file_data_version, p); - } -}; -WRITE_CLASS_ENCODER(inode_t) - -/* - * like an inode, but for a dir frag - */ -struct fnode_t { - version_t version; - frag_info_t fragstat, accounted_fragstat; - - void encode(bufferlist &bl) const { - ::encode(version, bl); - ::encode(fragstat, bl); - ::encode(accounted_fragstat, bl); - } - void decode(bufferlist::iterator &bl) { - ::decode(version, bl); - ::decode(fragstat, bl); - ::decode(accounted_fragstat, bl); - } -}; -WRITE_CLASS_ENCODER(fnode_t) - // dentries diff --git a/src/mds/IdAllocator.cc b/src/mds/IdAllocator.cc index 408fac57c1b..81f3ab988eb 100644 --- a/src/mds/IdAllocator.cc +++ b/src/mds/IdAllocator.cc @@ -106,7 +106,7 @@ void IdAllocator::save(Context *onfinish, version_t v) waitfor_save[version].push_back(onfinish); // write (async) - mds->filer->write(inode, + mds->filer->write(inode.ino, &inode.layout, 0, bl.length(), bl, 0, 0, new C_ID_Save(this, version)); @@ -172,7 +172,7 @@ void IdAllocator::load(Context *onfinish) state = STATE_OPENING; C_ID_Load *c = new C_ID_Load(this, onfinish); - mds->filer->read(inode, + mds->filer->read(inode.ino, &inode.layout, 0, ceph_file_layout_su(inode.layout), &c->bl, 0, c); diff --git a/src/mds/IdAllocator.h b/src/mds/IdAllocator.h index 51001f22366..90dab5c6294 100644 --- a/src/mds/IdAllocator.h +++ b/src/mds/IdAllocator.h @@ -16,7 +16,7 @@ #ifndef __IDALLOCATOR_H #define __IDALLOCATOR_H -#include "include/types.h" +#include "mdstypes.h" #include "include/interval_set.h" #include "include/buffer.h" #include "include/Context.h" diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index bd985a5584f..6cf7187969b 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -2967,7 +2967,7 @@ void MDCache::do_file_recover() dout(10) << "do_file_recover starting " << in->inode.size << "/" << in->inode.max_size << " " << *in << dendl; file_recovering.insert(in); - mds->filer->probe(in->inode, in->inode.max_size, &in->inode.size, false, + mds->filer->probe(in->inode.ino, &in->inode.layout, in->inode.max_size, &in->inode.size, false, 0, new C_MDC_Recover(this, in)); } else { dout(10) << "do_file_recover skipping " << in->inode.size << "/" << in->inode.max_size @@ -3063,7 +3063,7 @@ void MDCache::_do_purge_inode(CInode *in, off_t newsize, off_t oldsize) // remove if (newsize < oldsize) { - mds->filer->remove(in->inode, newsize, oldsize-newsize, 0, + mds->filer->remove(in->inode.ino, &in->inode.layout, newsize, oldsize-newsize, 0, 0, new C_MDC_PurgeFinish(this, in, newsize, oldsize)); } else { // no need, empty file, just log it diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index feb728dc07b..56220b89039 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -86,7 +86,7 @@ void MDLog::init_journaler() // log streamer if (journaler) delete journaler; - journaler = new Journaler(log_inode, mds->objecter, logger, &mds->mds_lock); + journaler = new Journaler(log_inode.ino, &log_inode.layout, mds->objecter, logger, &mds->mds_lock); } void MDLog::write_head(Context *c) diff --git a/src/mds/SessionMap.cc b/src/mds/SessionMap.cc index fbe17bf4494..169965e16af 100644 --- a/src/mds/SessionMap.cc +++ b/src/mds/SessionMap.cc @@ -62,7 +62,7 @@ void SessionMap::load(Context *onload) waiting_for_load.push_back(onload); C_SM_Load *c = new C_SM_Load(this); - mds->filer->read(inode, + mds->filer->read(inode.ino, &inode.layout, 0, ceph_file_layout_su(inode.layout), &c->bl, 0, c); @@ -112,7 +112,7 @@ void SessionMap::save(Context *onsave, version_t needv) init_inode(); encode(bl); committing = version; - mds->filer->write(inode, + mds->filer->write(inode.ino, &inode.layout, 0, bl.length(), bl, 0, 0, new C_SM_Save(this, version)); diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index ce657482f91..1b8f5aeca0c 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -49,6 +49,198 @@ using namespace std; #define MDS_TRAVERSE_FAIL 4 + + + +struct frag_info_t { + version_t version; + + // this frag + utime_t mtime; + __u64 nfiles; // files + __u64 nsubdirs; // subdirs + __u64 size() const { return nfiles + nsubdirs; } + + // this frag + children + utime_t rctime; + __u64 rbytes; + __u64 rfiles; + __u64 rsubdirs; + __u64 rsize() const { return rfiles + rsubdirs; } + __u64 ranchors; // for dirstat, includes inode's anchored flag. + + void take_diff(const frag_info_t &cur, frag_info_t &acc) { + if (cur.mtime > mtime) + rctime = mtime = cur.mtime; + nfiles += cur.nfiles - acc.nfiles; + nsubdirs += cur.nsubdirs - acc.nsubdirs; + + if (cur.rctime > rctime) + rctime = cur.rctime; + rbytes += cur.rbytes - acc.rbytes; + rfiles += cur.rfiles - acc.rfiles; + rsubdirs += cur.rsubdirs - acc.rsubdirs; + ranchors += cur.ranchors - acc.ranchors; + acc = cur; + acc.version = version; + } + + void encode(bufferlist &bl) const { + ::encode(version, bl); + ::encode(mtime, bl); + ::encode(nfiles, bl); + ::encode(nsubdirs, bl); + ::encode(rbytes, bl); + ::encode(rfiles, bl); + ::encode(rsubdirs, bl); + ::encode(ranchors, bl); + ::encode(rctime, bl); + } + void decode(bufferlist::iterator &bl) { + ::decode(version, bl); + ::decode(mtime, bl); + ::decode(nfiles, bl); + ::decode(nsubdirs, bl); + ::decode(rbytes, bl); + ::decode(rfiles, bl); + ::decode(rsubdirs, bl); + ::decode(ranchors, bl); + ::decode(rctime, bl); + } +}; +WRITE_CLASS_ENCODER(frag_info_t) + +inline bool operator==(const frag_info_t &l, const frag_info_t &r) { + return memcmp(&l, &r, sizeof(l)) == 0; +} + +inline ostream& operator<<(ostream &out, const frag_info_t &f) { + return out << "f(v" << f.version + << " m" << f.mtime + << " " << f.size() << "=" << f.nfiles << "+" << f.nsubdirs + << " rc" << f.rctime + << " b" << f.rbytes + << " a" << f.ranchors + << " " << f.rsize() << "=" << f.rfiles << "+" << f.rsubdirs + << ")"; +} + +struct inode_t { + // base (immutable) + inodeno_t ino; + ceph_file_layout layout; // ?immutable? + uint32_t rdev; // if special file + + // affected by any inode change... + utime_t ctime; // inode change time + + // perm (namespace permissions) + uint32_t mode; + uid_t uid; + gid_t gid; + + // nlink + int32_t nlink; + bool anchored; // auth only? + + // file (data access) + uint64_t size; // on directory, # dentries + uint64_t max_size; // client(s) are auth to write this much... + utime_t mtime; // file data modify time. + utime_t atime; // file data access time. + uint64_t time_warp_seq; // count of (potential) mtime/atime timewarps (i.e., utimes()) + + // dirfrag, recursive accounting + frag_info_t dirstat; + frag_info_t accounted_dirstat; // what dirfrag has seen + + // special stuff + version_t version; // auth only + version_t file_data_version; // auth only + + // file type + bool is_symlink() const { return (mode & S_IFMT) == S_IFLNK; } + bool is_dir() const { return (mode & S_IFMT) == S_IFDIR; } + bool is_file() const { return (mode & S_IFMT) == S_IFREG; } + + void encode(bufferlist &bl) const { + ::encode(ino, bl); + ::encode(layout, bl); + ::encode(rdev, bl); + ::encode(ctime, bl); + + ::encode(mode, bl); + ::encode(uid, bl); + ::encode(gid, bl); + + ::encode(nlink, bl); + ::encode(anchored, bl); + + ::encode(size, bl); + ::encode(max_size, bl); + ::encode(mtime, bl); + ::encode(atime, bl); + ::encode(time_warp_seq, bl); + + ::encode(dirstat, bl); + ::encode(accounted_dirstat, bl); + + ::encode(version, bl); + ::encode(file_data_version, bl); + } + void decode(bufferlist::iterator &p) { + ::decode(ino, p); + ::decode(layout, p); + ::decode(rdev, p); + ::decode(ctime, p); + + ::decode(mode, p); + ::decode(uid, p); + ::decode(gid, p); + + ::decode(nlink, p); + ::decode(anchored, p); + + ::decode(size, p); + ::decode(max_size, p); + ::decode(mtime, p); + ::decode(atime, p); + ::decode(time_warp_seq, p); + + ::decode(dirstat, p); + ::decode(accounted_dirstat, p); + + ::decode(version, p); + ::decode(file_data_version, p); + } +}; +WRITE_CLASS_ENCODER(inode_t) + +/* + * like an inode, but for a dir frag + */ +struct fnode_t { + version_t version; + frag_info_t fragstat, accounted_fragstat; + + void encode(bufferlist &bl) const { + ::encode(version, bl); + ::encode(fragstat, bl); + ::encode(accounted_fragstat, bl); + } + void decode(bufferlist::iterator &bl) { + ::decode(version, bl); + ::decode(fragstat, bl); + ::decode(accounted_fragstat, bl); + } +}; +WRITE_CLASS_ENCODER(fnode_t) + + + +// ========= +// reqeusts + struct metareqid_t { entity_name_t name; __u64 tid; diff --git a/src/osdc/Filer.cc b/src/osdc/Filer.cc index d0f5fbfe4db..8ce281cb219 100644 --- a/src/osdc/Filer.cc +++ b/src/osdc/Filer.cc @@ -18,10 +18,6 @@ #include "Filer.h" #include "osd/OSDMap.h" -//#include "messages/MOSDRead.h" -//#include "messages/MOSDReadReply.h" -//#include "messages/MOSDWrite.h" -//#include "messages/MOSDWriteReply.h" #include "messages/MOSDOp.h" #include "messages/MOSDOpReply.h" #include "messages/MOSDMap.h" @@ -47,7 +43,8 @@ public: } }; -int Filer::probe(inode_t& inode, +int Filer::probe(inodeno_t ino, + ceph_file_layout *layout, __u64 start_from, __u64 *end, // LB, when !fwd bool fwd, @@ -55,14 +52,14 @@ int Filer::probe(inode_t& inode, Context *onfinish) { dout(10) << "probe " << (fwd ? "fwd ":"bwd ") - << hex << inode.ino << dec + << hex << ino << dec << " starting from " << start_from << dendl; - Probe *probe = new Probe(inode, start_from, end, flags, fwd, onfinish); + Probe *probe = new Probe(ino, *layout, start_from, end, flags, fwd, onfinish); // period (bytes before we jump unto a new set of object(s)) - __u64 period = ceph_file_layout_period(inode.layout); + __u64 period = ceph_file_layout_period(*layout); // start with 1+ periods. probe->probing_len = period; @@ -83,12 +80,12 @@ int Filer::probe(inode_t& inode, void Filer::_probe(Probe *probe) { - dout(10) << "_probe " << hex << probe->inode.ino << dec + dout(10) << "_probe " << hex << probe->ino << dec << " " << probe->from << "~" << probe->probing_len << dendl; // map range onto objects - file_to_extents(probe->inode.ino, &probe->inode.layout, probe->from, probe->probing_len, probe->probing); + file_to_extents(probe->ino, &probe->layout, probe->from, probe->probing_len, probe->probing); for (list::iterator p = probe->probing.begin(); p != probe->probing.end(); @@ -101,7 +98,7 @@ void Filer::_probe(Probe *probe) void Filer::_probed(Probe *probe, object_t oid, __u64 size) { - dout(10) << "_probed " << probe->inode.ino << " object " << hex << oid << dec << " has size " << size << dendl; + dout(10) << "_probed " << probe->ino << " object " << hex << oid << dec << " has size " << size << dendl; probe->known[oid] = size; assert(probe->ops.count(oid)); @@ -121,7 +118,7 @@ void Filer::_probed(Probe *probe, object_t oid, __u64 size) p != probe->probing.end(); p++) { __u64 shouldbe = p->length+p->start; - dout(10) << "_probed " << probe->inode.ino << " object " << hex << p->oid << dec + dout(10) << "_probed " << probe->ino << " object " << hex << p->oid << dec << " should be " << shouldbe << ", actual is " << probe->known[p->oid] << dendl; @@ -154,7 +151,7 @@ void Filer::_probed(Probe *probe, object_t oid, __u64 size) if (!found) { // keep probing! dout(10) << "_probed didn't find end, probing further" << dendl; - __u64 period = ceph_file_layout_period(probe->inode.layout); + __u64 period = ceph_file_layout_period(probe->layout); if (probe->fwd) { probe->from += probe->probing_len; assert(probe->from % period == 0); diff --git a/src/osdc/Filer.h b/src/osdc/Filer.h index 5b037731af8..043b62f18cb 100644 --- a/src/osdc/Filer.h +++ b/src/osdc/Filer.h @@ -50,7 +50,8 @@ class Filer { // probes struct Probe { - inode_t inode; + inodeno_t ino; + ceph_file_layout layout; __u64 from; // for !fwd, this is start of extent we are probing, thus possibly < our endpoint. __u64 *end; int flags; @@ -65,8 +66,8 @@ class Filer { map known; map ops; - Probe(inode_t &i, __u64 f, __u64 *e, int fl, bool fw, Context *c) : - inode(i), from(f), end(e), flags(fl), fwd(fw), onfinish(c), probing_len(0) {} + Probe(inodeno_t i, ceph_file_layout &l, __u64 f, __u64 *e, int fl, bool fw, Context *c) : + ino(i), layout(l), from(f), end(e), flags(fl), fwd(fw), onfinish(c), probing_len(0) {} }; class C_Probe; @@ -84,27 +85,30 @@ class Filer { } /*** async file interface ***/ - Objecter::OSDRead *prepare_read(inode_t& inode, + Objecter::OSDRead *prepare_read(inodeno_t ino, + ceph_file_layout *layout, __u64 offset, size_t len, bufferlist *bl, int flags) { Objecter::OSDRead *rd = objecter->prepare_read(bl, flags); - file_to_extents(inode.ino, &inode.layout, offset, len, rd->extents); + file_to_extents(ino, layout, offset, len, rd->extents); return rd; } - int read(inode_t& inode, + int read(inodeno_t ino, + ceph_file_layout *layout, __u64 offset, size_t len, bufferlist *bl, // ptr to data int flags, Context *onfinish) { - Objecter::OSDRead *rd = prepare_read(inode, offset, len, bl, flags); + Objecter::OSDRead *rd = prepare_read(ino, layout, offset, len, bl, flags); return objecter->readx(rd, onfinish) > 0 ? 0:-1; } - int write(inode_t& inode, - __u64 offset, + int write(inodeno_t ino, + ceph_file_layout *layout, + __u64 offset, size_t len, bufferlist& bl, int flags, @@ -112,29 +116,31 @@ class Filer { Context *oncommit, objectrev_t rev=0) { Objecter::OSDWrite *wr = objecter->prepare_write(bl, flags); - file_to_extents(inode.ino, &inode.layout, offset, len, wr->extents, rev); + file_to_extents(ino, layout, offset, len, wr->extents, rev); return objecter->modifyx(wr, onack, oncommit) > 0 ? 0:-1; } - int zero(inode_t& inode, - __u64 offset, + int zero(inodeno_t ino, + ceph_file_layout *layout, + __u64 offset, size_t len, int flags, Context *onack, Context *oncommit) { Objecter::OSDModify *z = objecter->prepare_modify(CEPH_OSD_OP_ZERO, flags); - file_to_extents(inode.ino, &inode.layout, offset, len, z->extents); + file_to_extents(ino, layout, offset, len, z->extents); return objecter->modifyx(z, onack, oncommit) > 0 ? 0:-1; } - int remove(inode_t& inode, + int remove(inodeno_t ino, + ceph_file_layout *layout, __u64 offset, size_t len, int flags, Context *onack, Context *oncommit) { Objecter::OSDModify *z = objecter->prepare_modify(CEPH_OSD_OP_DELETE, flags); - file_to_extents(inode.ino, &inode.layout, offset, len, z->extents); + file_to_extents(ino, layout, offset, len, z->extents); return objecter->modifyx(z, onack, oncommit) > 0 ? 0:-1; } @@ -143,7 +149,8 @@ class Filer { * specify direction, * and whether we stop when we find data, or hole. */ - int probe(inode_t& inode, + int probe(inodeno_t ino, + ceph_file_layout *layout, __u64 start_from, __u64 *end, bool fwd, diff --git a/src/osdc/Journaler.cc b/src/osdc/Journaler.cc index b43a6cd90e1..8a9b1efdcc0 100644 --- a/src/osdc/Journaler.cc +++ b/src/osdc/Journaler.cc @@ -31,7 +31,7 @@ void Journaler::reset() state = STATE_ACTIVE; write_pos = flush_pos = ack_pos = safe_pos = read_pos = requested_pos = received_pos = - expire_pos = trimming_pos = trimmed_pos = ceph_file_layout_period(inode.layout); + expire_pos = trimming_pos = trimmed_pos = ceph_file_layout_period(layout); } @@ -81,7 +81,7 @@ void Journaler::recover(Context *onread) dout(1) << "read_head" << dendl; state = STATE_READHEAD; C_ReadHead *fin = new C_ReadHead(this); - filer.read(inode, 0, sizeof(Header), &fin->bl, CEPH_OSD_OP_INCLOCK_FAIL, fin); + filer.read(ino, &layout, 0, sizeof(Header), &fin->bl, CEPH_OSD_OP_INCLOCK_FAIL, fin); } void Journaler::_finish_read_head(int r, bufferlist& bl) @@ -112,7 +112,7 @@ void Journaler::_finish_read_head(int r, bufferlist& bl) // probe the log state = STATE_PROBING; C_ProbeEnd *fin = new C_ProbeEnd(this); - filer.probe(inode, h.write_pos, (__u64 *)&fin->end, true, CEPH_OSD_OP_INCLOCK_FAIL, fin); + filer.probe(ino, &layout, h.write_pos, (__u64 *)&fin->end, true, CEPH_OSD_OP_INCLOCK_FAIL, fin); } void Journaler::_finish_probe_end(int r, __s64 end) @@ -168,7 +168,7 @@ void Journaler::write_head(Context *oncommit) bufferlist bl; ::encode(last_written, bl); - filer.write(inode, 0, bl.length(), bl, CEPH_OSD_OP_INCLOCK_FAIL, + filer.write(ino, &layout, 0, bl.length(), bl, CEPH_OSD_OP_INCLOCK_FAIL, NULL, new C_WriteHead(this, last_written, oncommit)); } @@ -276,7 +276,7 @@ __s64 Journaler::append_entry(bufferlist& bl, Context *onsync) if (!g_conf.journaler_allow_split_entries) { // will we span a stripe boundary? - int p = ceph_file_layout_su(inode.layout); + int p = ceph_file_layout_su(layout); if (write_pos / p != (write_pos + (__s64)(bl.length() + sizeof(s))) / p) { // yes. // move write_pos forward. @@ -338,7 +338,7 @@ void Journaler::_do_flush() // submit write for anything pending // flush _start_ pos to _finish_flush utime_t now = g_clock.now(); - filer.write(inode, flush_pos, len, write_buf, + filer.write(ino, &layout, flush_pos, len, write_buf, CEPH_OSD_OP_INCLOCK_FAIL, new C_Flush(this, flush_pos, now, false), // on ACK new C_Flush(this, flush_pos, now, true)); // on COMMIT @@ -526,7 +526,7 @@ void Journaler::_issue_read(__s64 len) << ", read pointers " << read_pos << "/" << received_pos << "/" << (requested_pos+len) << dendl; - filer.read(inode, requested_pos, len, &reading_buf, CEPH_OSD_OP_INCLOCK_FAIL, + filer.read(ino, &layout, requested_pos, len, &reading_buf, CEPH_OSD_OP_INCLOCK_FAIL, new C_Read(this)); requested_pos += len; } @@ -686,7 +686,7 @@ public: void Journaler::trim() { __s64 trim_to = last_committed.expire_pos; - trim_to -= trim_to % ceph_file_layout_period(inode.layout); + trim_to -= trim_to % ceph_file_layout_period(layout); dout(10) << "trim last_commited head was " << last_committed << ", can trim to " << trim_to << dendl; @@ -710,7 +710,7 @@ void Journaler::trim() << trimmed_pos << "/" << trimming_pos << "/" << expire_pos << dendl; - filer.remove(inode, trimming_pos, trim_to-trimming_pos, CEPH_OSD_OP_INCLOCK_FAIL, + filer.remove(ino, &layout, trimming_pos, trim_to-trimming_pos, CEPH_OSD_OP_INCLOCK_FAIL, NULL, new C_Trim(this, trim_to)); trimming_pos = trim_to; } diff --git a/src/osdc/Journaler.h b/src/osdc/Journaler.h index 410760300ae..12e3c794d60 100644 --- a/src/osdc/Journaler.h +++ b/src/osdc/Journaler.h @@ -88,7 +88,8 @@ public: private: // me - inode_t inode; + inodeno_t ino; + ceph_file_layout layout; Objecter *objecter; Filer filer; @@ -188,8 +189,9 @@ public: friend class C_Trim; public: - Journaler(inode_t& inode_, Objecter *obj, Logger *l, Mutex *lk, __s64 fl=0, __s64 pff=0) : - inode(inode_), objecter(obj), filer(objecter), logger(l), + Journaler(inodeno_t ino_, ceph_file_layout *layout_, Objecter *obj, Logger *l, Mutex *lk, __s64 fl=0, __s64 pff=0) : + ino(ino_), layout(*layout_), + objecter(obj), filer(objecter), logger(l), lock(lk), timer(*lk), delay_flush_event(0), state(STATE_UNDEF), error(0), write_pos(0), flush_pos(0), ack_pos(0), safe_pos(0), @@ -201,7 +203,7 @@ public: // prefetch intelligently. // (watch out, this is big if you use big objects or weird striping) if (!fetch_len) - fetch_len = ceph_file_layout_period(inode.layout) * g_conf.journaler_prefetch_periods; + fetch_len = ceph_file_layout_period(layout) * g_conf.journaler_prefetch_periods; if (!prefetch_from) prefetch_from = fetch_len / 2; }