From bbc61d9b73c8bc92cc8e07b062dd5f800fb8f775 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 30 Jan 2008 13:48:24 -0800 Subject: [PATCH] mds: flush file (size+mtime) metadata to journal on close --- src/TODO | 3 ++ src/include/types.h | 5 +-- src/mds/Locker.cc | 30 +++++++++++++----- src/mds/LogEvent.cc | 6 ++++ src/mds/LogEvent.h | 2 ++ src/mds/events/EFileAccess.h | 51 ++++++++++++++++++++++++++++++ src/mds/events/EFileWrite.h | 60 ++++++++++++++++++++++++++++++++++++ src/mds/events/EOpen.h | 1 + src/mds/journal.cc | 46 +++++++++++++++++++++++++++ src/start.sh | 18 ++++++----- 10 files changed, 205 insertions(+), 17 deletions(-) create mode 100644 src/mds/events/EFileAccess.h create mode 100644 src/mds/events/EFileWrite.h diff --git a/src/TODO b/src/TODO index aa44c7f26d24a..8bbc4dd7d4d7b 100644 --- a/src/TODO +++ b/src/TODO @@ -75,6 +75,7 @@ mon mds mustfix +- journal cap mtime/size updates - rename slave in-memory rollback on failure - proper handling of cache expire messages during rejoin phase? -> i think cache expires are fine; the rejoin_ack handler just has to behave if rejoining items go missing @@ -82,6 +83,8 @@ mds mustfix - rerun destro trace against latest, with various journal lengths +- EOpen vs other journal events... update ordering problem? + mds - client cap timeouts / - stale -> resume diff --git a/src/include/types.h b/src/include/types.h index 9f474abb3d760..7e397179a4d20 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -198,10 +198,11 @@ struct inode_t { bool anchored; // auth only? // file (data access) - int64_t size, max_size, allocated_size; + int64_t size; + int64_t max_size; // client(s) are auth to write this much... utime_t mtime; // file data modify time. utime_t atime; // file data access time. - utime_t rmtime; // recursive mtime + utime_t rmtime; // recursive mtime? 
// special stuff version_t version; // auth only diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 7e0f5844ac693..fa70fe7536cd1 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -27,6 +27,8 @@ #include "events/EString.h" #include "events/EUpdate.h" +#include "events/EFileWrite.h" +#include "events/EFileAccess.h" #include "msg/Messenger.h" @@ -731,6 +733,7 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) assert(cap); // filter wanted based on what we could ever give out (given auth/replica status) + int old_wanted = in->get_caps_wanted(); int wanted = m->get_wanted() & in->filelock.caps_allowed_ever(); dout(7) << "handle_client_file_caps seq " << m->get_seq() @@ -748,6 +751,7 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) had |= had2; // update wanted + bool last_wr = false; // last write cap if (cap->wanted() != wanted) { if (m->get_seq() < cap->get_last_open()) { /* this is awkward. @@ -763,7 +767,19 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) // outright release? dout(7) << " cap for client" << client << " is now null, removing from " << *in << dendl; in->remove_client_cap(client); - if (!in->is_any_caps()) + + // last wr cap? 
+ int new_wanted = in->get_caps_wanted(); + dout(10) << "old_wanted " << cap_string(old_wanted) + << " new_wanted " << cap_string(new_wanted) << dendl; + if ((old_wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER|CEPH_CAP_WREXTEND)) && + !(new_wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER|CEPH_CAP_WREXTEND))) { + dout(7) << " last wr-wanted cap, adjusting max_size" << dendl; + in->inode.max_size = 0; + last_wr = true; + } + + if (!in->is_any_caps()) in->xlist_open_file.remove_myself(); // unpin logsegment if (!in->is_auth()) request_inode_file_caps(in); @@ -777,11 +793,12 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) dout(7) << " taking atime " << m->get_atime() << " > " << in->inode.atime << " for " << *in << dendl; in->inode.atime = m->get_atime(); + mds->mdlog->submit_entry(new EFileAccess(mds->mdlog, in)); } + // mtime|size? + bool dirty = false; if ((has|had) & CEPH_CAP_WR) { - bool dirty = false; - // mtime if (m->get_mtime() > in->inode.mtime) { dout(7) << " taking mtime " << m->get_mtime() << " > " @@ -796,10 +813,9 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) in->inode.size = m->get_size(); dirty = true; } - - if (dirty) - mds->mdlog->submit_entry(new EString("cap inode update dirty fixme")); - } + } + if (dirty || last_wr) + mds->mdlog->submit_entry(new EFileWrite(mds->mdlog, in)); // reevaluate, waiters if (!in->filelock.is_stable()) diff --git a/src/mds/LogEvent.cc b/src/mds/LogEvent.cc index 65b0bb2ec1322..c71299fb20730 100644 --- a/src/mds/LogEvent.cc +++ b/src/mds/LogEvent.cc @@ -32,6 +32,9 @@ #include "events/ESlaveUpdate.h" #include "events/EOpen.h" +#include "events/EFileWrite.h" +#include "events/EFileAccess.h" + #include "events/EPurgeFinish.h" #include "events/EAnchor.h" @@ -70,6 +73,9 @@ LogEvent *LogEvent::decode(bufferlist& bl) case EVENT_SLAVEUPDATE: le = new ESlaveUpdate; break; case EVENT_OPEN: le = new EOpen; break; + case EVENT_FILEWRITE: le = new EFileWrite; break; + case EVENT_FILEACCESS: le = new EFileAccess; break; + 
case EVENT_PURGEFINISH: le = new EPurgeFinish; break; case EVENT_ANCHOR: le = new EAnchor; break; diff --git a/src/mds/LogEvent.h b/src/mds/LogEvent.h index 8d36a1d515c1c..aa792708cd1b1 100644 --- a/src/mds/LogEvent.h +++ b/src/mds/LogEvent.h @@ -29,6 +29,8 @@ #define EVENT_UPDATE 20 #define EVENT_SLAVEUPDATE 21 #define EVENT_OPEN 22 +#define EVENT_FILEWRITE 23 +#define EVENT_FILEACCESS 24 #define EVENT_PURGEFINISH 30 diff --git a/src/mds/events/EFileAccess.h b/src/mds/events/EFileAccess.h new file mode 100644 index 0000000000000..232f21c80a7aa --- /dev/null +++ b/src/mds/events/EFileAccess.h @@ -0,0 +1,51 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#ifndef __MDS_EFILEACCESS_H +#define __MDS_EFILEACCESS_H + +#include "../LogEvent.h" +#include "EMetaBlob.h" + +class EFileAccess : public LogEvent { /* log event of type EVENT_FILEACCESS holding one inode number and an atime */ +public: + inodeno_t ino; /* inode number, taken from in->inode.ino */ + utime_t atime; /* access time, taken from in->inode.atime */ + + EFileAccess() : LogEvent(EVENT_FILEACCESS) { } /* sets only the event type; ino and atime are not assigned here */ + EFileAccess(MDLog *mdlog, CInode *in) : /* mdlog is accepted but never referenced in this constructor */ + LogEvent(EVENT_FILEACCESS) { + ino = in->inode.ino; + atime = in->inode.atime; + } + + void print(ostream& out) { /* one-line text form: the name, then ino, then atime */ + out << "EFileAccess " << ino + << " atime " << atime; + } + + void encode_payload(bufferlist& bl) { /* appends ino, then atime, to bl */ + ::_encode(ino, bl); + ::_encode(atime, bl); + } + void decode_payload(bufferlist& bl, int& off) { /* reads ino, then atime, in the same order encode_payload wrote them */ + ::_decode(ino, bl, off); + ::_decode(atime, bl, off); + } + + void update_segment(); /* declared only; no body in this header */ + void replay(MDS *mds); /* declared only; no body in this header */ +}; + +#endif diff --git a/src/mds/events/EFileWrite.h b/src/mds/events/EFileWrite.h new file mode 100644 index 0000000000000..e6300a572cd84 --- /dev/null +++ b/src/mds/events/EFileWrite.h @@ -0,0 +1,60 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#ifndef __MDS_EFILEWRITE_H +#define __MDS_EFILEWRITE_H + +#include "../LogEvent.h" +#include "EMetaBlob.h" + +class EFileWrite : public LogEvent { /* log event of type EVENT_FILEWRITE holding one inode number with its size, max_size and mtime */ +public: + inodeno_t ino; /* inode number, taken from in->inode.ino */ + __u64 size, max_size; /* NOTE(review): unsigned here, whereas the types.h hunk of this patch declares inode_t size and max_size as int64_t */ + utime_t mtime; /* modify time, taken from in->inode.mtime */ + + EFileWrite() : LogEvent(EVENT_FILEWRITE) { } /* sets only the event type; the data fields are not assigned here */ + EFileWrite(MDLog *mdlog, CInode *in) : /* mdlog is accepted but never referenced in this constructor */ + LogEvent(EVENT_FILEWRITE) { + ino = in->inode.ino; + size = in->inode.size; + max_size = in->inode.max_size; + mtime = in->inode.mtime; + } + + void print(ostream& out) { /* one-line text form: the name, then ino, size, max_size, mtime */ + out << "EFileWrite " << ino + << " size " << size + << " max " << max_size + << " mtime " << mtime; + } + + void encode_payload(bufferlist& bl) { /* appends ino, size, max_size, mtime to bl, in that order */ + ::_encode(ino, bl); + ::_encode(size, bl); + ::_encode(max_size, bl); + ::_encode(mtime, bl); + } + void decode_payload(bufferlist& bl, int& off) { /* reads the four fields in the same order encode_payload wrote them */ + ::_decode(ino, bl, off); + ::_decode(size, bl, off); + ::_decode(max_size, bl, off); + ::_decode(mtime, bl, off); + } + + void update_segment(); /* declared only; no body in this header */ + void replay(MDS *mds); /* declared only; no body in this header */ +}; + +#endif diff --git a/src/mds/events/EOpen.h b/src/mds/events/EOpen.h index c45224aff3010..eb492cd525743 100644 --- a/src/mds/events/EOpen.h +++ b/src/mds/events/EOpen.h @@ -22,6 +22,7 @@ class EOpen : public LogEvent { public: EMetaBlob metablob; list inos; + //list wr_inos; EOpen() : LogEvent(EVENT_OPEN) { } EOpen(MDLog *mdlog) : diff --git a/src/mds/journal.cc b/src/mds/journal.cc index d28ee0b3fead0..0ffcedde9b5d7 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -23,6 +23,9 @@ #include "events/ESlaveUpdate.h" #include "events/EOpen.h" +#include "events/EFileWrite.h" +#include "events/EFileAccess.h" + #include "events/EPurgeFinish.h" #include "events/EExport.h" @@ -600,6 +603,49 @@ void EOpen::replay(MDS *mds) } +// ------------------------ +// EFileWrite + +void EFileWrite::update_segment() +{ + // ?? 
+} + +void EFileWrite::replay(MDS *mds) /* copies the journaled size, max_size and mtime into the cached inode when it is found */ +{ + dout(10) << "EFileWrite.replay " << dendl; + CInode *in = mds->mdcache->get_inode(ino); /* lookup by inode number; the result is tested for null just below */ + if (in) { + in->inode.size = size; + in->inode.max_size = max_size; + in->inode.mtime = mtime; + } else { /* inode not found: only a debug line is logged, the assert below is commented out */ + dout(10) << " missing inode " << ino << dendl; + //assert(in); + } +} + +// ------------------------ +// EFileAccess + +void EFileAccess::update_segment() /* body holds only a placeholder comment; no statements */ +{ + // ?? +} + +void EFileAccess::replay(MDS *mds) /* copies the journaled atime into the cached inode when it is found */ +{ + dout(10) << "EFileAccess.replay " << dendl; + CInode *in = mds->mdcache->get_inode(ino); /* lookup by inode number; the result is tested for null just below */ + if (in) + in->inode.atime = atime; + else { /* inode not found: only a debug line is logged, the assert below is commented out */ + dout(10) << " missing inode " << ino << dendl; + //assert(in); + } +} + + // ----------------------- // ESlaveUpdate diff --git a/src/start.sh b/src/start.sh index 65516e753df34..3f52022ed57b8 100755 --- a/src/start.sh +++ b/src/start.sh @@ -15,18 +15,20 @@ if [ `echo $IP | grep '^127\\.'` ] then echo echo "WARNING: hostname resolves to loopback; remote hosts will not be able to" - echo " connect. either adjust /etc/hsots, or edit this script to use your" + echo " connect. either adjust /etc/hosts, or edit this script to use your" echo " machine's real IP." echo fi -ARGS="--bind $IP --doutdir out -d" ./mkmonmap $IP:12345 # your IP here -./cmon --mkfs --mon 0 $ARGS -./cosd --mkfs --osd 0 $ARGS -./cosd --mkfs --osd 1 $ARGS -./cosd --mkfs --osd 2 $ARGS -./cosd --mkfs --osd 3 $ARGS -./cmds $ARGS + +ARGS="-d --bind $IP --doutdir out --debug_ms 1" +./cmon $ARGS --mkfs --mon 0 +./cosd $ARGS --mkfs --osd 0 +./cosd $ARGS --mkfs --osd 1 +./cosd $ARGS --mkfs --osd 2 +./cosd $ARGS --mkfs --osd 3 +./cmds $ARGS --debug_mds 10 + echo "started. stop.sh to stop. see out/* (e.g. 'tail -f out/????') for debug output." -- 2.39.5