--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Cond.h"
+#include "osdc/Objecter.h"
+#include "mds/mdstypes.h"
+
+#include "mds/JournalPointer.h"
+
+
+#define dout_subsys ceph_subsys_journaler
+#undef dout_prefix
+#define dout_prefix *_dout << objecter->messenger->get_myname() << ".journalpointer"
+
+/**
+ * Blocking read of JournalPointer for this MDS
+ *
+ * Reads the pointer object (named from MDS_INO_LOG_POINTER_OFFSET + node_id)
+ * out of pool_id and, on success, decodes it into front/back.
+ *
+ * The caller must not hold `lock`: it is taken here only while the read op
+ * is handed to the objecter, and released again before blocking on the
+ * completion.
+ *
+ * @param objecter used to submit the read (must not be NULL)
+ * @param lock mutex taken around the objecter call (must not be NULL)
+ * @return 0 on success, the negative status of the read op if it failed,
+ *         or -EINVAL if the object was read but could not be decoded
+ */
+int JournalPointer::load(Objecter *objecter, Mutex *lock)
+{
+  assert(lock != NULL);
+  assert(objecter != NULL);
+  assert(!lock->is_locked_by_me());
+
+  // Calculate object ID
+  inodeno_t const pointer_ino = MDS_INO_LOG_POINTER_OFFSET + node_id;
+  char buf[32];
+  snprintf(buf, sizeof(buf), "%llx.%08llx", (long long unsigned)pointer_ino, (long long unsigned)0);
+
+  // Blocking read of data
+  dout(4) << "Reading journal pointer '" << buf << "'" << dendl;
+  bufferlist data;
+  C_SaferCond waiter;
+  lock->Lock();
+  objecter->read_full(object_t(buf), object_locator_t(pool_id),
+      CEPH_NOSNAP, &data, 0, &waiter);
+  lock->Unlock();
+  int r = waiter.wait();
+
+  // Construct JournalPointer result, null or decoded data
+  if (r == 0) {
+    bufferlist::iterator q = data.begin();
+    try {
+      decode(q);
+    } catch (const buffer::error &e) {
+      // A short or malformed object must surface as a status code rather
+      // than as an exception escaping to callers that only check the int.
+      dout(1) << "Journal pointer '" << buf << "' decode failed: " << e.what() << dendl;
+      return -EINVAL;
+    }
+  } else {
+    dout(1) << "Journal pointer '" << buf << "' read failed: " << cpp_strerror(r) << dendl;
+  }
+  return r;
+}
+
+
+/**
+ * Blocking write of JournalPointer for this MDS
+ *
+ * Encodes front/back and writes them as the full content of the pointer
+ * object in pool_id. The caller must not hold `lock`; it is taken here only
+ * around the objecter call and dropped before waiting for the completion.
+ * Saving a null pointer (front and back both zero) is a programming error
+ * and trips an assertion.
+ *
+ * @return objecter write op status code
+ */
+int JournalPointer::save(Objecter *objecter, Mutex *lock) const
+{
+  assert(lock != NULL);
+  assert(objecter != NULL);
+  assert(!lock->is_locked_by_me());
+  // A null pointer must never be persisted
+  assert(!is_null());
+
+  // Work out the name of the pointer object
+  char oid_name[32];
+  inodeno_t const pointer_ino = MDS_INO_LOG_POINTER_OFFSET + node_id;
+  snprintf(oid_name, sizeof(oid_name), "%llx.%08llx",
+      (long long unsigned)pointer_ino, (long long unsigned)0);
+  dout(4) << "Writing pointer object '" << oid_name << "': 0x"
+    << std::hex << front << ":0x" << back << std::dec << dendl;
+
+  // Serialize this JournalPointer
+  bufferlist payload;
+  encode(payload);
+
+  // Submit the write under the lock, then block on its completion
+  C_SaferCond on_complete;
+  lock->Lock();
+  objecter->write_full(object_t(oid_name), object_locator_t(pool_id),
+      SnapContext(), payload, ceph_clock_now(g_ceph_context), 0, NULL, &on_complete);
+  lock->Unlock();
+
+  int const rc = on_complete.wait();
+  if (rc < 0) {
+    derr << "Error writing pointer object '" << oid_name << "': " << cpp_strerror(rc) << dendl;
+  }
+  return rc;
+}
+
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#ifndef JOURNAL_POINTER_H
+#define JOURNAL_POINTER_H
+
+#include "include/encoding.h"
+#include "mdstypes.h"
+
+class Objecter;
+class Mutex;
+
+// A JournalPointer always lives in the same location for a given MDS
+// instance; it tells the daemon where to look for the journal.
+class JournalPointer {
+  // MDS rank
+  int node_id;
+  // Metadata pool ID
+  int64_t pool_id;
+
+  public:
+  // The currently active journal
+  inodeno_t front;
+  // The backup journal, if any (may be 0)
+  inodeno_t back;
+
+  // Pointer bound to no particular MDS or pool (both set to -1)
+  JournalPointer()
+    : node_id(-1), pool_id(-1), front(0), back(0)
+  {}
+
+  // Pointer for MDS rank node_id_ whose object lives in pool pool_id_
+  JournalPointer(int node_id_, int64_t pool_id_)
+    : node_id(node_id_), pool_id(pool_id_), front(0), back(0)
+  {}
+
+  // Only front and back are serialized, in that order
+  void encode(bufferlist &bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(front, bl);
+    ::encode(back, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  // Inverse of encode(): fills in front, then back
+  void decode(bufferlist::iterator &bl) {
+    DECODE_START(1, bl);
+    ::decode(front, bl);
+    ::decode(back, bl);
+    DECODE_FINISH(bl);
+  }
+
+  // Blocking read/write of the pointer object; both return a status code
+  int load(Objecter *objecter, Mutex *lock);
+  int save(Objecter *objecter, Mutex *lock) const;
+
+  // True when neither a front nor a back journal is set
+  bool is_null() const {
+    bool const no_front = (front == 0);
+    bool const no_back = (back == 0);
+    return no_front && no_back;
+  }
+
+  // Emit front and back as unsigned fields of a "journal_pointer" section
+  void dump(Formatter *f) const {
+    f->open_object_section("journal_pointer");
+    f->dump_unsigned("front", front);
+    f->dump_unsigned("back", back);
+    f->close_section(); // journal_pointer
+  }
+
+  // Appends one default instance and one with both inos populated
+  static void generate_test_instances(std::list<JournalPointer*> &ls)
+  {
+    ls.push_back(new JournalPointer());
+
+    JournalPointer *populated = new JournalPointer();
+    populated->front = 0xdeadbeef;
+    populated->back = 0xfeedbead;
+    ls.push_back(populated);
+  }
+};
+
+#endif // JOURNAL_POINTER_H
#include "LogEvent.h"
#include "osdc/Journaler.h"
+#include "mds/JournalPointer.h"
#include "common/entity_name.h"
#include "common/perf_counters.h"
// First, read the pointer object.
// If the pointer object is not present, then create it with
// front = default ino and back = null
- JournalPointer jp;
- int const read_result = _read_pointer(&jp);
+ JournalPointer jp(mds->get_nodeid(), mds->mdsmap->get_metadata_pool());
+ int const read_result = jp.load(mds->objecter, &(mds->mds_lock));
if (read_result == -ENOENT) {
inodeno_t const default_log_ino = MDS_INO_LOG_OFFSET + mds->get_nodeid();
jp.front = default_log_ino;
- int write_result = _write_pointer(jp);
+ int write_result = jp.save(mds->objecter, &(mds->mds_lock));
// Nothing graceful we can do for this
assert(write_result >= 0);
} else if (read_result != 0) {
} else {
dout(1) << "Successfully erased journal, updating journal pointer" << dendl;
jp.back = 0;
- int write_result = _write_pointer(jp);
+ int write_result = jp.save(mds->objecter, &(mds->mds_lock));
// Nothing graceful we can do for this
assert(write_result >= 0);
}
inodeno_t primary_ino = MDS_INO_LOG_OFFSET + mds->get_nodeid();
inodeno_t secondary_ino = MDS_INO_LOG_BACKUP_OFFSET + mds->get_nodeid();
jp.back = (jp.front == primary_ino ? secondary_ino : primary_ino);
- int write_result = _write_pointer(jp);
+ int write_result = jp.save(mds->objecter, &(mds->mds_lock));
assert(write_result == 0);
/* Create the new Journaler file */
inodeno_t const tmp = jp.front;
jp.front = jp.back;
jp.back = tmp;
- write_result = _write_pointer(jp);
+ write_result = jp.save(mds->objecter, &(mds->mds_lock));
assert(write_result == 0);
/* Delete the old journal to free space */
/* Update the pointer to reflect we're back in clean single journal state. */
jp.back = 0;
- write_result = _write_pointer(jp);
+ write_result = jp.save(mds->objecter, &(mds->mds_lock));
assert(write_result == 0);
/* Reset the Journaler object to its default state */
mds->mds_lock.Unlock();
}
-/**
- * Blocking read of JournalPointer for this MDS
- */
-int MDLog::_read_pointer(JournalPointer *jp)
-{
- assert(!mds->mds_lock.is_locked_by_me());
- assert(jp);
-
- inodeno_t const pointer_ino = MDS_INO_LOG_POINTER_OFFSET + mds->get_nodeid();
- char buf[32];
- snprintf(buf, sizeof(buf), "%llx.%08llx", (long long unsigned)pointer_ino, (long long unsigned)0);
-
- // Blocking read of data
- dout(4) << "Reading journal pointer '" << buf << "'" << dendl;
- bufferlist data;
- C_SaferCond waiter;
- mds->mds_lock.Lock();
- mds->objecter->read_full(object_t(buf), object_locator_t(mds->mdsmap->get_metadata_pool()),
- CEPH_NOSNAP, &data, 0, &waiter);
- mds->mds_lock.Unlock();
- int r = waiter.wait();
-
- // Construct JournalPointer result, null or decoded data
- if (r == 0) {
- bufferlist::iterator q = data.begin();
- jp->decode(q);
- } else {
- dout(1) << "Journal pointer '" << buf << "' read failed: " << cpp_strerror(r) << dendl;
- }
- return r;
-}
-
-
-/**
- * Blocking write of JournalPointer for this MDS
- *
- * @return objecter write op status code
- */
-int MDLog::_write_pointer(JournalPointer const &ptr)
-{
- assert(!mds->mds_lock.is_locked_by_me());
- // It is not valid to persist a null pointer
- assert(!ptr.is_null());
-
- // Calculate object ID
- inodeno_t const pointer_ino = MDS_INO_LOG_POINTER_OFFSET + mds->get_nodeid();
- char buf[32];
- snprintf(buf, sizeof(buf), "%llx.%08llx", (long long unsigned)pointer_ino, (long long unsigned)0);
- dout(4) << "Writing pointer object '" << buf << "': 0x"
- << std::hex << ptr.front << ":0x" << ptr.back << std::dec << dendl;
-
- // Serialize JournalPointer object
- bufferlist data;
- ptr.encode(data);
-
- // Write to RADOS and wait for durability
- C_SaferCond waiter;
- mds->mds_lock.Lock();
- mds->objecter->write_full(object_t(buf), object_locator_t(mds->mdsmap->get_metadata_pool()),
- SnapContext(), data, ceph_clock_now(g_ceph_context), 0, NULL, &waiter);
- mds->mds_lock.Unlock();
- int write_result = waiter.wait();
- if (write_result < 0) {
- derr << "Error writing pointer object '" << buf << "': " << cpp_strerror(write_result) << dendl;
- }
- return write_result;
-}
-
// i am a separate thread
void MDLog::_replay_thread()
}
} recovery_thread;
void _recovery_thread(Context *completion);
- int _read_pointer(JournalPointer *jp);
- int _write_pointer(JournalPointer const &ptr);
void _reformat_journal(JournalPointer const &jp, Journaler *old_journal, Context *completion);
// -- segments --
mds/LogEvent.cc \
mds/MDSTable.cc \
mds/InoTable.cc \
+ mds/JournalPointer.cc \
mds/MDSTableClient.cc \
mds/MDSTableServer.cc \
mds/SnapRealm.cc \
mds/CInode.h \
mds/Capability.h \
mds/InoTable.h \
+ mds/JournalPointer.h \
mds/LocalLock.h \
mds/Locker.h \
mds/LogEvent.h \
};
-// This always lives in the same location for a given MDS
-// instance, it tells the daemon where to look for the journal.
-class JournalPointer {
- public:
- // The currently active journal
- inodeno_t front;
- // The backup journal, if any (may be 0)
- inodeno_t back;
-
- void encode(bufferlist &bl) const {
- ENCODE_START(1, 1, bl);
- ::encode(front, bl);
- ::encode(back, bl);
- ENCODE_FINISH(bl);
- }
-
- void decode(bufferlist::iterator &bl) {
- DECODE_START(1, bl);
- ::decode(front, bl);
- ::decode(back, bl);
- DECODE_FINISH(bl);
- }
-
- JournalPointer() : front(0), back(0) {}
-
- bool is_null() const {
- return front == 0 && back == 0;
- }
-
- void dump(Formatter *f) const {
- f->open_object_section("journal_pointer");
- {
- f->dump_unsigned("front", front);
- f->dump_unsigned("back", back);
- }
- f->close_section(); // journal_header
- }
-
- static void generate_test_instances(list<JournalPointer*> &ls)
- {
- ls.push_back(new JournalPointer());
- ls.push_back(new JournalPointer());
- ls.back()->front = 0xdeadbeef;
- ls.back()->back = 0xfeedbead;
- }
-};
-
-
class Journaler {
public:
// this goes at the head of the log "file".
TYPE(DBObjectMap::_Header)
TYPE(DBObjectMap::State)
-#include "osdc/Journaler.h"
+#include "mds/JournalPointer.h"
TYPE(JournalPointer)
+
+#include "osdc/Journaler.h"
TYPE(Journaler::Header)
#include "mds/snap.h"
{
int r = 0;
Resetter resetter;
- r = resetter.init(rank);
+ r = resetter.init();
if (r < 0) {
derr << "resetter::init failed: " << cpp_strerror(r) << dendl;
return r;
}
- resetter.reset();
+ resetter.reset(rank);
resetter.shutdown();
return r;
*
*/
+#include "common/errno.h"
#include "osdc/Journaler.h"
+#include "mds/JournalPointer.h"
+
#include "mds/mdstypes.h"
#include "mon/MonClient.h"
#include "mds/events/EResetJournal.h"
#include "Resetter.h"
-int Resetter::init(int rank)
+void Resetter::reset(int rank)
{
- int r = MDSUtility::init();
- if (r < 0) {
- return r;
+ Mutex mylock("Resetter::reset::lock");
+ Cond cond;
+ bool done;
+ int r;
+
+ JournalPointer jp(rank, mdsmap->get_metadata_pool());
+ int jp_load_result = jp.load(objecter, &lock);
+ if (jp_load_result != 0) {
+ std::cerr << "Error loading journal: " << cpp_strerror(jp_load_result) << std::endl;
+ return;
}
- inodeno_t ino = MDS_INO_LOG_OFFSET + rank;
- journaler = new Journaler(ino,
+ Journaler journaler(jp.front,
mdsmap->get_metadata_pool(),
CEPH_FS_ONDISK_MAGIC,
objecter, 0, 0, &timer);
- return 0;
-}
-
-void Resetter::reset()
-{
- Mutex mylock("Resetter::reset::lock");
- Cond cond;
- bool done;
- int r;
-
lock.Lock();
- journaler->recover(new C_SafeCond(&mylock, &cond, &done, &r));
+ journaler.recover(new C_SafeCond(&mylock, &cond, &done, &r));
lock.Unlock();
mylock.Lock();
if (r == -ENOENT) {
cerr << "journal does not exist on-disk. Did you set a bad rank?"
<< std::endl;
- shutdown();
return;
} else {
cerr << "got error " << r << "from Journaler, failling" << std::endl;
- shutdown();
return;
}
}
lock.Lock();
- uint64_t old_start = journaler->get_read_pos();
- uint64_t old_end = journaler->get_write_pos();
+ uint64_t old_start = journaler.get_read_pos();
+ uint64_t old_end = journaler.get_write_pos();
uint64_t old_len = old_end - old_start;
cout << "old journal was " << old_start << "~" << old_len << std::endl;
- uint64_t new_start = ROUND_UP_TO(old_end+1, journaler->get_layout_period());
+ uint64_t new_start = ROUND_UP_TO(old_end+1, journaler.get_layout_period());
cout << "new journal start will be " << new_start
<< " (" << (new_start - old_end) << " bytes past old end)" << std::endl;
- journaler->set_read_pos(new_start);
- journaler->set_write_pos(new_start);
- journaler->set_expire_pos(new_start);
- journaler->set_trimmed_pos(new_start);
- journaler->set_writeable();
+ journaler.set_read_pos(new_start);
+ journaler.set_write_pos(new_start);
+ journaler.set_expire_pos(new_start);
+ journaler.set_trimmed_pos(new_start);
+ journaler.set_writeable();
cout << "writing journal head" << std::endl;
- journaler->write_head(new C_SafeCond(&mylock, &cond, &done, &r));
+ journaler.write_head(new C_SafeCond(&mylock, &cond, &done, &r));
lock.Unlock();
mylock.Lock();
le->encode_with_header(bl);
cout << "writing EResetJournal entry" << std::endl;
- journaler->append_entry(bl);
- journaler->flush(new C_SafeCond(&mylock, &cond, &done,&r));
+ journaler.append_entry(bl);
+ journaler.flush(new C_SafeCond(&mylock, &cond, &done,&r));
lock.Unlock();
#define JOURNAL_RESETTER_H_
-#include "osdc/Journaler.h"
#include "MDSUtility.h"
/**
* of the file to dump to.
*/
class Resetter : public MDSUtility {
+  // MDS rank. NOTE(review): nothing visible here assigns this member
+  // (reset() takes its own rank argument), so it is given a defined
+  // value at construction instead of being left indeterminate.
+  int rank;
public:
-  Journaler *journaler;
+  Resetter() : rank(-1) {}
-  Resetter() : journaler(NULL) {}
-
-  int init(int rank);
-  void reset();
+  // Reset the journal belonging to the given MDS rank
+  void reset(int rank);
};
#endif /* JOURNAL_RESETTER_H_ */