This avoids long ino lists. Changes the on-disk format, unfortunately.
#include <ostream>
using namespace std;
+#include "encoding.h"
+
#ifndef MIN
# define MIN(a,b) ((a)<=(b) ? (a):(b))
#endif
class interval_set {
public:
map<T,T> m; // map start -> len
- int _size;
+ __u64 _size;
interval_set() : _size(0) {}
if (!big.contains(i->first, i->second)) return false;
return true;
}
-
+
};
template<class T>
return out;
}
// Free-function encode overload for interval_set<T>.
// Forwards to the set's own encode() member, passing bl through, so that
// an interval_set can be encoded with the same encode(x, bl) call form
// used for other types.
+template<class T>
+inline void encode(const interval_set<T>& s, bufferlist& bl)
+{
+ s.encode(bl);
+}
// Free-function decode overload for interval_set<T>.
// Forwards to the set's own decode() member, passing the iterator p
// through, so that an interval_set can be decoded with the same
// decode(x, p) call form used for other types.
+template<class T>
+inline void decode(interval_set<T>& s, bufferlist::iterator& p)
+{
+ s.decode(p);
+}
#endif
++version;
}
// Project the allocation of `want` inode numbers: move them out of
// projected_free and into `ids`, then bump projected_version once.
// The table must be active (asserted).
-void InoTable::project_alloc_ids(deque<inodeno_t>& ids, int want)
+void InoTable::project_alloc_ids(interval_set<inodeno_t>& ids, int want)
{
// NOTE(review): `ids` is logged here before the loop below has filled it in.
dout(10) << "project_alloc_ids " << ids << " to " << projected_free << "/" << free << dendl;
assert(is_active());
- for (int i=0; i<want; i++) {
- inodeno_t id = projected_free.start();
- projected_free.erase(id);
- ids.push_back(id);
// New form: take ids a range at a time rather than one id per iteration.
// Presumably start() is the first id in projected_free and
// end_after(start) is the end of the range containing it -- confirm
// against interval_set. The range length is capped at `want` so that no
// more than the requested number of ids is taken.
// NOTE(review): nothing here checks that projected_free still has
// entries while want > 0 -- verify the caller guarantees that.
+ while (want > 0) {
+ inodeno_t start = projected_free.start();
+ inodeno_t end = projected_free.end_after(start);
+ inodeno_t num = end - start;
+ if (num > (inodeno_t)want)
+ num = want;
+ projected_free.erase(start, num);
+ ids.insert(start, num);
+ want -= num;
}
++projected_version;
}
// Apply an allocation: remove `ids` from the `free` set and bump `version`.
// Only `free` is touched here; projected_free is not modified.
-void InoTable::apply_alloc_ids(deque<inodeno_t>& ids)
+void InoTable::apply_alloc_ids(interval_set<inodeno_t>& ids)
{
dout(10) << "apply_alloc_ids " << ids << " to " << projected_free << "/" << free << dendl;
- for (deque<inodeno_t>::iterator p = ids.begin();
- p != ids.end();
- p++)
- free.erase(*p);
// New form: one set-level subtract replaces the per-id erase loop.
+ free.subtract(ids);
++version;
}
// Project a release: add `ids` back into projected_free and bump
// projected_version. `free` is not modified here.
-void InoTable::project_release_ids(deque<inodeno_t>& ids)
+void InoTable::project_release_ids(interval_set<inodeno_t>& ids)
{
dout(10) << "project_release_ids " << ids << " to " << projected_free << "/" << free << dendl;
- for (deque<inodeno_t>::iterator p = ids.begin(); p != ids.end(); p++)
- projected_free.insert(*p);
// New form: one set-level insert replaces the per-id insert loop.
+ projected_free.insert(ids);
++projected_version;
}
// Apply a release: add `ids` back into the `free` set and bump `version`.
// projected_free is not modified here.
-void InoTable::apply_release_ids(deque<inodeno_t>& ids)
+void InoTable::apply_release_ids(interval_set<inodeno_t>& ids)
{
dout(10) << "apply_release_ids " << ids << " to " << projected_free << "/" << free << dendl;
- for (deque<inodeno_t>::iterator p = ids.begin(); p != ids.end(); p++)
- free.insert(*p);
// New form: one set-level insert replaces the per-id insert loop.
+ free.insert(ids);
++version;
}
projected_free.erase(id);
projected_version = ++version;
}
// Replay an allocation: remove `ids` from both `free` and projected_free,
// then advance `version` and set projected_version to the same value.
-void InoTable::replay_alloc_ids(deque<inodeno_t>& ids)
+void InoTable::replay_alloc_ids(interval_set<inodeno_t>& ids)
{
dout(10) << "replay_alloc_ids " << ids << dendl;
- for (deque<inodeno_t>::iterator p = ids.begin(); p != ids.end(); p++) {
- free.erase(*p);
- projected_free.erase(*p);
- }
// New form: set-level subtract on each set replaces the per-id erase loop.
+ free.subtract(ids);
+ projected_free.subtract(ids);
projected_version = ++version;
}
// Replay a release: add `ids` back into both `free` and projected_free,
// then advance `version` and set projected_version to the same value.
-void InoTable::replay_release_ids(deque<inodeno_t>& ids)
+void InoTable::replay_release_ids(interval_set<inodeno_t>& ids)
{
dout(10) << "replay_release_ids " << ids << dendl;
- for (deque<inodeno_t>::iterator p = ids.begin(); p != ids.end(); p++) {
- free.insert(*p);
- projected_free.insert(*p);
- }
// New form: set-level insert on each set replaces the per-id insert loop.
+ free.insert(ids);
+ projected_free.insert(ids);
projected_version = ++version;
}
inodeno_t project_alloc_id(inodeno_t id=0);
void apply_alloc_id(inodeno_t id);
- void project_alloc_ids(deque<inodeno_t>& inos, int want);
- void apply_alloc_ids(deque<inodeno_t>& inos);
+ void project_alloc_ids(interval_set<inodeno_t>& inos, int want);
+ void apply_alloc_ids(interval_set<inodeno_t>& inos);
- void project_release_ids(deque<inodeno_t>& inos);
- void apply_release_ids(deque<inodeno_t>& inos);
+ void project_release_ids(interval_set<inodeno_t>& inos);
+ void apply_release_ids(interval_set<inodeno_t>& inos);
void replay_alloc_id(inodeno_t ino);
- void replay_alloc_ids(deque<inodeno_t>& inos);
- void replay_release_ids(deque<inodeno_t>& inos);
+ void replay_alloc_ids(interval_set<inodeno_t>& inos);
+ void replay_release_ids(interval_set<inodeno_t>& inos);
void init_inode();
void reset_state();
CDentry *tracedn;
inodeno_t alloc_ino, used_prealloc_ino;
- deque<inodeno_t> prealloc_inos;
+ interval_set<inodeno_t> prealloc_inos;
int snap_caps;
bool did_early_reply;
Session *session;
bool open;
version_t cmapv;
- deque<inodeno_t> inos;
+ interval_set<inodeno_t> inos;
version_t inotablev;
public:
// Construct without an ino set: inotablev is initialised to 0 and the
// `inos` member is left default-constructed.
C_MDS_session_finish(MDS *m, Session *se, bool s, version_t mv) :
mds(m), session(se), open(s), cmapv(mv), inotablev(0) { }
// Construct with an ino set and its inotable version `iv`.
// The contents of `i` are swapped into the `inos` member rather than
// copied, so the caller's set is modified: after construction it holds
// whatever `inos` held beforehand.
- C_MDS_session_finish(MDS *m, Session *se, bool s, version_t mv, deque<inodeno_t>& i, version_t iv) :
+ C_MDS_session_finish(MDS *m, Session *se, bool s, version_t mv, interval_set<inodeno_t>& i, version_t iv) :
mds(m), session(se), open(s), cmapv(mv), inotablev(iv) {
inos.swap(i);
}
mds->sessionmap.set_state(session, Session::STATE_CLOSING);
pv = ++mds->sessionmap.projected;
- deque<inodeno_t> both = session->prealloc_inos;
- both.insert(both.end(), session->pending_prealloc_inos.begin(),
- session->pending_prealloc_inos.end());
+ interval_set<inodeno_t> both = session->prealloc_inos;
+ both.insert(session->pending_prealloc_inos);
if (both.size()) {
mds->inotable->project_release_ids(both);
piv = mds->inotable->get_projected_version();
}
}
-void Server::_session_logged(Session *session, bool open, version_t pv, deque<inodeno_t>& inos, version_t piv)
+void Server::_session_logged(Session *session, bool open, version_t pv, interval_set<inodeno_t>& inos, version_t piv)
{
dout(10) << "_session_logged " << session->inst << " " << (open ? "open":"close")
<< " " << pv << dendl;
if (got > 0) {
mds->inotable->project_alloc_ids(mdr->prealloc_inos, got);
assert(mdr->prealloc_inos.size()); // or else fix projected increment semantics
- mdr->session->pending_prealloc_inos.insert(mdr->session->pending_prealloc_inos.end(),
- mdr->prealloc_inos.begin(), mdr->prealloc_inos.end());
+ mdr->session->pending_prealloc_inos.insert(mdr->prealloc_inos);
mds->sessionmap.projected++;
dout(10) << "prepare_new_inode prealloc " << mdr->prealloc_inos << dendl;
}
void Server::journal_allocated_inos(MDRequest *mdr, EMetaBlob *blob)
{
dout(20) << "journal_allocated_inos sessionmapv " << mds->sessionmap.projected
- << " inotablev " << mds->inotable->get_projected_version() << dendl;
+ << " inotablev " << mds->inotable->get_projected_version()
+ << dendl;
blob->set_ino_alloc(mdr->alloc_ino,
mdr->used_prealloc_ino,
mdr->prealloc_inos,
mds->inotable->apply_alloc_id(mdr->alloc_ino);
}
if (mdr->prealloc_inos.size()) {
- for (deque<inodeno_t>::iterator p = mdr->prealloc_inos.begin();
- p != mdr->prealloc_inos.end();
- p++) {
- assert(session->pending_prealloc_inos.front() == *p);
- session->prealloc_inos.push_back(session->pending_prealloc_inos.front());
- session->pending_prealloc_inos.pop_front();
- }
+ session->pending_prealloc_inos.subtract(mdr->prealloc_inos);
+ session->prealloc_inos.insert(mdr->prealloc_inos);
mds->sessionmap.version++;
mds->inotable->apply_alloc_ids(mdr->prealloc_inos);
}
if (mdr->used_prealloc_ino) {
- assert(session->used_inos.front() == mdr->used_prealloc_ino);
- session->used_inos.pop_front();
+ session->used_inos.erase(mdr->used_prealloc_ino);
mds->sessionmap.version++;
}
}
set<int> client_reconnect_gather; // clients i need a reconnect msg from.
void handle_client_session(class MClientSession *m);
- void _session_logged(Session *session, bool open, version_t pv, deque<inodeno_t>& inos,version_t piv);
+ void _session_logged(Session *session, bool open, version_t pv, interval_set<inodeno_t>& inos,version_t piv);
version_t prepare_force_open_sessions(map<__u32,entity_inst_t> &cm);
void finish_force_open_sessions(map<__u32,entity_inst_t> &cm);
void terminate_sessions();
#include "include/Context.h"
#include "include/xlist.h"
+#include "include/interval_set.h"
#include "mdstypes.h"
class CInode;
entity_inst_t inst;
xlist<Session*>::item session_list_item;
- deque<inodeno_t> pending_prealloc_inos; // journaling prealloc, will be added to prealloc_inos
- deque<inodeno_t> prealloc_inos; // preallocated, ready to use.
- deque<inodeno_t> used_inos; // journaling use
+ interval_set<inodeno_t> pending_prealloc_inos; // journaling prealloc, will be added to prealloc_inos
+ interval_set<inodeno_t> prealloc_inos; // preallocated, ready to use.
+ interval_set<inodeno_t> used_inos; // journaling use
// Take one ino out of prealloc_inos, record it in used_inos, and return it.
//
// If `ino` is nonzero and present in prealloc_inos, that specific ino is
// taken. Otherwise (ino is 0, or the requested ino is not in the set) the
// ino at prealloc_inos.start() is taken instead.
// Asserts that prealloc_inos is not empty on entry.
inodeno_t take_ino(inodeno_t ino = 0) {
assert(!prealloc_inos.empty());
if (ino) {
- deque<inodeno_t>::iterator p;
- for (p = prealloc_inos.begin(); p != prealloc_inos.end(); p++)
- if (*p == ino)
- break;
- if (p != prealloc_inos.end())
- prealloc_inos.erase(p);
// New form: a membership test on the set replaces the linear search.
+ if (prealloc_inos.contains(ino))
+ prealloc_inos.erase(ino);
else
// Requested ino is not available; fall through to the default pick below.
ino = 0;
}
if (!ino) {
- ino = prealloc_inos.front();
- prealloc_inos.pop_front();
+ ino = prealloc_inos.start();
+ prealloc_inos.erase(ino);
}
- used_inos.push_back(ino);
// Record the single ino as a range of length 1.
+ used_inos.insert(ino, 1);
return ino;
}
int get_num_projected_prealloc_inos() {
::decode(completed_requests, p);
::decode(prealloc_inos, p);
::decode(used_inos, p);
- prealloc_inos.insert(prealloc_inos.begin(), used_inos.begin(), used_inos.end()); // HACK
+ prealloc_inos.insert(used_inos);
used_inos.clear();
}
};
// ino (pre)allocation. may involve both inotable AND session state.
version_t inotablev, sessionmapv;
inodeno_t allocated_ino; // inotable
- deque<inodeno_t> preallocated_inos; // inotable + session
+ interval_set<inodeno_t> preallocated_inos; // inotable + session
inodeno_t used_preallocated_ino; // session
entity_name_t client_name; // session
void set_ino_alloc(inodeno_t alloc,
inodeno_t used_prealloc,
- deque<inodeno_t>& prealloc,
+ interval_set<inodeno_t>& prealloc,
entity_name_t client,
version_t sv, version_t iv) {
allocated_ino = alloc;
bool open; // open or close
version_t cmapv; // client map version
- deque<inodeno_t> inos;
+ interval_set<inodeno_t> inos;
version_t inotablev;
public:
cmapv(v),
inotablev(0) {
}
- ESession(entity_inst_t inst, bool o, version_t v, deque<inodeno_t>& i, version_t iv) :
+ ESession(entity_inst_t inst, bool o, version_t v, interval_set<inodeno_t>& i, version_t iv) :
LogEvent(EVENT_SESSION),
client_inst(inst),
open(o),
::encode(client_inst, bl);
::encode(open, bl);
::encode(cmapv, bl);
- ::encode(inos, bl);
+ ::encode(inos.m, bl);
::encode(inotablev, bl);
}
// Decode an ESession event from `bl`, reading the fields in the order
// client_inst, open, cmapv, inos, inotablev.
void decode(bufferlist::iterator &bl) {
  ::decode(client_inst, bl);
  ::decode(open, bl);
  ::decode(cmapv, bl);
  // Keep decoding through the interval_set decode overload instead of
  // switching to ::decode(inos.m, bl). Filling the public map directly
  // bypasses interval_set's own decode(), so the separately stored _size
  // member would keep its constructed value of 0 even when ranges were read,
  // and size()-based checks on the decoded set could then see it as empty.
  // This also matches how Session decodes its interval_set members.
  // NOTE(review): assumes interval_set's encode()/decode() use the same byte
  // layout as encoding the underlying map `m`, which is what this class's
  // encode() writes -- confirm, or encode via ::encode(inos, bl) as well.
  ::decode(inos, bl);
  ::decode(inotablev, bl);
}
mds->sessionmap.projected = ++mds->sessionmap.version;
}
if (preallocated_inos.size()) {
- session->prealloc_inos.insert(session->prealloc_inos.end(),
- preallocated_inos.begin(),
- preallocated_inos.end());
+ session->prealloc_inos.insert(preallocated_inos);
mds->sessionmap.projected = ++mds->sessionmap.version;
}
assert(sessionmapv == mds->sessionmap.version);
#include "include/xlist.h"
#include "include/nstring.h"
-#define CEPH_FS_ONDISK_MAGIC "ceph fs volume v001"
+#define CEPH_FS_ONDISK_MAGIC "ceph fs volume v002"
#define MDS_REF_SET // define me for improved debug output, sanity checking