cfuse: cfuse.cc client.o osdc.o client/fuse.o msg/SimpleMessenger.o common.o
${CC} ${CFLAGS} ${LIBS} -lfuse $^ -o $@
+activemaster: active/activemaster.cc client.o osdc.o msg/SimpleMessenger.o common.o
+ ${CC} ${CFLAGS} ${LIBS} $^ -o $@
+
+activeslave: active/activeslave.cc client.o osdc.o msg/SimpleMessenger.o common.o
+ ${CC} ${CFLAGS} ${LIBS} $^ -o $@
+
+echotestclient: active/echotestclient.cc client.o osdc.o msg/SimpleMessenger.o common.o
+ ${CC} ${CFLAGS} ${LIBS} $^ -o $@
+
+msgtestclient: active/msgtestclient.cc client.o osdc.o msg/SimpleMessenger.o common.o
+ ${CC} ${CFLAGS} ${LIBS} $^ -o $@
+
+
+
# misc
gprof-helper.so: test/gprof-helper.c
-sage
- mds diropen
-doc
+
+some smallish projects:
+
+- crush rewrite in C
+ - generalize any memory management etc. to allow use in kernel and userspace
+- userspace crush tools
+ - xml import/export?
+ - ?
+
+- SimpleMessenger
+ - clean up/merge Messenger/Dispatcher interfaces
+ - auto close idle connections
+ - delivery ack and buffering, and then reconnect
+ - take a look at RDS? http://oss.oracle.com/projects/rds/
+
+- generalize monitor client?
+ - throttle message resend attempts
+
+- paxos layer work
+ - integrate leasing into paxos framework
+ - carefully interface design...
+
+
+code cleanup
+- endian portability
+- word size
+ - clean up all encoded structures
+
+general kernel planning
+- soft consistency on lookup?
+
+
+
+
+
+sage doc
- mdsmonitor beacon semantics
- cache expiration, cache invariants
- including dual expire states, transition, vs subtree grouping of expire messages
-mds
+sage mds
- finish multistage rejoin
- more testing of failures + thrashing.
- in particular, i care about dirfragtree.. get it on rejoin?
- and dir sizes, if i add that... also on rejoin?
-/- properly recover lock state on rejoin...
-/ - recovering mds rejoins replicas it pulled out of its journal
-/ - replicas will tell it when they hold an xlock
-/ - surviving mds rejoins replicas from a recovering mds
-/ - will tell auth if it holds an xlock
-- send_rejoin_acks
-
- recovering open files
- recovery will either have inode (from EOpen), or will provide path+cap to reassert open state.
- path+cap window will require some fetching of metadata from disk before doing the rejoin
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
}
+
+void Client::lock_fh_pos(Fh *f)
+{
+ dout(10) << "lock_fh_pos " << f << endl;
+
+ if (f->pos_locked || !f->pos_waiters.empty()) {
+ Cond cond;
+ f->pos_waiters.push_back(&cond);
+ dout(10) << "lock_fh_pos BLOCKING on " << f << endl;
+ while (f->pos_locked || f->pos_waiters.front() != &cond)
+ cond.Wait(client_lock);
+ dout(10) << "lock_fh_pos UNBLOCKING on " << f << endl;
+ assert(f->pos_waiters.front() == &cond);
+ f->pos_waiters.pop_front();
+ }
+
+ f->pos_locked = true;
+}
+
+void Client::unlock_fh_pos(Fh *f)
+{
+ dout(10) << "unlock_fh_pos " << f << endl;
+ f->pos_locked = false;
+}
+
+
// blocking osd interface
int Client::read(fh_t fh, char *buf, off_t size, off_t offset)
bool movepos = false;
if (offset < 0) {
+ lock_fh_pos(f);
offset = f->pos;
movepos = true;
}
// we can trust size info bc we get accurate info when buffering/caching caps are issued.
dout(10) << "file size: " << in->inode.size << endl;
if (offset > 0 && offset >= in->inode.size) {
+ if (movepos) unlock_fh_pos(f);
client_lock.Unlock();
return 0;
}
size = (off_t)in->inode.size - offset;
if (size == 0) {
- dout(-10) << "read is size=0, returning 0" << endl;
+ dout(10) << "read is size=0, returning 0" << endl;
+ if (movepos) unlock_fh_pos(f);
client_lock.Unlock();
return 0;
}
if (movepos) {
// adjust fd pos
f->pos = offset+blist.length();
+ unlock_fh_pos(f);
}
// copy data into caller's char* buf
Fh *f = fh_map[fh];
Inode *in = f->inode;
+ // use/adjust fd pos?
if (offset < 0) {
+ lock_fh_pos(f);
offset = f->pos;
- // adjust fd pos
- f->pos = offset+size;
+ f->pos = offset+size;
+ unlock_fh_pos(f);
}
bool lazy = f->mode == FILE_MODE_LAZY;
}
-
void Client::ms_handle_failure(Message *m, const entity_inst_t& inst)
{
entity_name_t dest = inst.name;
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
bool is_lazy() { return mode & O_LAZY; }
- Fh() : inode(0), pos(0), mds(0), mode(0) {}
+ bool pos_locked; // pos is currently in use
+ list<Cond*> pos_waiters; // waiters for pos
+
+ Fh() : inode(0), pos(0), mds(0), mode(0), pos_locked(false) {}
};
};
+ // cluster descriptors
+ MDSMap *mdsmap;
+ OSDMap *osdmap;
+
+
protected:
Messenger *messenger;
int whoami;
void handle_client_request_forward(MClientRequestForward *reply);
void handle_client_reply(MClientReply *reply);
-
- // cluster descriptors
- MDSMap *mdsmap;
- OSDMap *osdmap;
-
bool mounted;
bool unmounting;
Cond mount_cond;
void close_release(Inode *in);
void close_safe(Inode *in);
+ void lock_fh_pos(Fh *f);
+ void unlock_fh_pos(Fh *f);
+
// metadata cache
Inode* insert_inode(Dir *dir, InodeStat *in_info, const string& dn);
void update_inode_dist(Inode *in, InodeStat *st);
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
const char *p = prefix.c_str();
- map<__int64_t, __int64_t> open_files;
+ map<int64_t, int64_t> open_files;
while (!t.end()) {
client->rename(a,b);
} else if (strcmp(op, "mkdir") == 0) {
const char *a = t.get_string(p);
- __int64_t b = t.get_int();
+ int64_t b = t.get_int();
client->mkdir(a, b);
} else if (strcmp(op, "rmdir") == 0) {
const char *a = t.get_string(p);
client->lstat(a, &st);
} else if (strcmp(op, "chmod") == 0) {
const char *a = t.get_string(p);
- __int64_t b = t.get_int();
+ int64_t b = t.get_int();
client->chmod(a, b);
} else if (strcmp(op, "chown") == 0) {
const char *a = t.get_string(p);
- __int64_t b = t.get_int();
- __int64_t c = t.get_int();
+ int64_t b = t.get_int();
+ int64_t c = t.get_int();
client->chown(a, b, c);
} else if (strcmp(op, "utime") == 0) {
const char *a = t.get_string(p);
- __int64_t b = t.get_int();
- __int64_t c = t.get_int();
+ int64_t b = t.get_int();
+ int64_t c = t.get_int();
struct utimbuf u;
u.actime = b;
u.modtime = c;
client->utime(a, &u);
} else if (strcmp(op, "mknod") == 0) {
const char *a = t.get_string(p);
- __int64_t b = t.get_int();
+ int64_t b = t.get_int();
client->mknod(a, b);
} else if (strcmp(op, "getdir") == 0) {
const char *a = t.get_string(p);
client->getdir(a, contents);
} else if (strcmp(op, "open") == 0) {
const char *a = t.get_string(p);
- __int64_t b = t.get_int();
- __int64_t id = t.get_int();
- __int64_t fh = client->open(a, b);
+ int64_t b = t.get_int();
+ int64_t id = t.get_int();
+ int64_t fh = client->open(a, b);
open_files[id] = fh;
} else if (strcmp(op, "close") == 0) {
- __int64_t id = t.get_int();
- __int64_t fh = open_files[id];
+ int64_t id = t.get_int();
+ int64_t fh = open_files[id];
if (fh > 0) client->close(fh);
open_files.erase(id);
} else if (strcmp(op, "truncate") == 0) {
const char *a = t.get_string(p);
- __int64_t b = t.get_int();
+ int64_t b = t.get_int();
client->truncate(a,b);
} else if (strcmp(op, "read") == 0) {
- __int64_t id = t.get_int();
- __int64_t fh = open_files[id];
+ int64_t id = t.get_int();
+ int64_t fh = open_files[id];
int size = t.get_int();
int off = t.get_int();
char *buf = new char[size];
client->read(fh, buf, size, off);
delete[] buf;
} else if (strcmp(op, "lseek") == 0) {
- __int64_t id = t.get_int();
- __int64_t fh = open_files[id];
+ int64_t id = t.get_int();
+ int64_t fh = open_files[id];
int off = t.get_int();
int whence = t.get_int();
client->lseek(fh, off, whence);
} else if (strcmp(op, "write") == 0) {
- __int64_t id = t.get_int();
- __int64_t fh = open_files[id];
+ int64_t id = t.get_int();
+ int64_t fh = open_files[id];
int size = t.get_int();
int off = t.get_int();
char *buf = new char[size];
}
// close open files
- for (map<__int64_t, __int64_t>::iterator fi = open_files.begin();
+ for (map<int64_t, int64_t>::iterator fi = open_files.begin();
fi != open_files.end();
fi++) {
dout(1) << "leftover close " << fi->second << endl;
int SyntheticClient::write_file(string& fn, int size, int wrsize) // size is in MB, wrsize in bytes
{
- //__uint64_t wrsize = 1024*256;
+ //uint64_t wrsize = 1024*256;
char *buf = new char[wrsize+100]; // 1 MB
memset(buf, 7, wrsize);
- __uint64_t chunks = (__uint64_t)size * (__uint64_t)(1024*1024) / (__uint64_t)wrsize;
+ uint64_t chunks = (uint64_t)size * (uint64_t)(1024*1024) / (uint64_t)wrsize;
int fd = client->open(fn.c_str(), O_RDWR|O_CREAT);
dout(5) << "writing to " << fn << " fd " << fd << endl;
// 64 bits : file offset
// 64 bits : client id
// = 128 bits (16 bytes)
- __uint64_t *p = (__uint64_t*)buf;
+ uint64_t *p = (uint64_t*)buf;
while ((char*)p < buf + wrsize) {
*p = i*wrsize + (char*)p - buf;
p++;
{
char *buf = new char[rdsize];
memset(buf, 1, rdsize);
- __uint64_t chunks = (__uint64_t)size * (__uint64_t)(1024*1024) / (__uint64_t)rdsize;
+ uint64_t chunks = (uint64_t)size * (uint64_t)(1024*1024) / (uint64_t)rdsize;
int fd = client->open(fn.c_str(), O_RDONLY);
dout(5) << "reading from " << fn << " fd " << fd << endl;
// verify fingerprint
int bad = 0;
- __int64_t *p = (__int64_t*)buf;
- __int64_t readoff, readclient;
+ int64_t *p = (int64_t*)buf;
+ int64_t readoff, readclient;
while ((char*)p + 32 < buf + rdsize) {
readoff = *p;
- __int64_t wantoff = i*rdsize + (__int64_t)((char*)p - buf);
+ int64_t wantoff = i*rdsize + (int64_t)((char*)p - buf);
p++;
readclient = *p;
p++;
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
(JNIEnv *, jobject)
{
- cout << "Initializing Ceph client:" << endl;
+ dout(3) << "CephFSInterface: Initializing Ceph client:" << endl;
// parse args from CEPH_ARGS
vector<char*> args;
// load monmap
MonMap monmap;
- int r = monmap.read(".ceph_monmap");
+ // int r = monmap.read(".ceph_monmap");
+ int r = monmap.read("/cse/grads/eestolan/ceph/trunk/ceph/.ceph_monmap");
if (r < 0) {
- cout << "could not find .ceph_monmap" << endl;
- return 0;
+ dout(0) << "CephFSInterface: could not find .ceph_monmap" << endl;
+ assert(0 && "could not find .ceph_monmap");
+ // return 0;
}
assert(r >= 0);
JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_fs_ceph_CephFileSystem_ceph_1copyFromLocalFile
(JNIEnv * env, jobject obj, jlong clientp, jstring j_local_path, jstring j_ceph_path) {
- cout << "In copyFromLocalFile" << endl;
- cout.flush();
+ dout(10) << "CephFSInterface: In copyFromLocalFile" << endl;
Client* client;
//client = (Client*) clientp;
client = *(Client**)&clientp;
const char* c_local_path = env->GetStringUTFChars(j_local_path, 0);
const char* c_ceph_path = env->GetStringUTFChars(j_ceph_path, 0);
- cout << "Local source file is "<< c_local_path << " and Ceph destination file is " << c_ceph_path << endl;
+ dout(10) << "CephFSInterface: Local source file is "<< c_local_path << " and Ceph destination file is " << c_ceph_path << endl;
struct stat st;
int r = ::stat(c_local_path, &st);
assert (r == 0);
int fh_ceph = client->open(c_ceph_path, O_WRONLY|O_CREAT|O_TRUNC);
assert (fh_local > -1);
assert (fh_ceph > -1);
- cout << "local fd is " << fh_local << " and Ceph fd is " << fh_ceph << endl;
+ dout(10) << "CephFSInterface: local fd is " << fh_local << " and Ceph fd is " << fh_ceph << endl;
// get the source file size
off_t remaining = st.st_size;
JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_fs_ceph_CephFileSystem_ceph_1copyToLocalFile
(JNIEnv *env, jobject obj, jlong clientp, jstring j_ceph_path, jstring j_local_path)
{
-
-
Client* client;
client = *(Client**)&clientp;
const char* c_ceph_path = env->GetStringUTFChars(j_ceph_path, 0);
const char* c_local_path = env->GetStringUTFChars(j_local_path, 0);
- cout << "In copyToLocalFile, copying from Ceph file " << c_ceph_path <<
+ dout(3) << "CephFSInterface: dout(3): In copyToLocalFile, copying from Ceph file " << c_ceph_path <<
+ " to local file " << c_local_path << endl;
+
+ cout << "CephFSInterface: cout: In copyToLocalFile, copying from Ceph file " << c_ceph_path <<
" to local file " << c_local_path << endl;
- cout.flush();
// get source file size
struct stat st;
- cout << "Attempting lstat with file " << c_ceph_path << ":" << endl;
+ //dout(10) << "Attempting lstat with file " << c_ceph_path << ":" << endl;
int r = client->lstat(c_ceph_path, &st);
assert (r == 0);
- cout << "Opening Ceph source file for read: " << endl;
- cout.flush();
+ dout(10) << "CephFSInterface: Opening Ceph source file for read: " << endl;
int fh_ceph = client->open(c_ceph_path, O_RDONLY);
assert (fh_ceph > -1);
- cout << " Opened Ceph file! Opening local destination file: " << endl;
- cout.flush();
+ dout(10) << "CephFSInterface: Opened Ceph file! Opening local destination file: " << endl;
int fh_local = ::open(c_local_path, O_WRONLY|O_CREAT|O_TRUNC, 0644);
assert (fh_local > -1);
JNIEXPORT jstring JNICALL Java_org_apache_hadoop_fs_ceph_CephFileSystem_ceph_1getcwd
(JNIEnv *env, jobject obj, jlong clientp)
{
- //cout << "In getcwd" << endl;
- //cout.flush();
+ dout(10) << "CephFSInterface: In getcwd" << endl;
Client* client;
client = *(Client**)&clientp;
JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_fs_ceph_CephFileSystem_ceph_1setcwd
(JNIEnv *env, jobject obj, jlong clientp, jstring j_path)
{
- //cout << "In setcwd" << endl;
- //cout.flush();
+ dout(10) << "CephFSInterface: In setcwd" << endl;
Client* client;
client = *(Client**)&clientp;
JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_fs_ceph_CephFileSystem_ceph_1rmdir
(JNIEnv *env, jobject, jlong clientp, jstring j_path)
{
- cout << "In rmdir" << endl;
- cout.flush();
+ dout(10) << "CephFSInterface: In rmdir" << endl;
Client* client;
client = *(Client**)&clientp;
JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_fs_ceph_CephFileSystem_ceph_1mkdir
(JNIEnv * env, jobject, jlong clientp, jstring j_path)
{
- //cout << "In mkdir" << endl;
- //cout.flush();
-
+ dout(10) << "CephFSInterface: In mkdir" << endl;
Client* client;
client = *(Client**)&clientp;
JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_fs_ceph_CephFileSystem_ceph_1unlink
(JNIEnv * env, jobject, jlong clientp, jstring j_path)
{
- cout.flush();
-
Client* client;
client = *(Client**)&clientp;
const char* c_path = env->GetStringUTFChars(j_path, 0);
- cout << "In unlink for path " << c_path << ":" << endl;
+ dout(10) << "CephFSInterface: In unlink for path " << c_path << ":" << endl;
// is it a file or a directory?
struct stat stbuf;
int stat_result = client->lstat(c_path, &stbuf);
if (stat_result < 0) {// then the path doesn't even exist
- cout << "ceph_unlink: path " << c_path << " does not exist" << endl;
+ dout(0) << "ceph_unlink: path " << c_path << " does not exist" << endl;
return false;
}
int result;
if (0 != S_ISDIR(stbuf.st_mode)) { // it's a directory
- cout << "ceph_unlink: path " << c_path << " is a directory. Calling client->rmdir()" << endl;
+ dout(10) << "ceph_unlink: path " << c_path << " is a directory. Calling client->rmdir()" << endl;
result = client->rmdir(c_path);
}
else if (0 != S_ISREG(stbuf.st_mode)) { // it's a file
- cout << "ceph_unlink: path " << c_path << " is a file. Calling client->unlink()" << endl;
+ dout(10) << "ceph_unlink: path " << c_path << " is a file. Calling client->unlink()" << endl;
result = client->unlink(c_path);
}
else {
- cout << "ceph_unlink: path " << c_path << " is not a file or a directory. Failing:" << endl;
+ dout(0) << "ceph_unlink: path " << c_path << " is not a file or a directory. Failing:" << endl;
result = -1;
}
- cout << "In ceph_unlink for path " << c_path <<
+ dout(10) << "In ceph_unlink for path " << c_path <<
": got result "
<< result << ". Returning..."<< endl;
JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_fs_ceph_CephFileSystem_ceph_1rename
(JNIEnv *env, jobject, jlong clientp, jstring j_from, jstring j_to)
{
- cout << "In rename" << endl;
- cout.flush();
-
+ dout(10) << "CephFSInterface: In rename" << endl;
Client* client;
client = *(Client**)&clientp;
(JNIEnv *env, jobject, jlong clientp, jstring j_path)
{
- //cout << "In exists" << endl;
- //cout.flush();
+ dout(10) << "CephFSInterface: In exists" << endl;
Client* client;
struct stat stbuf;
client = *(Client**)&clientp;
const char* c_path = env->GetStringUTFChars(j_path, 0);
- cout << "Attempting lstat with file " << c_path << ":" ;
- //int i = (int) (*c_path);
- //cout << "First character value is " << i;
- // cout.flush();
+ dout(10) << "Attempting lstat with file " << c_path << ":" ;
int result = client->lstat(c_path, &stbuf);
- cout << "result is " << result << endl;
- // cout << "Attempting to release string \"" << c_path << "\"" << endl;
- //cout.flush();
+ dout(10) << "result is " << result << endl;
env->ReleaseStringUTFChars(j_path, c_path);
- //cout << "String released!" << endl;
if (result < 0) {
- //cout << "Returning false (file does not exist)" << endl;
- //cout.flush();
+ dout(10) << "Returning false (file does not exist)" << endl;
return JNI_FALSE;
}
else {
- //cout << "Returning true (file exists)" << endl;
- //cout.flush();
+ dout(10) << "Returning true (file exists)" << endl;
return JNI_TRUE;
}
-
}
/*
JNIEXPORT jlong JNICALL Java_org_apache_hadoop_fs_ceph_CephFileSystem_ceph_1getblocksize
(JNIEnv *env, jobject obj, jlong clientp, jstring j_path)
{
- cout << "In getblocksize" << endl;
- cout.flush();
-
+ dout(10) << "In getblocksize" << endl;
Client* client;
- struct stat stbuf;
+ //struct stat stbuf;
client = *(Client**)&clientp;
jint result;
const char* c_path = env->GetStringUTFChars(j_path, 0);
+
+ /*
if (0 > client->lstat(c_path, &stbuf))
result = -1;
else
result = stbuf.st_blksize;
+ */
+
+ // we need to open the file to retrieve the stripe size
+ dout(10) << "CephFSInterface: getblocksize: opening file" << endl;
+ int fh = client->open(c_path, O_RDONLY);
+ if (fh < 0)
+ return -1;
+
+ result = client->get_stripe_unit(fh);
+
+ int close_result = client->close(fh);
+ assert (close_result > -1);
+
env->ReleaseStringUTFChars(j_path, c_path);
return result;
JNIEXPORT jlong JNICALL Java_org_apache_hadoop_fs_ceph_CephFileSystem_ceph_1getfilesize
(JNIEnv *env, jobject, jlong clientp, jstring j_path)
{
- cout << "In getfilesize" << endl;
- cout.flush();
+ dout(10) << "In getfilesize" << endl;
Client* client;
struct stat stbuf;
JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_fs_ceph_CephFileSystem_ceph_1isfile
(JNIEnv *env, jobject obj, jlong clientp, jstring j_path)
{
- //cout << "In isfile" << endl;
- //cout.flush();
+ dout(10) << "In isfile" << endl;
Client* client;
struct stat stbuf;
const char* c_path = env->GetStringUTFChars(j_path, 0);
- //cout << "Attempting lstat with file " << c_path << ":" << endl;
- //cout.flush();
int result = client->lstat(c_path, &stbuf);
- //cout << "Got through lstat without crashing: result is " << result << endl;
- //cout.flush();
env->ReleaseStringUTFChars(j_path, c_path);
if (0 > result) return JNI_FALSE;
// check the stat result
- //cout << "Stat call succeeded: attempting to look inside stbuf for result" << endl;
return (0 == S_ISREG(stbuf.st_mode)) ? JNI_FALSE : JNI_TRUE;
}
JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_fs_ceph_CephFileSystem_ceph_1isdirectory
(JNIEnv *env, jobject, jlong clientp, jstring j_path)
{
- //cout << "In isdirectory" << endl;
- //cout.flush();
+ dout(10) << "In isdirectory" << endl;
Client* client;
struct stat stbuf;
const char* c_path = env->GetStringUTFChars(j_path, 0);
int result = client->lstat(c_path, &stbuf);
env->ReleaseStringUTFChars(j_path, c_path);
- //cout << "String released!" << endl;
- //cout.flush();
// if the stat call failed, it's definitely not a directory...
if (0 > result) return JNI_FALSE;
JNIEXPORT jobjectArray JNICALL Java_org_apache_hadoop_fs_ceph_CephFileSystem_ceph_1getdir
(JNIEnv *env, jobject obj, jlong clientp, jstring j_path) {
- //cout << "In getdir" << endl;
- //cout.flush();
-
+ dout(10) << "In getdir" << endl;
Client* client;
client = *(Client**)&clientp;
map<string, inode_t> contents;
const char* c_path = env->GetStringUTFChars(j_path, 0);
int result = client->getdir(c_path, contents);
- //cout << "Releasing string" << endl;
env->ReleaseStringUTFChars(j_path, c_path);
if (result < 0) return NULL;
- //cout << "checking for empty dir" << endl;
+ dout(10) << "checking for empty dir" << endl;
jint dir_size = contents.size();
- // Hadoop doesn't want . or .. in the listing, so we shrink the
- // listing size by two, or by one if the directory's root
+ // Hadoop freaks out if the listing contains "." or "..". Shrink
+ // the listing size by two, or by one if the directory is the root.
if(('/' == c_path[0]) && (0 == c_path[1]))
dir_size -= 1;
else
// jstring blankString = env->NewStringUTF("");
jclass stringClass = env->FindClass("java/lang/String");
if (NULL == stringClass) {
- cout << "ERROR: java String class not found; dying a horrible, painful death" << endl;
+ dout(0) << "ERROR: java String class not found; dying a horrible, painful death" << endl;
assert(0);
}
jobjectArray dirListingStringArray = (jobjectArray) env->NewObjectArray(dir_size, stringClass, NULL);
if (it->first == dotdot) continue;
if (0 == dir_size)
- cout << "WARNING: adding stuff to an empty array" << endl;
+ dout(0) << "CephFSInterface: WARNING: adding stuff to an empty array." << endl;
assert (i < dir_size);
env->SetObjectArrayElement(dirListingStringArray, i,
env->NewStringUTF(it->first.c_str()));
(JNIEnv *env, jobject obj, jlong clientp, jstring j_path)
{
- //cout << "In open_for_read" << endl;
- //cout.flush();
-
+ dout(10) << "In open_for_read" << endl;
Client* client;
client = *(Client**)&clientp;
JNIEXPORT jint JNICALL Java_org_apache_hadoop_fs_ceph_CephFileSystem_ceph_1open_1for_1overwrite
(JNIEnv *env, jobject obj, jlong clientp, jstring j_path)
{
- //cout << "In open_for_overwrite" << endl;
- //cout.flush();
+ dout(10) << "In open_for_overwrite" << endl;
Client* client;
client = *(Client**)&clientp;
JNIEXPORT jint JNICALL Java_org_apache_hadoop_fs_ceph_CephInputStream_ceph_1read
(JNIEnv *env, jobject obj, jlong clientp, jint fh, jbyteArray j_buffer, jint buffer_offset, jint length)
{
- //cout << "In read" << endl;
- //cout.flush();
+ dout(10) << "In read" << endl;
// IMPORTANT NOTE: Hadoop read arguments are a bit different from POSIX so we
JNIEXPORT jlong JNICALL Java_org_apache_hadoop_fs_ceph_CephInputStream_ceph_1seek_1from_1start
(JNIEnv *env, jobject obj, jlong clientp, jint fh, jlong pos)
{
- //cout << "In CephInputStream::seek_from_start" << endl;
- //cout.flush();
-
+ dout(10) << "In CephInputStream::seek_from_start" << endl;
Client* client;
client = *(Client**)&clientp;
return result;
}
-
JNIEXPORT jlong JNICALL Java_org_apache_hadoop_fs_ceph_CephInputStream_ceph_1getpos
(JNIEnv *env, jobject obj, jlong clientp, jint fh)
{
- cout << "In CephInputStream::ceph_getpos" << endl;
- cout.flush();
-
+ dout(10) << "In CephInputStream::ceph_getpos" << endl;
Client* client;
client = *(Client**)&clientp;
JNIEXPORT jint JNICALL Java_org_apache_hadoop_fs_ceph_CephInputStream_ceph_1close
(JNIEnv *env, jobject obj, jlong clientp, jint fh)
{
- cout << "In CephInputStream::ceph_close" << endl;
- cout.flush();
+ dout(10) << "In CephInputStream::ceph_close" << endl;
Client* client;
client = *(Client**)&clientp;
JNIEXPORT jlong JNICALL Java_org_apache_hadoop_fs_ceph_CephOutputStream_ceph_1seek_1from_1start
(JNIEnv *env, jobject obj, jlong clientp, jint fh, jlong pos)
{
- cout << "In CephOutputStream::ceph_seek_from_start" << endl;
- cout.flush();
+ dout(10) << "In CephOutputStream::ceph_seek_from_start" << endl;
Client* client;
client = *(Client**)&clientp;
JNIEXPORT jlong JNICALL Java_org_apache_hadoop_fs_ceph_CephOutputStream_ceph_1getpos
(JNIEnv *env, jobject obj, jlong clientp, jint fh)
{
- cout << "In CephOutputStream::ceph_getpos" << endl;
- cout.flush();
+ dout(10) << "In CephOutputStream::ceph_getpos" << endl;
Client* client;
client = *(Client**)&clientp;
JNIEXPORT jint JNICALL Java_org_apache_hadoop_fs_ceph_CephOutputStream_ceph_1close
(JNIEnv *env, jobject obj, jlong clientp, jint fh)
{
- cout << "In CephOutputStream::ceph_close" << endl;
- cout.flush();
+ dout(10) << "In CephOutputStream::ceph_close" << endl;
Client* client;
client = *(Client**)&clientp;
JNIEXPORT jint JNICALL Java_org_apache_hadoop_fs_ceph_CephOutputStream_ceph_1write
(JNIEnv *env, jobject obj, jlong clientp, jint fh, jbyteArray j_buffer, jint buffer_offset, jint length)
{
- //cout << "In write" << endl;
- //cout.flush();
-
+ dout(10) << "In write" << endl;
// IMPORTANT NOTE: Hadoop write arguments are a bit different from POSIX so we
// have to convert. The write is *always* from the current position in the file,
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class org_apache_hadoop_fs_ceph_CephFileSystem */
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
class LogType {
protected:
- hash_map<__uint64_t, int> keymap;
+ hash_map<intptr_t, int> keymap;
vector<const char*> keys;
set<int> inc_keys;
// HACK to avoid the hash table as often as possible...
// cache recent key name lookups in a small ring buffer
const static int cache_keys = 10;
- __uint64_t kc_ptr[cache_keys];
+ intptr_t kc_ptr[cache_keys];
int kc_val[cache_keys];
int kc_pos;
i = keys.size();
keys.push_back(key);
-#ifdef __LP64__
- __uint64_t p = (__uint64_t)key;
-#else
- __uint64_t p = (__uint32_t)key;
-#endif
+ intptr_t p = (intptr_t)key;
keymap[p] = i;
if (is_inc) inc_keys.insert(i);
}
int lookup_key(const char* key) {
-#ifdef __LP64__
- __uint64_t p = (__uint64_t)key;
-#else
- __uint64_t p = (__uint32_t)key;
-#endif
+ intptr_t p = (intptr_t)key;
if (keymap.count(p))
return keymap[p];
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
-// -*- mode:C++; tab-width:4; c-basic-offset:2; indent-tabs-mode:t -*-
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
num_blocks = st.st_size;
}
- num_blocks /= (__uint64_t)EBOFS_BLOCK_SIZE;
+ num_blocks /= (uint64_t)EBOFS_BLOCK_SIZE;
if (g_conf.bdev_fake_mb) {
num_blocks = g_conf.bdev_fake_mb * 256;
}
// figure size
- __uint64_t bsize = get_num_blocks();
+ uint64_t bsize = get_num_blocks();
dout(2) << "open " << bsize << " bytes, " << num_blocks << " blocks" << endl;
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
st.num_free_extent = 0;
st.avg_free_extent = 0;
/*
- __uint64_t tfree = 0;
+ uint64_t tfree = 0;
for (int b=0; b<=EBOFS_NUM_FREE_BUCKETS; b++) {
Table<block_t,block_t> *tab;
if (b < EBOFS_NUM_FREE_BUCKETS) {
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "FileJournal.h"
+#include "Ebofs.h"
+
+#include "config.h"
+#define dout(x) if (x <= g_conf.debug_ebofs) cout << "ebofs(" << dev.get_device_name() << ").journal "
+#define derr(x) if (x <= g_conf.debug_ebofs) cerr << "ebofs(" << dev.get_device_name() << ").journal "
+
+
+
+void FileJournal::create()
+{
+ dout(1) << "create " << fn << endl;
+
+ // open/create
+ fd = ::open(fn.c_str(), O_CREAT|O_WRONLY);
+ assert(fd > 0);
+
+ ::ftruncate(fd);
+ ::fchmod(fd, 0644);
+
+ ::close(fd);
+}
+
+
+void FileJournal::open()
+{
+ dout(1) << "open " << fn << endl;
+
+ // open and file
+ assert(fd == 0);
+ fd = ::open(fn.c_str(), O_RDWR);
+ assert(fd > 0);
+
+ // read header?
+ // ***
+
+
+ start_writer();
+}
+
+void FileJournal::close()
+{
+ dout(1) << "close " << fn << endl;
+
+ // stop writer thread
+ stop_writer();
+
+ // close
+ assert(q.empty());
+ assert(fd > 0);
+ ::close(fd);
+ fd = 0;
+}
+
+void FileJournal::start_writer()
+{
+ write_stop = false;
+ write_thread.create();
+}
+
+void FileJournal::stop_writer()
+{
+ write_lock.Lock();
+ {
+ write_stop = true;
+ write_cond.Signal();
+ }
+ write_lock.Unlock();
+ write_thread.join();
+}
+
+
+void FileJournal::write_header()
+{
+ dout(10) << "write_header" << endl;
+
+ ::lseek(fd, 0, SEEK_SET);
+ ::write(fd, &header, sizeof(header));
+}
+
+
+void FileJournal::write_thread_entry()
+{
+ dout(10) << "write_thread_entry start" << endl;
+ write_lock.Lock();
+
+ while (!write_stop) {
+ if (writeq.empty()) {
+ // sleep
+ dout(20) << "write_thread_entry going to sleep" << endl;
+ write_cond.Wait(write_lock);
+ dout(20) << "write_thread_entry woke up" << endl;
+ continue;
+ }
+
+ // do queued writes
+ while (!writeq.empty()) {
+ // grab next item
+ epoch_t e = writeq.front().first;
+ bufferlist bl;
+ bl.claim(writeq.front().second);
+ writeq.pop_front();
+ Context *oncommit = commitq.front();
+ commitq.pop_front();
+
+ dout(15) << "write_thread_entry writing " << bottom << " : "
+ << bl.length()
+ << " epoch " << e
+ << endl;
+
+ // write epoch, len, data.
+ ::fseek(fd, bottom, SEEK_SET);
+ ::write(fd, &e, sizeof(e));
+
+ uint32_t len = bl.length();
+ ::write(fd, &len, sizeof(len));
+
+ for (list<bufferptr>::const_iterator it = bl.buffers().begin();
+ it != bl.buffers().end();
+ it++) {
+ if ((*it).length() == 0) continue; // blank buffer.
+ ::write(fd, (char*)(*it).c_str(), (*it).length() );
+ }
+
+ // move position pointer
+ bottom += sizeof(epoch_t) + sizeof(uint32_t) + e.length();
+
+ // do commit callback
+ if (oncommit) {
+ oncommit->finish(0);
+ delete oncommit;
+ }
+ }
+ }
+
+ write_lock.Unlock();
+ dout(10) << "write_thread_entry finish" << endl;
+}
+
+void FileJournal::submit_entry(bufferlist& e, Context *oncommit)
+{
+ dout(10) << "submit_entry " << bottom << " : " << e.length()
+ << " epoch " << ebofs->super_epoch
+ << " " << oncommit << endl;
+
+ // dump on queue
+ writeq.push_back(pair<epoch_t,bufferlist>(ebofs->super_epoch, e));
+ commitq.push_back(oncommit);
+
+ // kick writer thread
+ write_cond.Signal();
+}
+
+
+void FileJournal::commit_epoch_start()
+{
+ dout(10) << "commit_epoch_start" << endl;
+
+ write_lock.Lock();
+ {
+ header.epoch2 = ebofs->super_epoch;
+ header.top2 = bottom;
+ write_header();
+ }
+ write_lock.Unlock();
+}
+
+void FileJournal::commit_epoch_finish()
+{
+ dout(10) << "commit_epoch_finish" << endl;
+
+ write_lock.Lock();
+ {
+ // update header
+ header.epoch1 = ebofs->super_epoch;
+ header.top1 = header.top2;
+ header.epoch2 = 0;
+ header.top2 = 0;
+ write_header();
+
+ // flush any unwritten items in previous epoch
+ while (!writeq.empty() &&
+ writeq.front().first < ebofs->super_epoch) {
+ dout(15) << " dropping uncommitted journal item from prior epoch" << endl;
+ writeq.pop_front();
+ Context *oncommit = commitq.front();
+ commitq.pop_front();
+
+ if (oncommit) {
+ oncommit->finish(0);
+ delete oncommit;
+ }
+ }
+ }
+ write_lock.Unlock();
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#ifndef __EBOFS_FILEJOURNAL_H
+#define __EBOFS_FILEJOURNAL_H
+
+
+#include "Journal.h"
+
+
+class FileJournal : public Journal {
+public:
+ struct header_t {
+ epoch_t epoch1;
+ off_t top1;
+ epoch_t epoch2;
+ off_t top2;
+ } header;
+
+private:
+ string fn;
+
+ off_t max_size;
+ off_t top; // byte of first entry chronologically
+ off_t bottom; // byte where next entry goes
+ off_t committing_to; // offset of epoch boundary, if we are committing
+
+ int fd;
+
+ list<pair<epoch_t,bufferlist> > writeq; // currently journaling
+ list<Context*> commitq; // currently journaling
+
+ // write thread
+ Mutex write_lock;
+ Cond write_cond;
+ bool write_stop;
+
+ void write_header();
+ void start_writer();
+ void stop_writer();
+ void write_thread_entry();
+
+ class Writer : public Thread {
+ FileJournal *journal;
+ public:
+ Writer(FileJournal *fj) : journal(fj) {}
+ void *entry() {
+ journal->write_thread();
+ return 0;
+ }
+ } write_thread;
+
+ public:
+ FileJournal(Ebofs *e, char *f, off_t sz) :
+ Journal(e),
+ fn(f), max_size(sz),
+ top(0), bottom(0), committing_to(0),
+ fd(0),
+ write_stop(false), write_thread(this)
+ { }
+ ~FileJournal() {}
+
+ void create();
+ void open();
+ void close();
+
+ // writes
+ void submit_entry(bufferlist& e, Context *oncommit); // submit an item
+ void commit_epoch_start(); // mark epoch boundary
+ void commit_epoch_finish(); // mark prior epoch as committed (we can expire)
+
+ // reads
+};
+
+#endif
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#ifndef __EBOFS_JOURNAL_H
+#define __EBOFS_JOURNAL_H
+
+
+class Journal {
+ Ebofs *ebofs;
+
+ public:
+ Journal(Ebofs *e) : ebofs(e) { }
+ virtual ~Journal() { }
+
+ virtual void create() = 0;
+ virtual void open() = 0;
+ virtual void close() = 0;
+
+ // writes
+ virtual void submit_entry(bufferlist& e, Context *oncommit) = 0;// submit an item
+ virtual void commit_epoch_start() = 0; // mark epoch boundary
+ virtual void commit_epoch_finish(list<Context*>& ls) = 0; // mark prior epoch as committed (we can expire)
+
+ // reads/recovery
+
+};
+
+#endif
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// disk
-typedef __uint64_t block_t; // disk location/sector/block
+typedef uint64_t block_t; // disk location/sector/block
static const int EBOFS_BLOCK_SIZE = 4096;
static const int EBOFS_BLOCK_BITS = 12; // 1<<12 == 4096
// objects
-typedef __uint64_t coll_t;
+typedef uint64_t coll_t;
struct ebofs_onode {
Extent onode_loc; /* this is actually the block we live in */
// super
-typedef __uint64_t version_t;
+typedef uint64_t version_t;
static const unsigned EBOFS_MAGIC = 0x000EB0F5;
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
MonMap *monmap = new MonMap(g_conf.num_mon);
entity_addr_t a;
- monmap->mon_inst[0] = entity_inst_t(MSG_ADDR_MON(0), a); // hack ; see FakeMessenger.cc
+ for (int i=0; i<g_conf.num_mon; i++) {
+ a.port = i;
+ monmap->mon_inst[i] = entity_inst_t(MSG_ADDR_MON(i), a); // hack ; see FakeMessenger.cc
+ }
char hostname[100];
gethostname(hostname,100);
// create mon
Monitor *mon[g_conf.num_mon];
- for (int i=0; i<g_conf.num_mon; i++) {
+ for (int i=0; i<g_conf.num_mon; i++)
mon[i] = new Monitor(i, new FakeMessenger(MSG_ADDR_MON(i)), monmap);
- }
// create mds
MDS *mds[g_conf.num_mds];
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
#ifndef __BUFFER_H
#define __BUFFER_H
+#include <stdint.h>
+
#include "common/Mutex.h"
#include <iostream>
template<class T>
inline void _encode(const std::list<T>& ls, bufferlist& bl)
{
- __uint32_t n = ls.size();
+ uint32_t n = ls.size();
_encoderaw(n, bl);
for (typename std::list<T>::const_iterator p = ls.begin(); p != ls.end(); ++p)
_encode(*p, bl);
template<class T>
inline void _decode(std::list<T>& ls, bufferlist& bl, int& off)
{
- __uint32_t n;
+ uint32_t n;
_decoderaw(n, bl, off);
ls.clear();
while (n--) {
template<class T>
inline void _encode(const std::deque<T>& ls, bufferlist& bl)
{
- __uint32_t n = ls.size();
+ uint32_t n = ls.size();
_encoderaw(n, bl);
for (typename std::deque<T>::const_iterator p = ls.begin(); p != ls.end(); ++p)
_encode(*p, bl);
template<class T>
inline void _decode(std::deque<T>& ls, bufferlist& bl, int& off)
{
- __uint32_t n;
+ uint32_t n;
_decoderaw(n, bl, off);
ls.clear();
while (n--) {
template<class T>
inline void _encode(const std::set<T>& s, bufferlist& bl)
{
- __uint32_t n = s.size();
+ uint32_t n = s.size();
_encoderaw(n, bl);
for (typename std::set<T>::const_iterator p = s.begin(); p != s.end(); ++p)
_encode(*p, bl);
template<class T>
inline void _decode(std::set<T>& s, bufferlist& bl, int& off)
{
- __uint32_t n;
+ uint32_t n;
_decoderaw(n, bl, off);
s.clear();
while (n--) {
template<class T>
inline void _encode(const std::vector<T>& v, bufferlist& bl)
{
- __uint32_t n = v.size();
+ uint32_t n = v.size();
_encoderaw(n, bl);
for (typename std::vector<T>::const_iterator p = v.begin(); p != v.end(); ++p)
_encode(*p, bl);
template<class T>
inline void _decode(std::vector<T>& v, bufferlist& bl, int& off)
{
- __uint32_t n;
+ uint32_t n;
_decoderaw(n, bl, off);
v.resize(n);
- for (__uint32_t i=0; i<n; i++)
+ for (uint32_t i=0; i<n; i++)
_decode(v[i], bl, off);
}
template<class T, class U>
inline void _encode(const std::map<T,U>& m, bufferlist& bl)
{
- __uint32_t n = m.size();
+ uint32_t n = m.size();
_encoderaw(n, bl);
for (typename std::map<T,U>::const_iterator p = m.begin(); p != m.end(); ++p) {
_encode(p->first, bl);
template<class T, class U>
inline void _decode(std::map<T,U>& m, bufferlist& bl, int& off)
{
- __uint32_t n;
+ uint32_t n;
_decoderaw(n, bl, off);
m.clear();
while (n--) {
template<class T, class U>
inline void _encode(const __gnu_cxx::hash_map<T,U>& m, bufferlist& bl)
{
- __uint32_t n = m.size();
+ uint32_t n = m.size();
_encoderaw(n, bl);
for (typename __gnu_cxx::hash_map<T,U>::const_iterator p = m.begin(); p != m.end(); ++p) {
_encode(p->first, bl);
template<class T, class U>
inline void _decode(__gnu_cxx::hash_map<T,U>& m, bufferlist& bl, int& off)
{
- __uint32_t n;
+ uint32_t n;
_decoderaw(n, bl, off);
m.clear();
while (n--) {
// string
inline void _encode(const std::string& s, bufferlist& bl)
{
- __uint32_t len = s.length();
+ uint32_t len = s.length();
_encoderaw(len, bl);
bl.append(s.c_str(), len+1);
}
inline void _decode(std::string& s, bufferlist& bl, int& off)
{
- __uint32_t len;
+ uint32_t len;
_decoderaw(len, bl, off);
s = bl.c_str() + off; // FIXME someday to avoid a huge buffer copy?
off += len+1;
// bufferptr (encapsulated)
inline void _encode(bufferptr& bp, bufferlist& bl)
{
- __uint32_t len = bp.length();
+ uint32_t len = bp.length();
_encoderaw(len, bl);
bl.append(bp);
}
inline void _decode(bufferptr& bp, bufferlist& bl, int& off)
{
- __uint32_t len;
+ uint32_t len;
_decoderaw(len, bl, off);
bufferlist s;
// bufferlist (encapsulated)
inline void _encode(const bufferlist& s, bufferlist& bl)
{
- __uint32_t len = s.length();
+ uint32_t len = s.length();
_encoderaw(len, bl);
bl.append(s);
}
inline void _decode(bufferlist& s, bufferlist& bl, int& off)
{
- __uint32_t len;
+ uint32_t len;
_decoderaw(len, bl, off);
s.substr_of(bl, off, len);
off += len;
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
#ifndef __FRAG_H
#define __FRAG_H
+#include <stdint.h>
#include <map>
#include <list>
#include "buffer.h"
* we get 0/1 and 1/1. quartering gives us 0/2, 1/2, 2/2, 3/2. and so on.
*/
-typedef __uint32_t _frag_t;
+typedef uint32_t _frag_t;
class frag_t {
/* encoded value.
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
#ifndef __LRU_H
#define __LRU_H
+#include <stdint.h>
#include <assert.h>
#include <iostream>
using namespace std;
class LRUList {
private:
LRUObject *head, *tail;
- __uint32_t len;
+ uint32_t len;
public:
LRUList() {
len = 0;
}
- __uint32_t get_length() { return len; }
+ uint32_t get_length() { return len; }
LRUObject *get_head() {
return head;
class LRU {
protected:
LRUList lru_top, lru_bot, lru_pintail;
- __uint32_t lru_num, lru_num_pinned;
- __uint32_t lru_max; // max items
+ uint32_t lru_num, lru_num_pinned;
+ uint32_t lru_max; // max items
double lru_midpoint;
friend class LRUObject;
lru_max = max;
}
- __uint32_t lru_get_size() { return lru_num; }
- __uint32_t lru_get_top() { return lru_top.get_length(); }
- __uint32_t lru_get_bot() { return lru_bot.get_length(); }
- __uint32_t lru_get_pintail() { return lru_pintail.get_length(); }
- __uint32_t lru_get_max() { return lru_max; }
- __uint32_t lru_get_num_pinned() { return lru_num_pinned; }
+ uint32_t lru_get_size() { return lru_num; }
+ uint32_t lru_get_top() { return lru_top.get_length(); }
+ uint32_t lru_get_bot() { return lru_bot.get_length(); }
+ uint32_t lru_get_pintail() { return lru_pintail.get_length(); }
+ uint32_t lru_get_max() { return lru_max; }
+ uint32_t lru_get_num_pinned() { return lru_num_pinned; }
- void lru_set_max(__uint32_t m) { lru_max = m; }
+ void lru_set_max(uint32_t m) { lru_max = m; }
void lru_set_midpoint(float f) { lru_midpoint = f; }
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
#ifndef __OBJECT_H
#define __OBJECT_H
+#include <stdint.h>
+
#include <iostream>
#include <iomanip>
using namespace std;
using namespace __gnu_cxx;
-typedef __uint32_t objectrev_t;
+typedef uint32_t objectrev_t;
struct object_t {
- static const __uint32_t MAXREV = 0xffffffffU;
+ static const uint32_t MAXREV = 0xffffffffU;
- __uint64_t ino; // "file" identifier
- __uint32_t bno; // "block" in that "file"
+ uint64_t ino; // "file" identifier
+ uint32_t bno; // "block" in that "file"
objectrev_t rev; // revision. normally ctime (as epoch).
object_t() : ino(0), bno(0), rev(0) {}
- object_t(__uint64_t i, __uint32_t b) : ino(i), bno(b), rev(0) {}
- object_t(__uint64_t i, __uint32_t b, __uint32_t r) : ino(i), bno(b), rev(r) {}
+ object_t(uint64_t i, uint32_t b) : ino(i), bno(b), rev(0) {}
+ object_t(uint64_t i, uint32_t b, uint32_t r) : ino(i), bno(b), rev(r) {}
};
namespace __gnu_cxx {
#ifndef __LP64__
- template<> struct hash<__uint64_t> {
- size_t operator()(__uint64_t __x) const {
- static hash<__uint32_t> H;
+ template<> struct hash<uint64_t> {
+ size_t operator()(uint64_t __x) const {
+ static hash<uint32_t> H;
return H((__x >> 32) ^ (__x & 0xffffffff));
}
};
template<> struct hash<object_t> {
size_t operator()(const object_t &r) const {
- static hash<__uint64_t> H;
- static hash<__uint32_t> I;
+ static hash<uint64_t> H;
+ static hash<uint32_t> I;
return H(r.ino) ^ I(r.bno);
}
};
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
#ifndef _STATLITE_H
#define _STATLITE_H
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
#define __MDS_TYPES_H
extern "C" {
+#include <stdint.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <assert.h>
#include <vector>
#include <iostream>
#include <iomanip>
+
using namespace std;
#include <ext/hash_map>
};
#ifndef __LP64__
- template<> struct hash<__int64_t> {
- size_t operator()(__int64_t __x) const {
- static hash<__int32_t> H;
+ template<> struct hash<int64_t> {
+ size_t operator()(int64_t __x) const {
+ static hash<int32_t> H;
return H((__x >> 32) ^ (__x & 0xffffffff));
}
};
// ----------------------
// some basic types
-typedef __uint64_t tid_t;
-typedef __uint64_t version_t;
-typedef __uint32_t epoch_t; // map epoch (32bits -> 13 epochs/second for 10 years)
+typedef uint64_t tid_t; // transaction id
+typedef uint64_t version_t;
+typedef uint32_t epoch_t; // map epoch (32bits -> 13 epochs/second for 10 years)
// object and pg layout
// --------------------------------------
// inode
-typedef __uint64_t _inodeno_t;
+typedef uint64_t _inodeno_t;
+
struct inodeno_t {
_inodeno_t val;
inodeno_t() : val(0) {}
{
size_t operator()( const inodeno_t& x ) const
{
- static hash<__uint64_t> H;
+ static hash<uint64_t> H;
return H(x.val);
}
};
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
class CDirExport {
struct {
dirfrag_t dirfrag;
- long nden; // num dentries (including null ones)
+ uint32_t nden; // num dentries (including null ones)
version_t version;
version_t committed_version;
- unsigned state;
+ uint32_t state;
meta_load_t popularity_justme;
meta_load_t popularity_curdom;
- int dir_rep;
+ int32_t dir_rep;
} st;
map<int,int> replicas;
set<int> rep_by;
}
dirfrag_t get_dirfrag() { return st.dirfrag; }
- __uint64_t get_nden() { return st.nden; }
+ uint32_t get_nden() { return st.nden; }
void update_dir(CDir *dir) {
assert(dir->dirfrag() == st.dirfrag);
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
void CInode::close_dirfrag(frag_t fg)
{
+ dout(14) << "close_dirfrag " << fg << endl;
assert(dirfrags.count(fg));
- dirfrags[fg]->remove_null_dentries();
+ CDir *dir = dirfrags[fg];
+ dir->remove_null_dentries();
- assert(dirfrags[fg]->get_num_ref() == 0);
- delete dirfrags[fg];
+ // clear dirty flag
+ if (dir->is_dirty())
+ dir->mark_clean();
+
+ // dump any remaining dentries, for debugging purposes
+ for (map<string,CDentry*>::iterator p = dir->items.begin();
+ p != dir->items.end();
+ ++p)
+ dout(14) << "close_dirfrag LEFTOVER dn " << *p->second << endl;
+
+ assert(dir->get_num_ref() == 0);
+ delete dir;
dirfrags.erase(fg);
}
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
public:
FileLock(MDSCacheObject *o, int t, int wo) : SimpleLock(o, t, wo) { }
- char get_replica_state() {
+ int get_replica_state() {
switch (state) {
case LOCK_LOCK:
case LOCK_GLOCKM:
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
// =======================================================================
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// file i/o -----------------------------------------
-__uint64_t Locker::issue_file_data_version(CInode *in)
+version_t Locker::issue_file_data_version(CInode *in)
{
dout(7) << "issue_file_data_version on " << *in << endl;
return in->inode.file_data_version;
for (map<int, Capability>::iterator it = in->client_caps.begin();
it != in->client_caps.end();
it++) {
- if (it->second.issued() != (it->second.wanted() & allowed)) {
+ if (it->second.pending() != (it->second.wanted() & allowed)) {
// issue
nissued++;
lock->set_state(LOCK_SYNC);
// waiters?
- lock->finish_waiters(SimpleLock::WAIT_STABLE);
+ lock->finish_waiters(SimpleLock::WAIT_RD|SimpleLock::WAIT_STABLE);
}
void Locker::simple_lock(SimpleLock *lock)
return true;
}
- // can't read, and replicated.
- assert(!lock->get_parent()->is_auth());
-
// wait!
dout(7) << "simple_rdlock_start waiting on " << *lock << " on " << *lock->get_parent() << endl;
lock->add_waiter(SimpleLock::WAIT_RD, new C_MDS_RetryRequest(mdcache, mdr));
// stable.
assert(lock->is_stable());
- if (in->is_auth()) {
- // [auth]
+ if (in->is_auth() &&
+ !lock->is_xlocked()) {
+ // [auth]
+ // and not xlocked!
int wanted = in->get_caps_wanted();
bool loner = (in->client_caps.size() == 1) && in->mds_caps_wanted.empty();
dout(7) << "file_eval wanted=" << cap_string(wanted)
lock->get_state() != LOCK_LOCK) {
file_lock(lock);
}
-
- } else {
- // replica
- // recall? check wiaters? XXX
}
}
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
int num_wrlock;
public:
- ScatterLock(MDSCacheObject *o, int t, int wo) : SimpleLock(o, t, wo) {}
+ ScatterLock(MDSCacheObject *o, int t, int wo) : SimpleLock(o, t, wo),
+ num_wrlock(0) {}
- char get_replica_state() {
+ int get_replica_state() {
switch (state) {
case LOCK_SYNC:
case LOCK_GSYNCS:
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// lock the path
set<SimpleLock*> rdlocks, empty;
- for (unsigned i=0; i<trace.size(); i++)
+ for (int i=0; i<(int)trace.size(); i++)
rdlocks.insert(&trace[i]->lock);
if (!mds->locker->acquire_locks(mdr, rdlocks, empty, empty))
// -- lock --
set<SimpleLock*> rdlocks, wrlocks, xlocks;
- for (unsigned i=0; i<trace.size(); i++)
+ for (int i=0; i<(int)trace.size(); i++)
rdlocks.insert(&trace[i]->lock);
if (dn->is_null()) {
xlocks.insert(&dn->lock); // new dn, xlock
if (ref->is_dir() &&
mask & INODE_MASK_MTIME) rdlocks.insert(&ref->dirlock);
- mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks);
+ if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
+ return;
// reply
dout(10) << "reply to stat on " << *req << endl;
// create lock lists
set<SimpleLock*> rdlocks, wrlocks, xlocks;
- for (unsigned i=0; i<linktrace.size(); i++)
+ for (int i=0; i<(int)linktrace.size(); i++)
rdlocks.insert(&linktrace[i]->lock);
xlocks.insert(&dn->lock);
wrlocks.insert(&dn->dir->inode->dirlock);
- for (unsigned i=0; i<targettrace.size(); i++)
+ for (int i=0; i<(int)targettrace.size(); i++)
rdlocks.insert(&targettrace[i]->lock);
xlocks.insert(&targeti->linklock);
// lock
set<SimpleLock*> rdlocks, wrlocks, xlocks;
- for (unsigned i=0; i<trace.size()-1; i++)
+ for (int i=0; i<(int)trace.size()-1; i++)
rdlocks.insert(&trace[i]->lock);
xlocks.insert(&dn->lock);
wrlocks.insert(&dn->dir->inode->dirlock);
set<SimpleLock*> rdlocks, wrlocks, xlocks;
// rdlock sourcedir path, xlock src dentry
- for (unsigned i=0; i<srctrace.size()-1; i++)
+ for (int i=0; i<(int)srctrace.size()-1; i++)
rdlocks.insert(&srctrace[i]->lock);
xlocks.insert(&srcdn->lock);
wrlocks.insert(&srcdn->dir->inode->dirlock);
// rdlock destdir path, xlock dest dentry
- for (unsigned i=0; i<desttrace.size(); i++)
+ for (int i=0; i<(int)desttrace.size(); i++)
rdlocks.insert(&desttrace[i]->lock);
xlocks.insert(&destdn->lock);
wrlocks.insert(&destdn->dir->inode->dirlock);
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// auth rep
#define LOCK_SYNC 1 // AR R . R .
#define LOCK_LOCK 2 // AR R W . .
-#define LOCK_GLOCKR 3 // AR R . . .
+#define LOCK_GLOCKR -3 // AR R . . .
inline const char *get_simplelock_state_name(int n) {
switch (n) {
int wait_offset;
// lock state
- char state;
+ int state;
set<__int32_t> gather_set; // auth
// local state
parent->encode_lock_state(type, bl);
}
void finish_waiters(int mask, int r=0) {
- parent->finish_waiting(mask < wait_offset, r);
+ parent->finish_waiting(mask << wait_offset, r);
}
void add_waiter(int mask, Context *c) {
- parent->add_waiter(mask < wait_offset, c);
+ parent->add_waiter(mask << wait_offset, c);
}
bool is_waiter_for(int mask) {
- return parent->is_waiter_for(mask < wait_offset);
+ return parent->is_waiter_for(mask << wait_offset);
}
// state
- char get_state() { return state; }
- char set_state(char s) {
+ int get_state() { return state; }
+ int set_state(int s) {
state = s;
assert(!is_stable() || gather_set.size() == 0); // gather should be empty in stable states.
return s;
// simplelock specifics
- char get_replica_state() {
+ int get_replica_state() {
switch (state) {
case LOCK_LOCK:
case LOCK_GLOCKR:
bool can_xlock(MDRequest *mdr) {
if (!parent->is_auth()) return false;
if (state != LOCK_LOCK) return false;
- if (mdr && xlock_by == mdr) return true;
+ if (xlock_by == 0 ||
+ (mdr && xlock_by == mdr)) return true;
return false;
}
bool can_xlock_soon() {
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
#ifndef __MDSTYPES_H
#define __MDSTYPES_H
namespace __gnu_cxx {
template<> struct hash<metareqid_t> {
size_t operator()(const metareqid_t &r) const {
- hash<__uint64_t> H;
+ hash<uint64_t> H;
return H(r.client) ^ H(r.tid);
}
};
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
int result; // error code
unsigned char file_caps; // for open
long file_caps_seq;
- __uint64_t file_data_version; // for client buffercache consistency
+ uint64_t file_data_version; // for client buffercache consistency
int _num_trace_in;
int _dir_size;
unsigned char get_file_caps() { return st.file_caps; }
long get_file_caps_seq() { return st.file_caps_seq; }
- __uint64_t get_file_data_version() { return st.file_data_version; }
+ uint64_t get_file_data_version() { return st.file_data_version; }
void set_result(int r) { st.result = r; }
void set_file_caps(unsigned char c) { st.file_caps = c; }
void set_file_caps_seq(long s) { st.file_caps_seq = s; }
- void set_file_data_version(__uint64_t v) { st.file_data_version = v; }
+ void set_file_data_version(uint64_t v) { st.file_data_version = v; }
MClientReply() {};
MClientReply(MClientRequest *req, int result = 0) :
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -- types --
struct inode_strong {
- __int32_t caps_wanted;
- __int32_t nonce;
- __int32_t authlock;
- __int32_t linklock;
- __int32_t dirfragtreelock;
- __int32_t filelock;
+ int32_t caps_wanted;
+ int32_t nonce;
+ int32_t authlock;
+ int32_t linklock;
+ int32_t dirfragtreelock;
+ int32_t filelock;
inode_strong() {}
inode_strong(int n, int cw=0, int a=0, int l=0, int dft=0, int f=0) :
caps_wanted(cw),
};
struct dirfrag_strong {
- __int32_t nonce;
+ int32_t nonce;
dirfrag_strong() {}
dirfrag_strong(int n) : nonce(n) {}
};
struct dn_strong {
- __int32_t nonce;
- __int32_t lock;
+ int32_t nonce;
+ int32_t lock;
dn_strong() {}
dn_strong(int n, int l) : nonce(n), lock(l) {}
};
// -- data --
- __int32_t op;
+ int32_t op;
set<inodeno_t> weak_inodes;
map<inodeno_t, inode_strong> strong_inodes;
::_encode(weak_inodes, payload);
::_encode(strong_inodes, payload);
- __uint32_t nfull = full_inodes.size();
+ uint32_t nfull = full_inodes.size();
::_encode(nfull, payload);
for (list<inode_full>::iterator p = full_inodes.begin(); p != full_inodes.end(); ++p)
p->_encode(payload);
::_decode(weak_inodes, payload, off);
::_decode(strong_inodes, payload, off);
- __uint32_t nfull;
+ uint32_t nfull;
::_decode(nfull, payload, off);
for (unsigned i=0; i<nfull; i++)
full_inodes.push_back(inode_full(payload, off));
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// op types
const static int OP_COLLECT = 1; // proposer: propose round
const static int OP_LAST = 2; // voter: accept proposed round
- const static int OP_OLDROUND = 3; // voter: notify proposer he proposed an old round
const static int OP_BEGIN = 4; // proposer: value proposed for this round
const static int OP_ACCEPT = 5; // voter: accept propsed value
- const static int OP_SUCCESS = 7; // proposer: notify learners of agreed value
- const static int OP_ACK = 8; // learner: notify proposer that new value has been saved
+ const static int OP_COMMIT = 7; // proposer: notify learners of agreed value
+ const static char *get_opname(int op) {
+ switch (op) {
+ case OP_COLLECT: return "collect";
+ case OP_LAST: return "last";
+ case OP_BEGIN: return "begin";
+ case OP_ACCEPT: return "accept";
+ case OP_COMMIT: return "commit";
+ default: assert(0); return 0;
+ }
+ }
+ // which state machine?
int op;
int machine_id;
- version_t proposal;
- version_t n;
- bufferlist value;
+
+ version_t last_committed; // i've committed to
+ version_t pn_from; // i promise to accept after
+ version_t pn; // with with proposal
+ version_t old_accepted_pn; // previous pn, if we are a LAST with an uncommitted value
+
+ map<version_t,bufferlist> values;
MMonPaxos() : Message(MSG_MON_PAXOS) {}
- MMonPaxos(int o, int mid,
- version_t pn, version_t v) : Message(MSG_MON_PAXOS),
- op(o), machine_id(mid),
- proposal(pn), n(v) {}
- MMonPaxos(int o, int mid,
- version_t pn, version_t v,
- bufferlist& b) : Message(MSG_MON_PAXOS),
- op(o), machine_id(mid),
- proposal(pn), n(v),
- value(b) {}
+ MMonPaxos(int o, int mid) : Message(MSG_MON_PAXOS),
+ op(o), machine_id(mid),
+ last_committed(0), pn_from(0), pn(0), old_accepted_pn(0) { }
virtual char *get_type_name() { return "paxos"; }
void print(ostream& out) {
- out << "paxos(op " << op
- << ", machine " << machine_id
- << ", proposal " << proposal
- << ", state " << n
- << ", " << value.length() << " bytes)";
+ out << "paxos(m" << machine_id
+ << " " << get_opname(op) << " lc " << last_committed
+ << " pn " << pn << " opn " << old_accepted_pn
+ << ")";
}
void encode_payload() {
- payload.append((char*)&op, sizeof(op));
- payload.append((char*)&machine_id, sizeof(machine_id));
- payload.append((char*)&proposal, sizeof(proposal));
- payload.append((char*)&n, sizeof(n));
- ::_encode(value, payload);
+ ::_encode(op, payload);
+ ::_encode(machine_id, payload);
+ ::_encode(last_committed, payload);
+ ::_encode(pn_from, payload);
+ ::_encode(pn, payload);
+ ::_encode(old_accepted_pn, payload);
+ ::_encode(values, payload);
}
void decode_payload() {
int off = 0;
- payload.copy(off, sizeof(op), (char*)&op);
- off += sizeof(op);
- payload.copy(off, sizeof(machine_id), (char*)&machine_id);
- off += sizeof(machine_id);
- payload.copy(off, sizeof(proposal), (char*)&proposal);
- off += sizeof(proposal);
- payload.copy(off, sizeof(n), (char*)&n);
- off += sizeof(n);
- ::_decode(value, payload, off);
+ ::_decode(op, payload, off);
+ ::_decode(machine_id, payload, off);
+ ::_decode(last_committed, payload, off);
+ ::_decode(pn_from, payload, off);
+ ::_decode(pn, payload, off);
+ ::_decode(old_accepted_pn, payload, off);
+ ::_decode(values, payload, off);
}
};
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
class MOSDPGPeer : public Message {
- __uint64_t map_version;
+ uint64_t map_version;
list<pg_t> pg_list;
bool complete;
public:
- __uint64_t get_version() { return map_version; }
+ uint64_t get_version() { return map_version; }
list<pg_t>& get_pg_list() { return pg_list; }
bool get_complete() { return complete; }
MOSDPGPeer() {}
- MOSDPGPeer(__uint64_t v, list<pg_t>& l, bool c=false) :
+ MOSDPGPeer(uint64_t v, list<pg_t>& l, bool c=false) :
Message(MSG_OSD_PG_PEER) {
this->map_version = v;
this->complete = c;
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
#include "osd/OSD.h"
class MOSDPGPeerAck : public Message {
- __uint64_t map_version;
+ version_t map_version;
public:
list<pg_t> pg_dne; // pg dne
map<pg_t, PGReplicaInfo > pg_state; // state, lists, etc.
- __uint64_t get_version() { return map_version; }
+ version_t get_version() { return map_version; }
MOSDPGPeerAck() {}
- MOSDPGPeerAck(__uint64_t v) :
+ MOSDPGPeerAck(version_t v) :
Message(MSG_OSD_PG_PEERACK) {
this->map_version = v;
}
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
class MOSDPGPeerRequest : public Message {
- __uint64_t map_version;
+ version_t map_version;
list<repgroup_t> pg_list;
public:
- __uint64_t get_version() { return map_version; }
+ version_t get_version() { return map_version; }
list<repgroup_t>& get_pg_list() { return pg_list; }
MOSDPGPeerRequest() {}
- MOSDPGPeerRequest(__uint64_t v, list<repgroup_t>& l) :
+ MOSDPGPeerRequest(version_t v, list<repgroup_t>& l) :
Message(MSG_OSD_PG_PEERREQUEST) {
this->map_version = v;
pg_list.splice(pg_list.begin(), l);
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
int from = m->get_source().num();
if (from > whoami) {
- // wait, i should win!
- if (!electing_me)
- start();
+ if (leader_acked >= 0 && // we already acked someone
+ leader_acked < from) { // who would win over them
+ dout(5) << "no, we already acked " << leader_acked << endl;
+ } else {
+ // wait, i should win!
+ if (!electing_me)
+ start();
+ }
} else {
// they would win over me
if (leader_acked < 0 || // haven't acked anyone yet, or
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// ---------------------------------
+
+// PHASE 1
+
// proposer
-void Paxos::propose(version_t v, bufferlist& value)
+
+void Paxos::collect(version_t oldpn)
{
-//todo high rf
+ // reset the number of lasts received
+ accepted_pn = get_new_proposal_number(MAX(accepted_pn, oldpn));
+ accepted_pn_from = last_committed;
+ num_last = 1;
+ old_accepted_pn = 0;
+ old_accepted_value.clear();
+
+ dout(10) << "collect with pn " << accepted_pn << endl;
+
+ // send collect
+ for (int i=0; i<mon->monmap->num_mon; ++i) {
+ if (i == whoami) continue;
+
+ MMonPaxos *collect = new MMonPaxos(MMonPaxos::OP_COLLECT, machine_id);
+ collect->last_committed = last_committed;
+ collect->pn = accepted_pn;
+ mon->messenger->send_message(collect, mon->monmap->get_inst(i));
+ }
}
+
+void Paxos::handle_collect(MMonPaxos *collect)
+{
+ dout(10) << "handle_collect " << *collect << endl;
+
+ // reply
+ MMonPaxos *last = new MMonPaxos(MMonPaxos::OP_LAST, machine_id);
+ last->last_committed = last_committed;
-void Paxos::handle_last(MMonPaxos *m)
+ // do we have an accepted but uncommitted value?
+ // (it'll be at last_committed+1)
+ bufferlist bl;
+ if (mon->store->exists_bl_sn(machine_name, last_committed+1)) {
+ mon->store->get_bl_sn(bl, machine_name, last_committed+1);
+ assert(bl.length() > 0);
+ dout(10) << "sharing our accepted but uncommitted value for " << last_committed+1 << endl;
+ last->values[last_committed+1] = bl;
+ last->old_accepted_pn = accepted_pn;
+ }
+
+ // can we accept this pn?
+ if (collect->pn > accepted_pn) {
+ // ok, accept it
+ accepted_pn = collect->pn;
+ accepted_pn_from = collect->pn_from;
+ dout(10) << "accepting pn " << accepted_pn << " from " << accepted_pn_from << endl;
+ } else {
+ // don't accept!
+ dout(10) << "NOT accepting pn " << collect->pn << " from " << collect->pn_from
+ << ", we already accepted " << accepted_pn << " from " << accepted_pn_from
+ << endl;
+ }
+ last->pn = accepted_pn;
+ last->pn_from = accepted_pn_from;
+
+ // and share whatever data we have
+ for (version_t v = collect->last_committed;
+ v <= last_committed;
+ v++) {
+ if (mon->store->exists_bl_sn(machine_name, v)) {
+ mon->store->get_bl_sn(last->values[v], machine_name, v);
+ dout(10) << " sharing " << v << " "
+ << last->values[v].length() << " bytes" << endl;
+ }
+ }
+
+ // send reply
+ mon->messenger->send_message(last, collect->get_source_inst());
+ delete collect;
+}
+
+
+void Paxos::handle_last(MMonPaxos *last)
+{
+ dout(10) << "handle_last " << *last << endl;
+
+ // share committed values?
+ if (last->last_committed < last_committed) {
+ // share committed values
+ dout(10) << "sending commit to " << last->get_source() << endl;
+ MMonPaxos *commit = new MMonPaxos(MMonPaxos::OP_COMMIT, machine_id);
+ for (version_t v = last->last_committed;
+ v <= last_committed;
+ v++) {
+ mon->store->get_bl_sn(commit->values[v], machine_name, v);
+ dout(10) << "sharing " << v << " "
+ << commit->values[v].length() << " bytes" << endl;
+ }
+ mon->messenger->send_message(commit, last->get_source_inst());
+ }
+
+ // did we receive committed value?
+ if (last->last_committed > last_committed) {
+ for (version_t v = last_committed;
+ v <= last->last_committed;
+ v++) {
+ mon->store->put_bl_sn(last->values[v], machine_name, v);
+ dout(10) << "committing " << v << " "
+ << last->values[v].length() << " bytes" << endl;
+ }
+ last_committed = last->last_committed;
+ mon->store->put_int(last_committed, machine_name, "last_commtted");
+ dout(10) << "last_committed now " << last_committed << endl;
+ }
+
+ // do they accept your pn?
+ if (last->old_accepted_pn > accepted_pn) {
+ dout(10) << "uh oh, they have a higher pn than us. pick a new one." << endl;
+ collect(last->old_accepted_pn);
+ } else {
+ // they accepted our pn. great.
+ num_last++;
+ dout(10) << "great, they accepted our pn, we now have " << num_last << endl;
+
+ // did this person send back an accepted but uncommitted value?
+ if (last->old_accepted_pn &&
+ last->old_accepted_pn > old_accepted_pn) {
+ version_t v = last->last_committed+1;
+ dout(10) << "we learned an old value for " << v << " pn " << last->old_accepted_pn;
+ old_accepted_pn = last->old_accepted_pn;
+ old_accepted_value = last->values[v];
+ }
+
+ // do we have a majority?
+ if (num_last == mon->monmap->num_mon/2+1) {
+ // do this once.
+
+ // did we learn an old value?
+ if (old_accepted_value.length()) {
+ dout(10) << "begin on old learned value" << endl;
+ begin(old_accepted_value);
+ }
+ }
+ }
+
+ delete last;
+}
+
+
+void Paxos::begin(bufferlist& v)
{
-//todo high rf
- dout(10) << "handle_last " << *m << endl;
- delete m;
+ dout(10) << "begin for " << last_committed+1 << " "
+ << new_value.length() << " bytes"
+ << endl;
+
+ // we must already have a majority for this to work.
+ assert(num_last > mon->monmap->num_mon/2);
+
+ // and no value, yet.
+ assert(new_value.length() == 0);
+
+ // accept it ourselves
+ num_accepted = 1;
+ new_value = v;
+ mon->store->put_bl_sn(new_value, machine_name, last_committed+1);
+
+ // ask others to accept it to!
+ for (int i=0; i<mon->monmap->num_mon; ++i) {
+ if (i == whoami) continue;
+
+ dout(10) << " sending begin to mon" << i << endl;
+ MMonPaxos *begin = new MMonPaxos(MMonPaxos::OP_BEGIN, machine_id);
+ begin->values[last_committed+1] = new_value;
+ begin->pn = accepted_pn;
+
+ mon->messenger->send_message(begin, mon->monmap->get_inst(i));
+ }
}
-void Paxos::handle_accept(MMonPaxos *m)
+void Paxos::handle_begin(MMonPaxos *begin)
{
-//todo high rf
- dout(10) << "handle_accept " << *m << endl;
- delete m;
+ dout(10) << "handle_begin " << *begin << endl;
+
+ // can we accept this?
+ if (begin->pn != accepted_pn) {
+ dout(10) << " we accepted a higher pn " << accepted_pn << ", ignoring" << endl;
+ delete begin;
+ return;
+ }
+
+ // yes.
+ version_t v = last_committed+1;
+ dout(10) << "accepting value for " << v << " pn " << accepted_pn << endl;
+ mon->store->put_bl_sn(begin->values[v], machine_name, v);
+ // reply
+ MMonPaxos *accept = new MMonPaxos(MMonPaxos::OP_ACCEPT, machine_id);
+ accept->pn = accepted_pn;
+ accept->last_committed = last_committed;
+ mon->messenger->send_message(accept, begin->get_source_inst());
+
+ delete begin;
}
-void Paxos::handle_ack(MMonPaxos *m)
+
+void Paxos::handle_accept(MMonPaxos *accept)
{
-//todo high rf
- dout(10) << "handle_ack " << *m << endl;
- delete m;
+ dout(10) << "handle_accept " << *accept << endl;
+
+ if (accept->pn != accepted_pn) {
+ // we accepted a higher pn, from some other leader
+ dout(10) << " we accepted a higher pn " << accepted_pn << ", ignoring" << endl;
+ delete accept;
+ return;
+ }
+ if (accept->last_committed != last_committed) {
+ dout(10) << " this is from an old round that's already committed, ignoring" << endl;
+ delete accept;
+ return;
+ }
+
+ num_accepted++;
+ dout(10) << "now " << num_accepted << " have accepted" << endl;
+
+ // new majority?
+ if (num_accepted == mon->monmap->num_mon/2+1) {
+ // yay, commit!
+ dout(10) << "we got a majority, committing too" << endl;
+ commit();
+ }
+
}
-void Paxos::handle_old_round(MMonPaxos *m)
+void Paxos::commit()
{
-//todo high rf
- dout(10) << "handle_old_round " << *m << endl;
- delete m;
+ dout(10) << "commit " << last_committed+1 << endl;
+
+ // commit locally
+ last_committed++;
+ mon->store->put_int(last_committed, machine_name, "last_committed");
+
+ // tell everyone
+ for (int i=0; i<mon->monmap->num_mon; ++i) {
+ if (i == whoami) continue;
+
+ dout(10) << " sending commit to mon" << i << endl;
+ MMonPaxos *commit = new MMonPaxos(MMonPaxos::OP_COMMIT, machine_id);
+ commit->values[last_committed] = new_value;
+ commit->pn = accepted_pn;
+
+ mon->messenger->send_message(commit, mon->monmap->get_inst(i));
+ }
+
+ // get ready for a new round.
+ new_value.clear();
+
}
+
+
+void Paxos::handle_commit(MMonPaxos *commit)
+{
+ dout(10) << "handle_commit on " << commit->last_committed << endl;
+
+ // commit locally.
+ last_committed = commit->last_committed;
+ mon->store->put_bl_sn(commit->values[last_committed], machine_name, last_committed);
+ mon->store->put_int(last_committed, machine_name, "last_committed");
+ delete commit;
+}
+
+
/*
* return a globally unique, monotonically increasing proposal number
}
-// ---------------------------------
-// accepter
-void Paxos::handle_collect(MMonPaxos *m)
-{
-//todo high rf
- // ...
-
- delete m;
-}
-
-
-
-
-// ---------------------------------
-// learner
-void Paxos::handle_success(MMonPaxos *m)
-{
- //todo high rf
- delete m;
-}
-
-void Paxos::handle_begin(MMonPaxos *m)
-{
- //todo high rf
- delete m;
-}
-
-// ---------------------------------
-
void Paxos::leader_start()
{
- dout(10) << "i am the leader" << endl;
-
- // .. do something else too
- version_t pn = get_new_proposal_number();
- for (int i=0; i<mon->monmap->num_mon; ++i) {
- if (i == whoami) continue;
- // todo high rf I pass the pn twice... what is the last parameter for?
- mon->messenger->send_message(new MMonPaxos(MMonPaxos::OP_COLLECT, whoami, pn, pn),
- mon->monmap->get_inst(i));
- }
+ dout(10) << "leader_start -- i am the leader, start paxos" << endl;
+ collect(0);
}
-
void Paxos::dispatch(Message *m)
{
switch (m->get_type()) {
handle_last(pm);
break;
- case MMonPaxos::OP_OLDROUND:
- handle_old_round(pm);
- break;
-
case MMonPaxos::OP_BEGIN:
handle_begin(pm);
break;
handle_accept(pm);
break;
- case MMonPaxos::OP_SUCCESS:
- handle_success(pm);
- break;
-
- case MMonPaxos::OP_ACK:
- handle_ack(pm);
+ case MMonPaxos::OP_COMMIT:
+ handle_commit(pm);
break;
-
- default:
- assert(0);
+
+ default:
+ assert(0);
}
}
break;
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
*
*/
+/*
+time---->
+
+cccccccccccccccccca????????????????????????????????????????
+cccccccccccccccccca????????????????????????????????????????
+cccccccccccccccccca???????????????????????????????????????? leader
+cccccccccccccccccc?????????????????????????????????????????
+ccccc??????????????????????????????????????????????????????
+
+last_committed
+
+pn_from
+pn
+
+a 12v
+b 12v
+c 14v
+d
+e 12v
+
+
+*/
#ifndef __MON_PAXOS_H
#define __MON_PAXOS_H
// my state machine info
int machine_id;
const char *machine_name;
- map<version_t, bufferlist> accepted_values;
- map<version_t, int> accepted_proposal_number;
- // proposer
- void propose(version_t v, bufferlist& value);
+ // phase 1
+ version_t last_committed;
+ version_t accepted_pn;
+ version_t accepted_pn_from;
+ // results from our last replies
+ int num_last;
+ version_t old_accepted_pn;
+ bufferlist old_accepted_value;
+
+ // phase 2
+ bufferlist new_value;
+ int num_accepted;
+
+ void collect(version_t oldpn);
+ void handle_collect(MMonPaxos*);
void handle_last(MMonPaxos*);
+ void begin(bufferlist& value);
+ void handle_begin(MMonPaxos*);
void handle_accept(MMonPaxos*);
- void handle_ack(MMonPaxos*);
- void handle_old_round(MMonPaxos*);
-
- version_t get_new_proposal_number(version_t gt=0);
-
- // accepter
- void handle_collect(MMonPaxos*);
+ void commit();
+ void handle_commit(MMonPaxos*);
- // learner
- void handle_success(MMonPaxos*);
- void handle_begin(MMonPaxos*);
+ version_t get_new_proposal_number(version_t gt=0);
-
public:
Paxos(Monitor *m, int w,
int mid,const char *mnm) : mon(m), whoami(w),
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
#include <cassert>
#include <iostream>
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
void Rank::EntityMessenger::reset_myname(entity_name_t newname)
{
- entity_name_t oldname = get_myname();
- dout(10) << "reset_myname " << oldname << " to " << newname << endl;
-
- rank.local.erase(oldname);
- rank.local[newname] = this;
-
- _set_myname(newname);
+ rank.lock.Lock();
+ {
+ entity_name_t oldname = get_myname();
+ dout(10) << "reset_myname " << oldname << " to " << newname << endl;
+
+ rank.local.erase(oldname);
+ rank.local[newname] = this;
+
+ _set_myname(newname);
+ }
+ rank.lock.Unlock();
}
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* ipv4 for now.
*/
struct entity_addr_t {
- __uint8_t ipq[4];
- __uint32_t port;
- __uint32_t nonce; // bind time, or pid, or something unique!
+ uint8_t ipq[4];
+ uint32_t port;
+ uint32_t nonce; // bind time, or pid, or something unique!
entity_addr_t() : port(0), nonce(0) {
ipq[0] = ipq[1] = ipq[2] = ipq[3] = 0;
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
#include <mpi.h>
#include "NewMessenger.h"
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
#include "tcp.h"
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
#ifndef __TCP_H
#define __TCP_H
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/* OSBDB.cc -- ObjectStore on top of Berkeley DB.
Copyright (C) 2007 Casey Marshall <csm@soe.ucsc.edu>
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/* OSBDB.h -- ObjectStore on Berkeley DB. -*- c++ -*-
Copyright (C) 2007 Casey Marshall <csm@soe.ucsc.edu>
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
#include "include/types.h"
bool start_debug = false;
-__uint64_t Ager::age_fill(float pc, utime_t until) {
+uint64_t Ager::age_fill(float pc, utime_t until) {
int max = 1024*1024;
bufferptr bp(max);
bp.zero();
bufferlist bl;
bl.push_back(bp);
- __uint64_t wrote = 0;
+ uint64_t wrote = 0;
while (1) {
if (g_clock.now() > until) break;
g_conf.ebofs_verify = false;
}
-void pfrag(__uint64_t written, ObjectStore::FragmentationStat &st)
+void pfrag(uint64_t written, ObjectStore::FragmentationStat &st)
{
cout << "#gb wr\ttotal\tn x\tavg x\tavg per\tavg j\tfree\tn fr\tavg fr\tnum<2\tsum<2\tnum<4\tsum<4\t..."
<< endl;
<< "\t" << st.avg_free_extent;
int n = st.num_extent;
- for (__uint64_t i=1; i <= 30; i += 1) {
+ for (uint64_t i=1; i <= 30; i += 1) {
cout << "\t" << st.extent_dist[i];
cout << "\t" << st.extent_dist_sum[i];
//cout << "\ta " << (st.extent_dist[i] ? (st.extent_dist_sum[i] / st.extent_dist[i]):0);
ObjectStore::FragmentationStat st;
- __uint64_t wrote = 0;
+ uint64_t wrote = 0;
for (int c=1; c<=count; c++) {
if (g_clock.now() > until) break;
//if (c == 7) start_debug = true;
dout(1) << "#age " << c << "/" << count << " filling to " << high_water << endl;
- __uint64_t w = age_fill(high_water, until);
+ uint64_t w = age_fill(high_water, until);
//dout(1) << "age wrote " << w << endl;
wrote += w;
//store->sync();
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
#ifndef __AGER_H
#define __AGER_H
bool did_distn;
void age_empty(float pc);
- __uint64_t age_fill(float pc, utime_t until);
+ uint64_t age_fill(float pc, utime_t until);
ssize_t age_pick_size();
object_t age_get_oid();
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
assert(sizeof(oid) == 16);
#ifdef __LP64__
sprintf(s, "%s/objects/%02lx/%016lx.%016lx", basedir.c_str(), H(oid) & HASH_MASK,
- *((__uint64_t*)&oid),
- *(((__uint64_t*)&oid) + 1));
+ *((uint64_t*)&oid),
+ *(((uint64_t*)&oid) + 1));
#else
sprintf(s, "%s/objects/%02x/%016llx.%016llx", basedir.c_str(), H(oid) & HASH_MASK,
- *((__uint64_t*)&oid),
- *(((__uint64_t*)&oid) + 1));
+ *((uint64_t*)&oid),
+ *(((uint64_t*)&oid) + 1));
#endif
}
assert(sizeof(oid) == 16);
#ifdef __LP64__
sprintf(s, "%s/collections/%016lx/%016lx.%016lx", basedir.c_str(), cid,
- *((__uint64_t*)&oid),
- *(((__uint64_t*)&oid) + 1));
+ *((uint64_t*)&oid),
+ *(((uint64_t*)&oid) + 1));
#else
sprintf(s, "%s/collections/%016llx/%016llx.%016llx", basedir.c_str(), cid,
- *((__uint64_t*)&oid),
- *(((__uint64_t*)&oid) + 1));
+ *((uint64_t*)&oid),
+ *(((uint64_t*)&oid) + 1));
#endif
}
// parse
object_t o;
assert(sizeof(o) == 16);
- *(((__uint64_t*)&o) + 0) = strtoll(de->d_name, 0, 16);
+ *(((uint64_t*)&o) + 0) = strtoll(de->d_name, 0, 16);
assert(de->d_name[16] == '.');
- *(((__uint64_t*)&o) + 1) = strtoll(de->d_name+17, 0, 16);
+ *(((uint64_t*)&o) + 1) = strtoll(de->d_name+17, 0, 16);
dout(0) << " got " << o << " errno " << errno << " on " << de->d_name << endl;
if (errno) continue;
ls.push_back(o);
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
#include "ObjectStore.h"
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
struct stat st;
int r = osd->store->stat(oid, &st);
if (r >= 0) {
- if (op->get_offset() + op->get_length() >= st.st_size) {
+ if (op->get_offset() + (off_t)op->get_length() >= (off_t)st.st_size) {
if (op->get_offset())
t.truncate(oid, op->get_length() + op->get_offset());
else
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// osd types
-typedef __uint64_t coll_t; // collection id
+typedef uint64_t coll_t; // collection id
// pg stuff
#define PG_INO 1
-typedef __uint16_t ps_t;
-typedef __uint8_t pruleset_t; // hmm what is this for? -sage
+typedef uint16_t ps_t;
+typedef uint8_t pruleset_t;
// crush rule ids
private:
union {
struct {
- /*
- int preferred:32; // 32
- unsigned type:3; // 3
- unsigned size:5; // 5
- unsigned ps:16; // 16
- unsigned ruleset:8; // 8
- */
- __int32_t preferred;
- __uint8_t type;
- __uint8_t size;
- __uint16_t ps;
+ int32_t preferred;
+ uint8_t type;
+ uint8_t size;
+ uint16_t ps;
} fields;
- __uint64_t val; // 64
+ uint64_t val; // 64
} u;
public:
//u.fields.ruleset = r;
assert(sizeof(u.fields) == sizeof(u.val));
}
- pg_t(__uint64_t v) { u.val = v; }
+ pg_t(uint64_t v) { u.val = v; }
int type() { return u.fields.type; }
bool is_rep() { return type() == TYPE_REP; }
int preferred() { return u.fields.preferred; } // hack: avoid negative.
/*
- pg_t operator=(__uint64_t v) { u.val = v; return *this; }
- pg_t operator&=(__uint64_t v) { u.val &= v; return *this; }
+ pg_t operator=(uint64_t v) { u.val = v; return *this; }
+ pg_t operator&=(uint64_t v) { u.val &= v; return *this; }
pg_t operator+=(pg_t o) { u.val += o.val; return *this; }
pg_t operator-=(pg_t o) { u.val -= o.val; return *this; }
pg_t operator++() { ++u.val; return *this; }
*/
- operator __uint64_t() const { return u.val; }
+ operator uint64_t() const { return u.val; }
object_t to_object() const { return object_t(PG_INO, u.val >> 32, u.val & 0xffffffff); }
};
inline ostream& operator<<(ostream& out, pg_t pg)
{
- //return out << hex << pg.val << dec;
-
if (pg.is_rep())
out << pg.size() << 'x';
else if (pg.is_raid4())
{
size_t operator()( const pg_t& x ) const
{
- static hash<__uint64_t> H;
+ static hash<uint64_t> H;
return H(x);
}
};
class OSDSuperblock {
public:
- const static __uint64_t MAGIC = 0xeb0f505dULL;
- __uint64_t magic;
- __uint64_t fsid; // unique fs id (random number)
+ const static uint64_t MAGIC = 0xeb0f505dULL;
+ uint64_t magic;
+ uint64_t fsid; // unique fs id (random number)
int whoami; // my role in this fs.
epoch_t current_epoch; // most recent epoch
epoch_t oldest_map, newest_map; // oldest/newest maps we have.
- OSDSuperblock(__uint64_t f=0, int w=0) :
+ OSDSuperblock(uint64_t f=0, int w=0) :
magic(MAGIC), fsid(f), whoami(w),
current_epoch(0), oldest_map(0), newest_map(0) {}
};
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
off_t left = len;
while (left > 0) {
// layout into objects
- off_t blockno = cur / inode.layout.stripe_unit;
- off_t stripeno = blockno / inode.layout.stripe_count;
- off_t stripepos = blockno % inode.layout.stripe_count;
- off_t objectsetno = stripeno / stripes_per_object;
- off_t objectno = objectsetno * inode.layout.stripe_count + stripepos;
+ off_t blockno = cur / inode.layout.stripe_unit; // which block
+ off_t stripeno = blockno / inode.layout.stripe_count; // which horizontal stripe (Y)
+ off_t stripepos = blockno % inode.layout.stripe_count; // which object in the object set (X)
+ off_t objectsetno = stripeno / stripes_per_object; // which object set
+ off_t objectno = objectsetno * inode.layout.stripe_count + stripepos; // object id
// find oid, extent
ObjectExtent *ex = 0;
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
(to be used on any osd in the proper replica group) */
/*object_t file_to_object(inodeno_t ino,
size_t _ono) {
- __uint64_t ono = _ono;
+ uint64_t ono = _ono;
assert(ino < (1ULL<<OID_INO_BITS)); // legal ino can't be too big
assert(ono < (1ULL<<OID_ONO_BITS));
return ono + (ino << OID_ONO_BITS);
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
#include "msg/Messenger.h"
#include "ObjectCacher.h"
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
#ifndef __OBJECTCACHER_H_
#define __OBJECTCACHER_H_
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
#include "Objecter.h"
#include "osd/OSDMap.h"
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
#ifndef __OBJECTER_H
#define __OBJECTER_H
use strict;
my $fn = shift @ARGV;
-my $f = `cat $fn`;
+my $old = `cat $fn`;
-my $header = '// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
+my $header = `cat doc/header.txt`;
-';
+# strip existing header
+my $new = $old;
+if ($new =~ /^(.*)\* Ceph - scalable distributed file system/s) {
+ my ($a,@b) = split(/\*\/\n/, $new);
+ $new = join("*/\n",@b);
+}
+$new = $header . $new;
-unless ($f =~ /Ceph - scalable distributed file system/) {
+if ($new ne $old) {
open(O, ">$fn.new");
- print O $header;
- print O $f;
+ print O $new;
close O;
+ system "diff $fn $fn.new";
rename "$fn.new", $fn;
+ #unlink "$fn.new";
+
}
--- /dev/null
+#!/usr/bin/perl
+
+use strict;
+my $fn = shift @ARGV;
+my $old = `cat $fn`;
+my $header = `cat doc/modeline.txt`;
+
+# strip existing modeline
+my $new = $old;
+$new =~ s/^\/\/ \-\*\- ([^\n]+) \-\*\-([^\n]*)\n//s; # emacs
+$new =~ s/^\/\/ vim: ([^\n]*)\n//s; # vim;
+$new =~ s/^\/\/ \-\*\- ([^\n]+) \-\*\-([^\n]*)\n//s; # emacs
+$new =~ s/^\/\/ vim: ([^\n]*)\n//s; # vim;
+$new =~ s/^\/\/ \-\*\- ([^\n]+) \-\*\-([^\n]*)\n//s; # emacs
+$new =~ s/^\/\/ vim: ([^\n]*)\n//s; # vim;
+
+# add correct header
+$new = $header . $new;
+
+if ($new ne $old) {
+ print "$fn\n";
+ open(O, ">$fn.new");
+ print O $new;
+ close O;
+ system "diff $fn $fn.new";
+ rename "$fn.new", $fn;
+ #unlink "$fn.new";
+}
+