From eea7d84a6ab3460b3d75e19052aa2b02ce2c0dd0 Mon Sep 17 00:00:00 2001 From: sageweil Date: Wed, 17 Oct 2007 18:37:06 +0000 Subject: [PATCH] merged r1937:1957 from trunk into branches/sage/mds git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1958 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/mds/client/Client.cc | 6 +- branches/sage/mds/client/SyntheticClient.cc | 13 +- branches/sage/mds/config.cc | 93 +++++--- branches/sage/mds/config.h | 8 +- branches/sage/mds/crush2/Makefile | 26 +++ branches/sage/mds/crush2/buckets.c | 56 +++++ branches/sage/mds/crush2/buckets.h | 49 ++++ branches/sage/mds/crush2/crush.c | 236 ++++++++++++++++++++ branches/sage/mds/crush2/crush.h | 49 ++++ branches/sage/mds/crush2/hash.h | 80 +++++++ branches/sage/mds/crush2/types.h | 11 + branches/sage/mds/include/ceph_fs.h | 163 ++++++++++++++ branches/sage/mds/include/ceph_inttypes.h | 8 - branches/sage/mds/include/object.h | 22 -- branches/sage/mds/include/types.h | 85 +------ branches/sage/mds/jobs/runjobsample | 26 +++ branches/sage/mds/kernel/bufferlist.h | 74 ++++++ branches/sage/mds/kernel/ceph_fs.h | 79 ------- branches/sage/mds/kernel/inode.c | 4 - branches/sage/mds/kernel/kmsg.h | 51 +++++ branches/sage/mds/kernel/kmsgbits.h | 50 +++++ branches/sage/mds/kernel/mds_client.h | 42 ++++ branches/sage/mds/kernel/mdsmap.h | 4 - branches/sage/mds/kernel/monmap.h | 10 +- branches/sage/mds/kernel/osd_client.h | 18 ++ branches/sage/mds/kernel/super.h | 75 +++++++ branches/sage/mds/mds/ClientMap.cc | 2 +- branches/sage/mds/mds/IdAllocator.cc | 2 +- branches/sage/mds/mds/MDLog.cc | 6 +- branches/sage/mds/msg/Message.h | 4 +- branches/sage/mds/msg/ceph_msg_types.h | 49 ---- branches/sage/mds/msg/msg_types.h | 16 +- branches/sage/mds/osd/OSDMap.h | 18 +- branches/sage/mds/osd/PG.h | 1 - branches/sage/mds/osd/osd_types.h | 52 ++--- branches/sage/mds/osdc/Filer.cc | 25 +-- branches/sage/mds/osdc/Journaler.cc | 6 +- branches/sage/mds/osdc/Journaler.h | 2 +- 
branches/sage/mds/osdc/Objecter.cc | 19 +- branches/sage/mds/osdc/Objecter.h | 12 +- 40 files changed, 1163 insertions(+), 389 deletions(-) create mode 100644 branches/sage/mds/crush2/Makefile create mode 100644 branches/sage/mds/crush2/buckets.c create mode 100644 branches/sage/mds/crush2/buckets.h create mode 100644 branches/sage/mds/crush2/crush.c create mode 100644 branches/sage/mds/crush2/crush.h create mode 100644 branches/sage/mds/crush2/hash.h create mode 100644 branches/sage/mds/crush2/types.h create mode 100644 branches/sage/mds/include/ceph_fs.h delete mode 100644 branches/sage/mds/include/ceph_inttypes.h create mode 100644 branches/sage/mds/jobs/runjobsample create mode 100644 branches/sage/mds/kernel/bufferlist.h delete mode 100644 branches/sage/mds/kernel/ceph_fs.h create mode 100644 branches/sage/mds/kernel/kmsg.h create mode 100644 branches/sage/mds/kernel/kmsgbits.h create mode 100644 branches/sage/mds/kernel/mds_client.h create mode 100644 branches/sage/mds/kernel/osd_client.h create mode 100644 branches/sage/mds/kernel/super.h delete mode 100644 branches/sage/mds/msg/ceph_msg_types.h diff --git a/branches/sage/mds/client/Client.cc b/branches/sage/mds/client/Client.cc index 4a7e6baacaf5a..67c5af7101ed5 100644 --- a/branches/sage/mds/client/Client.cc +++ b/branches/sage/mds/client/Client.cc @@ -3847,21 +3847,21 @@ int Client::get_stripe_unit(int fd) { FileLayout layout; describe_layout(fd, &layout); - return layout.stripe_unit; + return layout.fl_stripe_unit; } int Client::get_stripe_width(int fd) { FileLayout layout; describe_layout(fd, &layout); - return layout.stripe_width(); + return ceph_file_layout_stripe_width(layout); } int Client::get_stripe_period(int fd) { FileLayout layout; describe_layout(fd, &layout); - return layout.period(); + return ceph_file_layout_period(layout); } int Client::enumerate_layout(int fd, list& result, diff --git a/branches/sage/mds/client/SyntheticClient.cc b/branches/sage/mds/client/SyntheticClient.cc index 
931ea790625bb..1695631b8b8cb 100644 --- a/branches/sage/mds/client/SyntheticClient.cc +++ b/branches/sage/mds/client/SyntheticClient.cc @@ -1683,7 +1683,7 @@ int SyntheticClient::write_file(string& fn, int size, int wrsize) // size is i // = 128 bits (16 bytes) uint64_t *p = (uint64_t*)buf; while ((char*)p < buf + wrsize) { - *p = i*wrsize + (char*)p - buf; + *p = (uint64_t)i*(uint64_t)wrsize + (uint64_t)((char*)p - buf); p++; *p = client->get_nodeid(); p++; @@ -1729,11 +1729,12 @@ int SyntheticClient::read_file(string& fn, int size, int rdsize, bool ignoreprin // verify fingerprint int bad = 0; - int64_t *p = (int64_t*)buf; - int64_t readoff, readclient; + uint64_t *p = (uint64_t*)buf; + uint64_t readoff; + int64_t readclient; while ((char*)p + 32 < buf + rdsize) { readoff = *p; - int64_t wantoff = i*rdsize + (int64_t)((char*)p - buf); + uint64_t wantoff = (uint64_t)i*(uint64_t)rdsize + (uint64_t)((char*)p - buf); p++; readclient = *p; p++; @@ -1817,7 +1818,7 @@ int SyntheticClient::create_objects(int nobj, int osize, int inflight) if (time_to_stop()) break; object_t oid(0x1000, i); - ObjectLayout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, g_OSD_FileLayout.pg_size); + ObjectLayout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, g_OSD_FileLayout.fl_pg_size); if (i % inflight == 0) { dout(6) << "create_objects " << i << "/" << (nobj+1) << dendl; @@ -1919,7 +1920,7 @@ int SyntheticClient::object_rw(int nobj, int osize, int wrpc, } object_t oid(0x1000, o); - ObjectLayout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, g_OSD_FileLayout.pg_size); + ObjectLayout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, g_OSD_FileLayout.fl_pg_size); client->client_lock.Lock(); utime_t start = g_clock.now(); diff --git a/branches/sage/mds/config.cc b/branches/sage/mds/config.cc index f9dea43f893a1..f037fe728dfe4 100644 --- a/branches/sage/mds/config.cc +++ b/branches/sage/mds/config.cc @@ -41,10 +41,45 @@ 
ostream *_dout = &std::cout; ostream *_derr = &std::cerr; // file layouts -FileLayout g_OSD_FileLayout( 1<<22, 1, 1<<22, pg_t::TYPE_REP, 2 ); // 4M objects, 2x replication -FileLayout g_OSD_MDDirLayout( 1<<22, 1, 1<<22, pg_t::TYPE_REP, 2 ); // 4M objects, 2x replication. (a lie, just object layout policy) -FileLayout g_OSD_MDLogLayout( 1<<20, 1, 1<<20, pg_t::TYPE_REP, 2 ); // 1M objects -FileLayout g_OSD_MDAnchorTableLayout( 1<<20, 1, 1<<20, pg_t::TYPE_REP, 2 ); // 1M objects. (a lie, just object layout policy) +struct ceph_file_layout g_OSD_FileLayout = { + fl_stripe_unit: 1<<22, + fl_stripe_count: 1, + fl_object_size: 1<<22, + fl_object_stripe_unit: 0, + fl_pg_preferred: -1, + fl_pg_type: CEPH_PG_TYPE_REP, + fl_pg_size: 2 +}; + +struct ceph_file_layout g_OSD_MDDirLayout = { + fl_stripe_unit: 1<<22, + fl_stripe_count: 1, + fl_object_size: 1<<22, + fl_object_stripe_unit: 0, + fl_pg_preferred: -1, + fl_pg_type: CEPH_PG_TYPE_REP, + fl_pg_size: 2 +}; + +struct ceph_file_layout g_OSD_MDLogLayout = { + fl_stripe_unit: 1<<20, + fl_stripe_count: 1, + fl_object_size: 1<<20, + fl_object_stripe_unit: 0, + fl_pg_preferred: -1, + fl_pg_type: CEPH_PG_TYPE_REP, + fl_pg_size: 2 +}; + +struct ceph_file_layout g_OSD_MDAnchorTableLayout = { + fl_stripe_unit: 1<<20, + fl_stripe_count: 1, + fl_object_size: 1<<20, + fl_object_stripe_unit: 0, + fl_pg_preferred: -1, + fl_pg_type: CEPH_PG_TYPE_REP, + fl_pg_size: 2 +}; #include @@ -266,8 +301,8 @@ md_config_t g_conf = { osd_stat_refresh_interval: .5, osd_pg_bits: 4, // bits per osd - osd_object_layout: OBJECT_LAYOUT_HASHINO,//LINEAR,//HASHINO, - osd_pg_layout: PG_LAYOUT_CRUSH,//LINEAR,//CRUSH, + osd_object_layout: CEPH_OBJECT_LAYOUT_HASHINO,//LINEAR,//HASHINO, + osd_pg_layout: CEPH_PG_LAYOUT_CRUSH,//LINEAR,//CRUSH, osd_max_rep: 4, osd_min_raid_width: 4, osd_max_raid_width: 3, //6, @@ -896,18 +931,18 @@ void parse_config_options(std::vector& args) else if (strcmp(args[i], "--osd_object_layout") == 0) { i++; - if (strcmp(args[i], "linear") 
== 0) g_conf.osd_object_layout = OBJECT_LAYOUT_LINEAR; - else if (strcmp(args[i], "hashino") == 0) g_conf.osd_object_layout = OBJECT_LAYOUT_HASHINO; - else if (strcmp(args[i], "hash") == 0) g_conf.osd_object_layout = OBJECT_LAYOUT_HASH; + if (strcmp(args[i], "linear") == 0) g_conf.osd_object_layout = CEPH_OBJECT_LAYOUT_LINEAR; + else if (strcmp(args[i], "hashino") == 0) g_conf.osd_object_layout = CEPH_OBJECT_LAYOUT_HASHINO; + else if (strcmp(args[i], "hash") == 0) g_conf.osd_object_layout = CEPH_OBJECT_LAYOUT_HASH; else assert(0); } else if (strcmp(args[i], "--osd_pg_layout") == 0) { i++; - if (strcmp(args[i], "linear") == 0) g_conf.osd_pg_layout = PG_LAYOUT_LINEAR; - else if (strcmp(args[i], "hash") == 0) g_conf.osd_pg_layout = PG_LAYOUT_HASH; - else if (strcmp(args[i], "hybrid") == 0) g_conf.osd_pg_layout = PG_LAYOUT_HYBRID; - else if (strcmp(args[i], "crush") == 0) g_conf.osd_pg_layout = PG_LAYOUT_CRUSH; + if (strcmp(args[i], "linear") == 0) g_conf.osd_pg_layout = CEPH_PG_LAYOUT_LINEAR; + else if (strcmp(args[i], "hash") == 0) g_conf.osd_pg_layout = CEPH_PG_LAYOUT_HASH; + else if (strcmp(args[i], "hybrid") == 0) g_conf.osd_pg_layout = CEPH_PG_LAYOUT_HYBRID; + else if (strcmp(args[i], "crush") == 0) g_conf.osd_pg_layout = CEPH_PG_LAYOUT_CRUSH; else assert(0); } @@ -917,38 +952,38 @@ void parse_config_options(std::vector& args) g_conf.tick = atoi(args[++i]); else if (strcmp(args[i], "--file_layout_unit") == 0) - g_OSD_FileLayout.stripe_unit = atoi(args[++i]); + g_OSD_FileLayout.fl_stripe_unit = atoi(args[++i]); else if (strcmp(args[i], "--file_layout_count") == 0) - g_OSD_FileLayout.stripe_count = atoi(args[++i]); + g_OSD_FileLayout.fl_stripe_count = atoi(args[++i]); else if (strcmp(args[i], "--file_layout_osize") == 0) - g_OSD_FileLayout.object_size = atoi(args[++i]); + g_OSD_FileLayout.fl_object_size = atoi(args[++i]); else if (strcmp(args[i], "--file_layout_pg_type") == 0) - g_OSD_FileLayout.pg_type = atoi(args[++i]); + g_OSD_FileLayout.fl_pg_type = 
atoi(args[++i]); else if (strcmp(args[i], "--file_layout_pg_size") == 0) - g_OSD_FileLayout.pg_size = atoi(args[++i]); + g_OSD_FileLayout.fl_pg_size = atoi(args[++i]); else if (strcmp(args[i], "--meta_dir_layout_unit") == 0) - g_OSD_MDDirLayout.stripe_unit = atoi(args[++i]); + g_OSD_MDDirLayout.fl_stripe_unit = atoi(args[++i]); else if (strcmp(args[i], "--meta_dir_layout_scount") == 0) - g_OSD_MDDirLayout.stripe_count = atoi(args[++i]); + g_OSD_MDDirLayout.fl_stripe_count = atoi(args[++i]); else if (strcmp(args[i], "--meta_dir_layout_osize") == 0) - g_OSD_MDDirLayout.object_size = atoi(args[++i]); + g_OSD_MDDirLayout.fl_object_size = atoi(args[++i]); else if (strcmp(args[i], "--meta_dir_layout_pg_type") == 0) - g_OSD_MDDirLayout.pg_type = atoi(args[++i]); + g_OSD_MDDirLayout.fl_pg_type = atoi(args[++i]); else if (strcmp(args[i], "--meta_dir_layout_pg_size") == 0) - g_OSD_MDDirLayout.pg_size = atoi(args[++i]); + g_OSD_MDDirLayout.fl_pg_size = atoi(args[++i]); else if (strcmp(args[i], "--meta_log_layout_unit") == 0) - g_OSD_MDLogLayout.stripe_unit = atoi(args[++i]); + g_OSD_MDLogLayout.fl_stripe_unit = atoi(args[++i]); else if (strcmp(args[i], "--meta_log_layout_scount") == 0) - g_OSD_MDLogLayout.stripe_count = atoi(args[++i]); + g_OSD_MDLogLayout.fl_stripe_count = atoi(args[++i]); else if (strcmp(args[i], "--meta_log_layout_osize") == 0) - g_OSD_MDLogLayout.object_size = atoi(args[++i]); + g_OSD_MDLogLayout.fl_object_size = atoi(args[++i]); else if (strcmp(args[i], "--meta_log_layout_pg_type") == 0) - g_OSD_MDLogLayout.pg_type = atoi(args[++i]); + g_OSD_MDLogLayout.fl_pg_type = atoi(args[++i]); else if (strcmp(args[i], "--meta_log_layout_pg_size") == 0) { - g_OSD_MDLogLayout.pg_size = atoi(args[++i]); - if (!g_OSD_MDLogLayout.pg_size) + g_OSD_MDLogLayout.fl_pg_size = atoi(args[++i]); + if (!g_OSD_MDLogLayout.fl_pg_size) g_conf.mds_log = false; } diff --git a/branches/sage/mds/config.h b/branches/sage/mds/config.h index 3c56f6af20941..b5cdf6cbd586d 100644 --- 
a/branches/sage/mds/config.h +++ b/branches/sage/mds/config.h @@ -15,10 +15,10 @@ #ifndef __CONFIG_H #define __CONFIG_H -extern class FileLayout g_OSD_FileLayout; -extern class FileLayout g_OSD_MDDirLayout; -extern class FileLayout g_OSD_MDLogLayout; -extern class FileLayout g_OSD_MDAnchorTableLayout; +extern struct ceph_file_layout g_OSD_FileLayout; +extern struct ceph_file_layout g_OSD_MDDirLayout; +extern struct ceph_file_layout g_OSD_MDLogLayout; +extern struct ceph_file_layout g_OSD_MDAnchorTableLayout; #include #include diff --git a/branches/sage/mds/crush2/Makefile b/branches/sage/mds/crush2/Makefile new file mode 100644 index 0000000000000..7db40f789198e --- /dev/null +++ b/branches/sage/mds/crush2/Makefile @@ -0,0 +1,26 @@ + +CC = gcc +CFLAGS = -Wall +CFLAGS += -O3 -g +LD = ld +RM = rm + +all: depend libcrush.o + +clean: + rm -f *.o libcrush.o + +%.o: %.c + ${CC} ${CFLAGS} -c $< -o $@ + +libcrush.o: crush.o buckets.o + $(LD) -i -o $@ $^ + +.depend: + touch .depend + +depend: + $(RM) .depend + makedepend -f- -- $(CFLAGS) -- *.c > .depend 2>/dev/null + +include .depend diff --git a/branches/sage/mds/crush2/buckets.c b/branches/sage/mds/crush2/buckets.c new file mode 100644 index 0000000000000..2a2e170bbbb6c --- /dev/null +++ b/branches/sage/mds/crush2/buckets.c @@ -0,0 +1,56 @@ + +#include "hash.h" +#include "buckets.h" + +int +crush_bucket_uniform_choose(struct crush_bucket_uniform *bucket, int x, int r) +{ + unsigned o, p, s; + o = crush_hash32_2(x, bucket->h.id); + p = bucket->primes[crush_hash32_2(bucket->h.id, x) % bucket->h.size]; + s = (x + o + (r+1)*p) % bucket->h.size; + return bucket->h.items[s]; +} + +int +crush_bucket_list_choose(struct crush_bucket_list *bucket, int x, int r) +{ + int i; + __u64 w; + + for (i=0; ih.size; i++) { + w = crush_hash32_4(x, bucket->h.items[i], r, bucket->h.id) & 0xffff; + w = (w * bucket->sum_weights[i]) >> 32; + if (w < bucket->item_weights[i]) + return bucket->h.items[i]; + } + + BUG_ON(1); + return 0; +} + +int 
+crush_bucket_tree_choose(struct crush_bucket_tree *bucket, int x, int r) +{ + return 0; +} + +int +crush_bucket_straw_choose(struct crush_bucket_straw *bucket, int x, int r) +{ + int i; + int high = 0; + unsigned high_draw = 0; + __u64 draw; + + for (i=0; ih.size; i++) { + draw = (crush_hash32_3(x, bucket->h.items[i], r) & 0xffff) * bucket->straws[i]; + draw = draw >> 32; + if (i == 0 || draw > high_draw) { + high = i; + high_draw = draw; + } + } + + return high; +} diff --git a/branches/sage/mds/crush2/buckets.h b/branches/sage/mds/crush2/buckets.h new file mode 100644 index 0000000000000..c83d522159ffc --- /dev/null +++ b/branches/sage/mds/crush2/buckets.h @@ -0,0 +1,49 @@ +#ifndef _CRUSH_BUCKETS_H +#define _CRUSH_BUCKETS_H + +#include "types.h" + +enum { + CRUSH_BUCKET_UNIFORM = 1, + CRUSH_BUCKET_LIST = 2, + CRUSH_BUCKET_TREE = 3, + CRUSH_BUCKET_STRAW = 4 +}; + +struct crush_bucket { + __u32 id; + __u32 type; + __u32 weight; /* 16-bit fixed point */ + __u32 size; /* num items */ + __s32 *items; +}; + +struct crush_bucket_uniform { + struct crush_bucket h; + __u32 item_weight; /* 16-bit fixed point */ + __u32 item_type; + __u32 *primes; +}; + +struct crush_bucket_list { + struct crush_bucket h; + __u32 *item_weights; /* 16-bit fixed point */ + __u32 *sum_weights; /* 16-bit fixed point */ +}; + +struct crush_bucket_tree { + struct crush_bucket h; + +}; + +struct crush_bucket_straw { + struct crush_bucket h; + __u32 *straws; /* 16-bit fixed point */ +}; + +extern int crush_bucket_uniform_choose(struct crush_bucket_uniform *bucket, int x, int r); +extern int crush_bucket_list_choose(struct crush_bucket_list *bucket, int x, int r); +extern int crush_bucket_tree_choose(struct crush_bucket_tree *bucket, int x, int r); +extern int crush_bucket_straw_choose(struct crush_bucket_straw *bucket, int x, int r); + +#endif diff --git a/branches/sage/mds/crush2/crush.c b/branches/sage/mds/crush2/crush.c new file mode 100644 index 0000000000000..3b420c43780d7 --- /dev/null +++ 
b/branches/sage/mds/crush2/crush.c @@ -0,0 +1,236 @@ + +#include "crush.h" +#include "hash.h" + +/* + * choose numrep distinct items of given type + */ +static int crush_choose(struct crush_map *map, + struct crush_bucket *bucket, + int x, int numrep, int type, + int *out, int firstn, + int *outmap) +{ + int rep; + int ftotal, flocal; + int retry_rep, skip_rep; + struct crush_bucket *in = bucket; + int r; + int i; + int item; + int itemtype; + int outpos; + int collide, bad; + + outpos = 0; + + for (rep = 0; rep < numrep; rep++) { + /* keep trying until we get a non-out, non-colliding item */ + ftotal = 0; + skip_rep = 0; + + while (1) { + in = bucket; /* initial bucket */ + + /* choose through intervening buckets */ + flocal = 0; + retry_rep = 0; + + while (1) { + r = rep; + if (in->type == CRUSH_BUCKET_UNIFORM) { + /* be careful */ + if (firstn || numrep >= in->size) { + r += ftotal; /* r' = r + f_total */ + } else { + r += numrep * flocal; /* r' = r + n*f_local */ + /* make sure numrep is not a multiple of bucket size */ + if (in->size % numrep == 0) + /* shift seq once per pass through the bucket */ + r += numrep * flocal / in->size; + } + } else { + if (firstn) + r += ftotal; /* r' = r + f_total */ + else + r += numrep * flocal; /* r' = r + n*f_local */ + } + + /* bucket choose */ + switch (in->type) { + case CRUSH_BUCKET_UNIFORM: + item = crush_bucket_uniform_choose((struct crush_bucket_uniform*)in, x, r); + break; + case CRUSH_BUCKET_LIST: + item = crush_bucket_list_choose((struct crush_bucket_list*)in, x, r); + break; + case CRUSH_BUCKET_TREE: + item = crush_bucket_tree_choose((struct crush_bucket_tree*)in, x, r); + break; + case CRUSH_BUCKET_STRAW: + item = crush_bucket_straw_choose((struct crush_bucket_straw*)in, x, r); + break; + default: + BUG_ON(1); + } + + /* desired type? 
*/ + if (in->type == CRUSH_BUCKET_UNIFORM) + itemtype = ((struct crush_bucket_uniform*)in)->item_type; + else if (item < 0) + itemtype = map->buckets[-item].type; + else + itemtype = 0; + + /* keep going? */ + if (itemtype != type) { + in = &map->buckets[-item]; + continue; + } + + /* collision? */ + collide = 0; + for (i=0; i out[item]) + bad = 1; + } + + if (bad || collide) { + ftotal++; + flocal++; + + if (collide && flocal < 3) + continue; /* locally a few times */ + if (ftotal >= 10) { + /* give up, ignore dup, fixme */ + skip_rep = 1; + break; + } + retry_rep = 1; + } + break; + } + + if (retry_rep) continue; + } + + if (skip_rep) continue; + + out[outpos] = item; + outpos++; + } + + return outpos; +} + + +int crush_do_rule(struct crush_map *map, + int ruleno, + int x, int *result, int result_max, + int *outmap, /* array of size max_devices, values 0...0xffff */ + int forcefeed) /* -1 for none */ +{ + int result_len; + int force_stack[CRUSH_MAX_DEPTH]; + int force_pos = -1; + int a[CRUSH_MAX_SET]; + int b[CRUSH_MAX_SET]; + int *w; + int wsize = 0; + int *o; + int osize; + int *tmp; + struct crush_rule *rule; + int step; + int i; + int numrep; + + rule = &map->rules[ruleno]; + result_len = 0; + w = a; + o = b; + + /* determine hierarchical context of forcefeed, if any */ + if (forcefeed >= 0) { + while (1) { + force_stack[++force_pos] = forcefeed; + if (forcefeed >= 0) + forcefeed = map->device_parent_map[forcefeed]; + else + forcefeed = map->bucket_parent_map[-forcefeed]; + if (forcefeed == 0) break; + } + } + + for (step = 0; step < rule->len; step++) { + switch (rule->steps[step].op) { + case CRUSH_RULE_TAKE: + if (force_pos >= 0) { + w[0] = force_stack[force_pos]; + force_pos--; + BUG_ON(w[0] != rule->steps[step].arg1); + } else { + w[0] = rule->steps[step].arg1; + } + wsize = 1; + break; + + case CRUSH_RULE_CHOOSE_FIRSTN: + case CRUSH_RULE_CHOOSE_INDEP: + BUG_ON(wsize == 0); + + /* reset output */ + osize = 0; + + for (i = 0; i < wsize; i++) { + numrep = 
rule->steps[step].arg1; + + if (force_pos >= 0) { + o[osize++] = force_stack[force_pos]; + force_pos--; + numrep--; + } + if (numrep) + crush_choose(map, + &map->buckets[-w[i]], + x, numrep, rule->steps[step].arg2, + o+osize, rule->steps[step].op == CRUSH_RULE_CHOOSE_FIRSTN, + outmap); + } + + /* swap t and w arrays */ + tmp = o; + o = w; + w = o; + wsize = osize; + break; + + + case CRUSH_RULE_EMIT: + for (i=0; i>13); \ + b=b-c; b=b-a; b=b^(a<<8); \ + c=c-a; c=c-b; c=c^(b>>13); \ + a=a-b; a=a-c; a=a^(c>>12); \ + b=b-c; b=b-a; b=b^(a<<16); \ + c=c-a; c=c-b; c=c^(b>>5); \ + a=a-b; a=a-c; a=a^(c>>3); \ + b=b-c; b=b-a; b=b^(a<<10); \ + c=c-a; c=c-b; c=c^(b>>15); + +#define crush_hash_seed 1315423911 + +static __inline__ unsigned crush_hash32(unsigned a) { + unsigned hash = crush_hash_seed ^ a; + unsigned b = a; + unsigned x = 231232; + unsigned y = 1232; + hashmix(b, x, hash); + hashmix(y, a, hash); + return (hash & 0xFFFFFFFF); +} + +static __inline__ unsigned crush_hash32_2(unsigned a, unsigned b) { + unsigned hash = crush_hash_seed ^ a ^ b; + unsigned x = 231232; + unsigned y = 1232; + hashmix(a, b, hash); + hashmix(x, a, hash); + hashmix(b, y, hash); + return (hash & 0xFFFFFFFF); +} + +static __inline__ unsigned crush_hash32_3(unsigned a, unsigned b, unsigned c) { + unsigned int hash = crush_hash_seed ^ a ^ b ^ c; + unsigned x = 231232; + unsigned y = 1232; + hashmix(a, b, hash); + hashmix(c, x, hash); + hashmix(y, a, hash); + hashmix(b, x, hash); + hashmix(y, c, hash); + return (hash & 0xFFFFFFFF); +} + +static __inline__ unsigned crush_hash32_4(unsigned a, unsigned b, unsigned c, unsigned d) { + unsigned int hash = crush_hash_seed ^a ^ b ^ c ^ d; + unsigned x = 231232; + unsigned y = 1232; + hashmix(a, b, hash); + hashmix(c, d, hash); + hashmix(a, x, hash); + hashmix(y, b, hash); + hashmix(c, x, hash); + hashmix(y, d, hash); + return (hash & 0xFFFFFFFF); +} + +static __inline__ unsigned crush_hash32_5(unsigned a, unsigned b, unsigned c, unsigned d, unsigned e) { 
+ unsigned int hash = crush_hash_seed ^ a ^ b ^ c ^ d ^ e; + unsigned x = 231232; + unsigned y = 1232; + hashmix(a, b, hash); + hashmix(c, d, hash); + hashmix(e, x, hash); + hashmix(y, a, hash); + hashmix(b, x, hash); + hashmix(y, c, hash); + hashmix(d, x, hash); + hashmix(y, e, hash); + return (hash & 0xFFFFFFFF); +} + +#endif diff --git a/branches/sage/mds/crush2/types.h b/branches/sage/mds/crush2/types.h new file mode 100644 index 0000000000000..ea682401e146b --- /dev/null +++ b/branches/sage/mds/crush2/types.h @@ -0,0 +1,11 @@ +#ifndef _CRUSH_TYPES_H +#define _CRUSH_TYPES_H + +#include /* just for int types */ + +#ifndef BUG_ON +# include +# define BUG_ON(x) assert(!(x)) +#endif + +#endif diff --git a/branches/sage/mds/include/ceph_fs.h b/branches/sage/mds/include/ceph_fs.h new file mode 100644 index 0000000000000..ede0663a79158 --- /dev/null +++ b/branches/sage/mds/include/ceph_fs.h @@ -0,0 +1,163 @@ +/* ceph_fs.h + * + * C data types to share between kernel and userspace + */ + +#ifndef _FS_CEPH_CEPH_FS_H +#define _FS_CEPH_CEPH_FS_H + +#include + + +typedef __u64 ceph_ino_t; + + +/** + * object id + */ +struct ceph_object { + ceph_ino_t ino; /* inode "file" identifier */ + __u32 bno; /* "block" (object) in that "file" */ + __u32 rev; /* revision. normally ctime (as epoch). */ +}; +typedef struct ceph_object ceph_object_t; + + + + +/** object layout + * how objects are mapped into PGs + */ +#define CEPH_OBJECT_LAYOUT_HASH 1 +#define CEPH_OBJECT_LAYOUT_LINEAR 2 +#define CEPH_OBJECT_LAYOUT_HASHINO 3 + +/** + * pg layout -- how PGs are mapped into (sets of) OSDs + */ +#define CEPH_PG_LAYOUT_CRUSH 0 +#define CEPH_PG_LAYOUT_HASH 1 +#define CEPH_PG_LAYOUT_LINEAR 2 +#define CEPH_PG_LAYOUT_HYBRID 3 + + +/** + * ceph_file_layout - describe data layout for a file/inode + */ +struct ceph_file_layout { + /* file -> object mapping */ + __u32 fl_stripe_unit; /* stripe unit, in bytes. must be multiple of page size. 
*/ + __u32 fl_stripe_count; /* over this many objects */ + __u32 fl_object_size; /* until objects are this big, then move to new objects */ + + /* pg -> disk layout */ + __u32 fl_object_stripe_unit; /* for per-object raid */ + + /* object -> pg layout */ + __s32 fl_pg_preferred; /* preferred primary for pg */ + __u8 fl_pg_type; /* pg type; see PG_TYPE_* */ + __u8 fl_pg_size; /* pg size (num replicas, raid stripe width, etc. */ +}; + +#define ceph_file_layout_stripe_width(l) (l.fl_stripe_unit * l.fl_stripe_count) + +/* period = bytes before i start on a new set of objects */ +#define ceph_file_layout_period(l) (l.fl_object_size * l.fl_stripe_count) + + + +/** + * placement group id + */ +#define CEPH_PG_TYPE_REP 1 +#define CEPH_PG_TYPE_RAID4 2 + +union ceph_pg { + __u64 pg64; + struct { + __s32 preferred; /* preferred primary osd */ + __u16 ps; /* placement seed */ + __u8 type; + __u8 size; + } pg; +}; +typedef union ceph_pg ceph_pg_t; + +#define ceph_pg_is_rep(pg) (pg.pg.type == CEPH_PG_TYPE_REP) +#define ceph_pg_is_raid4(pg) (pg.pg.type == CEPH_PG_TYPE_RAID4) + +/** + * object layout + * + * describe how a given object should be stored. + */ +struct ceph_object_layout { + ceph_pg_t ol_pgid; + __u32 ol_stripe_unit; +}; + + + +/** + * object extent + */ +struct ceph_object_extent { + ceph_object_t oe_oid; + __u64 oe_start; + __u64 oe_length; + struct ceph_object_layout oe_object_layout; + + /* buffer extent reverse mapping? 
*/ +}; + + + + + +/********************************************* + * message types + */ + +/* + * entity_name + */ +struct ceph_entity_name { + __u32 type; + __u32 num; +}; + +#define CEPH_ENTITY_TYPE_MON 1 +#define CEPH_ENTITY_TYPE_MDS 2 +#define CEPH_ENTITY_TYPE_OSD 3 +#define CEPH_ENTITY_TYPE_CLIENT 4 +#define CEPH_ENTITY_TYPE_ADMIN 5 + + +/* + * entity_addr + * ipv4 only for now + */ +struct ceph_entity_addr { + __u64 nonce; + __u32 port; + __u8 ipq[4]; +}; + + +struct ceph_entity_inst { + struct ceph_entity_name name; + struct ceph_entity_addr addr; +}; + + +/* + * message header + */ +struct ceph_message_header { + __u32 type; + struct ceph_entity_inst src, dst; + __u32 source_port, dest_port; + __u32 nchunks; +}; + +#endif diff --git a/branches/sage/mds/include/ceph_inttypes.h b/branches/sage/mds/include/ceph_inttypes.h deleted file mode 100644 index c31c76ace1c5d..0000000000000 --- a/branches/sage/mds/include/ceph_inttypes.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __CEPH_INTTYPES_H -#define __CEPH_INTTYPES_H - -typedef uint32_t __u32; -typedef uint16_t __u16; -typedef uint8_t __u8; - -#endif diff --git a/branches/sage/mds/include/object.h b/branches/sage/mds/include/object.h index a1382626f7cae..3b8ac05a86b38 100644 --- a/branches/sage/mds/include/object.h +++ b/branches/sage/mds/include/object.h @@ -85,17 +85,6 @@ inline ostream& operator<<(ostream& out, const object_t o) { namespace __gnu_cxx { -#ifndef __LP64__ - template<> struct hash { - size_t operator()(uint64_t __x) const { - static hash H; - return H((__x >> 32) ^ (__x & 0xffffffff)); - //static rjhash H; - //return H(__x); - } - }; -#endif - template<> struct hash { size_t operator()(const object_t &r) const { static rjhash H; @@ -107,15 +96,4 @@ namespace __gnu_cxx { }; } - -/* - template<> struct rjhash { - size_t operator()(const object_t &r) const { - static rjhash H; - static rjhash I; - return H(r.ino) ^ I(r.bno) ^ I(r.rev); - } - }; -*/ - #endif diff --git a/branches/sage/mds/include/types.h 
b/branches/sage/mds/include/types.h index 92bcb94c6dc5f..cf8374d329a77 100644 --- a/branches/sage/mds/include/types.h +++ b/branches/sage/mds/include/types.h @@ -36,6 +36,8 @@ using namespace std; #include using namespace __gnu_cxx; +#include "ceph_fs.h" + #include "object.h" #include "utime.h" @@ -68,6 +70,12 @@ namespace __gnu_cxx { return H((__x >> 32) ^ (__x & 0xffffffff)); } }; + template<> struct hash { + size_t operator()(uint64_t __x) const { + static hash H; + return H((__x >> 32) ^ (__x & 0xffffffff)); + } + }; #endif } @@ -105,76 +113,18 @@ typedef uint64_t version_t; typedef uint32_t epoch_t; // map epoch (32bits -> 13 epochs/second for 10 years) -// object and pg layout -// specified in g_conf.osd_* #define O_LAZY 01000000 -/** object layout - * how objects are mapped into PGs - */ -#define OBJECT_LAYOUT_HASH 1 -#define OBJECT_LAYOUT_LINEAR 2 -#define OBJECT_LAYOUT_HASHINO 3 - -/** pg layout - * how PGs are mapped into (sets of) OSDs - */ -#define PG_LAYOUT_CRUSH 0 -#define PG_LAYOUT_HASH 1 -#define PG_LAYOUT_LINEAR 2 -#define PG_LAYOUT_HYBRID 3 - - - -// ----------------------- -// FileLayout - -/** FileLayout - * specifies a striping and replication strategy - */ - -//#define FILE_LAYOUT_CRUSH 0 // stripe via crush -//#define FILE_LAYOUT_LINEAR 1 // stripe linearly across cluster - -struct FileLayout { - // -- file -> object mapping -- - int32_t stripe_unit; // stripe unit, in bytes - int32_t stripe_count; // over this many objects - int32_t object_size; // until objects are this big, then move to new objects - - int stripe_width() { return stripe_unit * stripe_count; } - - // period = bytes before i start on a new set of objects. - int period() { return object_size * stripe_count; } - - // -- object -> pg layout -- - char pg_type; // pg type (replicated, raid, etc.) (see pg_t::TYPE_*) - char pg_size; // pg size (num replicas, or raid4 stripe width) - int32_t preferred; // preferred primary osd? 
- - // -- pg -> disk layout -- - int32_t object_stripe_unit; // for per-object raid - - FileLayout() { } - FileLayout(int su, int sc, int os, int pgt, int pgs, int o=-1) : - stripe_unit(su), stripe_count(sc), object_size(os), - pg_type(pgt), pg_size(pgs), preferred(o), - object_stripe_unit(su) // note: bad default, we pbly want su/(pgs-1) - { - assert(object_size % stripe_unit == 0); - } - -}; - +typedef ceph_file_layout FileLayout; // -------------------------------------- // inode -typedef uint64_t _inodeno_t; +typedef __uint64_t _inodeno_t; struct inodeno_t { _inodeno_t val; @@ -228,14 +178,6 @@ namespace __gnu_cxx { inline int DT_TO_MODE(int dt) { return dt << 12; - /* - switch (dt) { - case DT_REG: return INODE_MODE_FILE; - case DT_DIR: return INODE_MODE_DIR; - case DT_LNK: return INODE_MODE_SYMLINK; - default: assert(0); return 0; - } - */ } struct inode_t { @@ -278,13 +220,6 @@ struct inode_t { inline unsigned char MODE_TO_DT(int mode) { return mode >> 12; - /* - if (S_ISREG(mode)) return inode_t::DT_REG; - if (S_ISLNK(mode)) return inode_t::DT_LNK; - if (S_ISDIR(mode)) return inode_t::DT_DIR; - assert(0); - return 0; - */ } diff --git a/branches/sage/mds/jobs/runjobsample b/branches/sage/mds/jobs/runjobsample new file mode 100644 index 0000000000000..590be207771b2 --- /dev/null +++ b/branches/sage/mds/jobs/runjobsample @@ -0,0 +1,26 @@ +#!/usr/bin/perl + +# hi there +{ + '_sleep' => 3, + + 'nummds' => 1, + 'numosd' => 16, #[8],#10,14,16], + 'numclient' => 32,#,4,10,20,40], #[10*16], + '_n' => 32, + + '_start' => 15, + '_end' => 45, + '_kill_after' => 190, + + 'osd_pg_bits' => [4, 6], + 'osd_auto_weight' => [0,1], + 'file_layout_pg_size' => [1,2], + + '_custom' => '--syn createobjects 1000000 1048576 2', + + '_comb' => { + 'x' => 'osd_pg_bits', + 'vars' => [ 'osd.c_wrb' ] + } +}; diff --git a/branches/sage/mds/kernel/bufferlist.h b/branches/sage/mds/kernel/bufferlist.h new file mode 100644 index 0000000000000..78e4c6f95216b --- /dev/null +++ 
b/branches/sage/mds/kernel/bufferlist.h @@ -0,0 +1,85 @@ +#ifndef _FS_CEPH_BUFFERLIST_H +#define _FS_CEPH_BUFFERLIST_H + + + +#define CEPH_BUFFERLIST_START_IOVLEN 8 /* embed some statically, for fast normal case */ + +struct ceph_bufferlist { + struct iovec *b_iov; /* data payload */ + struct iovec b_iov_array[CEPH_BUFFERLIST_START_IOVLEN]; + int b_iovlen; /* used/defined elements in b_iov */ + int b_iovmax; /* allocated size of b_iov array */ + struct iovec b_append; /* preallocated memory for appending data to this bufferlist */ +}; + +struct ceph_bufferlist_iterator { + int i_iov; /* which iov */ + int i_off; /* offset in that iov */ +}; + +/* + * add referenced memory to the bufferlist. + * expand b_iov array if necessary. + * extend tail iovec if the added region is contiguous. + * + * the memory at p is referenced, not copied; len is its size in bytes. + * FIXME: a failed kmalloc is not detected when growing b_iov. + */ +static inline void ceph_bufferlist_append_ref(struct ceph_bufferlist *bl, void *p, int len) +{ + struct iovec *tmpvec; + if (bl->b_iovlen == bl->b_iovmax) { + if (bl->b_iovmax) { + bl->b_iovmax *= 2; + tmpvec = kmalloc(sizeof(struct iovec)*bl->b_iovmax, GFP_KERNEL); + memcpy(tmpvec, bl->b_iov, sizeof(struct iovec)*bl->b_iovlen); + if (bl->b_iovlen > CEPH_BUFFERLIST_START_IOVLEN) + kfree(bl->b_iov); + bl->b_iov = tmpvec; + memset(tmpvec + bl->b_iovlen, 0, + sizeof(struct iovec)*(bl->b_iovmax - bl->b_iovlen)); + } else { + bl->b_iovmax = CEPH_BUFFERLIST_START_IOVLEN; + bl->b_iov = bl->b_iov_array; + } + } + + if (bl->b_iovlen && + p == bl->b_iov[bl->b_iovlen-1].iov_base + bl->b_iov[bl->b_iovlen-1].iov_len) { + bl->b_iov[bl->b_iovlen-1].iov_len += len; + } else { + bl->b_iov[bl->b_iovlen].iov_base = p; + bl->b_iov[bl->b_iovlen].iov_len = len; + bl->b_iovlen++; + } +} + +/* + * copy len bytes from p into the bufferlist. + * data goes into the preallocated b_append region; a new page-rounded + * region is allocated whenever the current one is used up. + * FIXME: a failed kmalloc is not detected. + */ +static inline void ceph_bufferlist_append_copy(struct ceph_bufferlist *bl, void *p, int len) +{ + int s; + while (len > 0) { + /* allocate more space?
*/ + if (!bl->b_append.iov_len) { + bl->b_append.iov_len = (len + PAGE_SIZE - 1) & ~(PAGE_SIZE-1); + bl->b_append.iov_base = kmalloc(bl->b_append.iov_len, GFP_KERNEL); + } + + /* copy what we can */ + s = min((int)bl->b_append.iov_len, len); + memcpy(bl->b_append.iov_base, p, s); + ceph_bufferlist_append_ref(bl, bl->b_append.iov_base, s); + p += s; + len -= s; + bl->b_append.iov_base += s; + bl->b_append.iov_len -= s; + } +} + +#endif diff --git a/branches/sage/mds/kernel/ceph_fs.h b/branches/sage/mds/kernel/ceph_fs.h deleted file mode 100644 index 5804b495a3907..0000000000000 --- a/branches/sage/mds/kernel/ceph_fs.h +++ /dev/null @@ -1,79 +0,0 @@ -/* -*- mode:C++; tab-width:8; c-basic-offset:8; indent-tabs-mode:t -*- - * vim: ts=8 sw=8 smarttab - */ - -#ifndef _FS_CEPH_CEPH_H -#define _FS_CEPH_CEPH_H - -/* #include */ - -#include "kmsg.h" - -#include "mdsmap.h" -#include "monmap.h" - -/* do these later -#include "osdmap.h" -*/ -struct ceph_osdmap; - - - -/* - * state associated with an individual MDS<->client session - */ -struct ceph_mds_session { - __u64 s_push_seq; - /* wait queue? */ -}; - -struct ceph_mds_request { - -}; - -/* - * CEPH file system in-core superblock info - */ -struct ceph_sb_info { - __u32 s_whoami; /* client number */ - struct ceph_kmsg *s_kmsg; /* messenger instance */ - - struct ceph_monmap *s_monmap; /* monitor map */ - struct ceph_mdsmap *s_mdsmap; /* mds map */ - struct ceph_osdmap *s_osdmap; /* osd map */ - - /* mds sessions */ - struct ceph_mds_session **s_mds_sessions; /* sparse array; elements NULL if no session */ - int s_max_mds_sessions; /* size of s_mds_sessions array */ - - - - /* current requests */ - /* ...
*/ - __u64 last_tid; -}; - -/* - * CEPH file system in-core inode info - */ -struct ceph_inode_info { - unsigned long val; /* inode from types.h is uint64_t */ - struct inode vfs_inode; -}; - -static inline struct ceph_inode_info *CEPH_I(struct inode *inode) -{ - return list_entry(inode, struct ceph_inode_info, vfs_inode); -} - - -/* file.c */ -extern const struct inode_operations ceph_file_inops; -extern const struct file_operations ceph_file_operations; -extern const struct address_space_operations ceph_aops; - -/* dir.c */ -extern const struct inode_operations ceph_dir_inops; -extern const struct file_operations ceph_dir_operations; - -#endif /* _FS_CEPH_CEPH_H */ diff --git a/branches/sage/mds/kernel/inode.c b/branches/sage/mds/kernel/inode.c index 99fdaf84de4c1..f21fa58386935 100644 --- a/branches/sage/mds/kernel/inode.c +++ b/branches/sage/mds/kernel/inode.c @@ -1,7 +1,3 @@ -/* -*- mode:C++; tab-width:8; c-basic-offset:8; indent-tabs-mode:t -*- - * vim: ts=8 sw=8 smarttab - */ - #include #include #include diff --git a/branches/sage/mds/kernel/kmsg.h b/branches/sage/mds/kernel/kmsg.h new file mode 100644 index 0000000000000..cc44b9fd291e5 --- /dev/null +++ b/branches/sage/mds/kernel/kmsg.h @@ -0,0 +1,51 @@ +#ifndef __FS_CEPH_KMSG_H +#define __FS_CEPH_KMSG_H + +#include +#include +#include +#include "ceph_kthread.h" + + +struct ceph_kthreadpool *msg_threadpool; /* thread pool */ + +struct ceph_kmsgr { + void *m_parent; + struct radix_tree_root mpipes; /* other nodes talk to */ + struct client_thread_info cthread; /* listener thread info */ +}; + +struct ceph_message { + struct ceph_message_header *msghdr; /* header */ + struct kvec *m_iov; /* data storage */ + size_t m_iovlen; /* is this kvec.iov_len why need it in kvec? 
*/ + struct list_head m_list_head; +}; + +struct ceph_kmsg_pipe { + int p_sd; /* socket descriptor */ + __u64 p_out_seq; /* last message sent */ + __u64 p_in_seq; /* last message received */ + + /* out queue */ + struct list_head p_out_queue; + struct ceph_message *p_out_partial; /* partially sent message */ + int p_out_partial_pos; + struct list_head p_out_sent; /* sent but unacked; may need resend if connection drops */ + + /* partially read message contents */ + struct kvec *p_in_partial_iov; /* hrm, this probably isn't what we want */ + size_t p_in_partial_iovlen; + size_t p_in_parital_iovmax; /* size of currently allocated m_iov array */ + /* .. or something like that? .. */ + +}; + +/* + * function prototypes + */ +extern void ceph_read_message(struct ceph_message *message); +extern void ceph_write_message(struct ceph_message *message); +extern void ceph_client_dispatch(void *fs_client, struct ceph_message *message ); +extern void queue_message(struct ceph_message *message); +#endif diff --git a/branches/sage/mds/kernel/kmsgbits.h b/branches/sage/mds/kernel/kmsgbits.h new file mode 100644 index 0000000000000..730ff7f74f53b --- /dev/null +++ b/branches/sage/mds/kernel/kmsgbits.h @@ -0,0 +1,50 @@ + + + +struct ceph_message { + struct ceph_message_header m_hdr; /* header */ + struct iovec *m_iov; /* payload */ + int m_iovlen; + struct list_head m_list_head; /* i'll sit in a queue */ +}; + + + +/* dispatch method type */ +typedef void (*ceph_kmsg_dispatch_t)(void *h, struct ceph_message *message); + +struct ceph_kmsg { + ceph_kmsg_dispatch_t m_dispatch; /* where incoming messages go */ + void *m_parent; /* passed to dispatch method */ + + struct ceph_kmsg_threadpool *m_threadpool; /* pool of threads */ + /* possibly shared among multiple kmsg instances? */ + + /* other nodes i talk to */ + struct radix_tree_root m_pipes; /* key: dest addr, value: ceph_kmsg_pipe */ + + /* ... 
*/ +}; + + +struct ceph_kmsg_pipe { + int p_sd; /* socket descriptor */ + __u64 p_out_seq; /* last message sent */ + __u64 p_in_seq; /* last message received */ + + /* out queue */ + struct list_head p_out_queue; + struct ceph_message *p_out_partial; /* partially sent message */ + int p_out_partial_pos; + struct list_head p_out_sent; /* sent but unacked; may need resend if connection drops */ + + /* partially read message contents */ + struct iovec *p_in_partial_iov; /* hrm, this probably isn't what we want */ + int p_in_partial_iovlen; + int p_in_parital_iovmax; /* size of currently allocated m_iov array */ + /* .. or something like that? .. */ + +}; + + + diff --git a/branches/sage/mds/kernel/mds_client.h b/branches/sage/mds/kernel/mds_client.h new file mode 100644 index 0000000000000..764d7ccd6bdf6 --- /dev/null +++ b/branches/sage/mds/kernel/mds_client.h @@ -0,0 +1,42 @@ +#ifndef _FS_CEPH_MDS_CLIENT_H +#define _FS_CEPH_MDS_CLIENT_H + +#include +#include "kmsg.h" + +/* + * state associated with an individual MDS<->client session + */ +struct ceph_mds_session { + __u64 s_push_seq; + /* wait queue? */ +}; + +struct ceph_mds_request { + __u64 r_tid; + struct ceph_message *r_msg; + __u8 r_idempotent; + + __u32 r_mds[4]; /* set of mds's with whom request may be outstanding */ + __u32 r_num_mds; /* items in r_mds */ + + __u32 r_num_fwd; /* number of forward attempts */ + __s32 r_resend_mds; /* mds to resend to next, if any*/ + + /* waiter/callback? 
*/ +}; + + +struct ceph_mds_client { + struct ceph_mdsmap *s_mdsmap; /* mds map */ + + /* mds sessions */ + struct ceph_mds_session **s_mds_sessions; /* sparse array; elements NULL if no session */ + int s_max_mds_sessions; /* size of s_mds_sessions array */ + + __u64 s_last_mds_tid; /* id of last mds request */ + struct radix_tree_root s_mds_requests; /* in-flight mds requests */ + +}; + +#endif diff --git a/branches/sage/mds/kernel/mdsmap.h b/branches/sage/mds/kernel/mdsmap.h index 4b3cb8460a9a1..c5a970992c36c 100644 --- a/branches/sage/mds/kernel/mdsmap.h +++ b/branches/sage/mds/kernel/mdsmap.h @@ -1,7 +1,3 @@ -/* -*- mode:C++; tab-width:8; c-basic-offset:8; indent-tabs-mode:t -*- - * vim: ts=8 sw=8 smarttab - */ - #ifndef _FS_CEPH_MDSMAP_H #define _FS_CEPH_MDSMAP_H diff --git a/branches/sage/mds/kernel/monmap.h b/branches/sage/mds/kernel/monmap.h index 9f7e535264a8e..2f60c8a0c3436 100644 --- a/branches/sage/mds/kernel/monmap.h +++ b/branches/sage/mds/kernel/monmap.h @@ -1,10 +1,8 @@ -/* -*- mode:C++; tab-width:8; c-basic-offset:8; indent-tabs-mode:t -*- - * vim: ts=8 sw=8 smarttab - */ - #ifndef _FS_CEPH_MONMAP_H #define _FS_CEPH_MONMAP_H +#include + /* * monitor map */ @@ -15,7 +13,7 @@ struct ceph_monmap { struct ceph_entity_inst m_mon_inst; }; -extern int ceph_monmap_pick_mon(ceph_monmap *m); -extern int ceph_monmap_decode(ceph_monmap *m, iovec *v); +extern int ceph_monmap_pick_mon(struct ceph_monmap *m); +extern int ceph_monmap_decode(struct ceph_monmap *m, struct kvec *v); #endif diff --git a/branches/sage/mds/kernel/osd_client.h b/branches/sage/mds/kernel/osd_client.h new file mode 100644 index 0000000000000..6efa3b8f2ab25 --- /dev/null +++ b/branches/sage/mds/kernel/osd_client.h @@ -0,0 +1,18 @@ +#ifndef _FS_CEPH_OSD_CLIENT_H +#define _FS_CEPH_OSD_CLIENT_H + +/* this will be equivalent to osdc/Objecter.h */ + + +/* do these later +#include "osdmap.h" +*/ +struct ceph_osdmap; + + +struct ceph_osd_client { + struct ceph_osdmap *s_osdmap; /* osd map */ + 
+}; + +#endif diff --git a/branches/sage/mds/kernel/super.h b/branches/sage/mds/kernel/super.h new file mode 100644 index 0000000000000..94418511ffa53 --- /dev/null +++ b/branches/sage/mds/kernel/super.h @@ -0,0 +1,75 @@ +#ifndef _FS_CEPH_CEPH_H +#define _FS_CEPH_CEPH_H + +/* #include */ + +#include "kmsg.h" +#include "monmap.h" +#include "mds_client.h" +#include "osd_client.h" + + + +/* + * CEPH per-filesystem client state + * + * possibly shared by multiple mount points, if they are + * mounting the same ceph filesystem/cluster. + */ +struct ceph_fs_client { + __u64 s_fsid; /* hmm this should be part of the monmap? */ + + __u32 s_whoami; /* my client number */ + struct ceph_kmsg *s_kmsg; /* messenger instance */ + + struct ceph_monmap *s_monmap; /* monitor map */ + + struct ceph_mds_client *s_mds_client; + struct ceph_osd_client *s_osd_client; + + int s_ref; /* reference count (for each sb_info that points to me) */ +}; + +/* + * directory of filesystems mounted by this host + * + * key: fsid? ipquad of monitor? hmm! + * value: struct ceph_fs_client* + */ +extern struct radix_tree_root ceph_fs_clients; + + +/* + * CEPH per-mount superblock info + */ +struct ceph_sb_info { + struct ceph_fs_client *sb_client; + + /* FIXME: add my relative offset into the filesystem, + so we can appropriately mangle/adjust path names in requests, etc.
*/ +}; + +/* + * CEPH file system in-core inode info + */ +struct ceph_inode_info { + struct ceph_file_layout i_layout; + struct inode vfs_inode; +}; + +static inline struct ceph_inode_info *CEPH_I(struct inode *inode) +{ + return list_entry(inode, struct ceph_inode_info, vfs_inode); +} + + +/* file.c */ +extern const struct inode_operations ceph_file_inops; +extern const struct file_operations ceph_file_operations; +extern const struct address_space_operations ceph_aops; + +/* dir.c */ +extern const struct inode_operations ceph_dir_inops; +extern const struct file_operations ceph_dir_operations; + +#endif /* _FS_CEPH_CEPH_H */ diff --git a/branches/sage/mds/mds/ClientMap.cc b/branches/sage/mds/mds/ClientMap.cc index 5170f3fe9b3eb..1d781b9ba48c3 100644 --- a/branches/sage/mds/mds/ClientMap.cc +++ b/branches/sage/mds/mds/ClientMap.cc @@ -61,7 +61,7 @@ void ClientMap::load(Context *onload) C_CM_Load *c = new C_CM_Load(this); mds->filer->read(inode, - 0, inode.layout.stripe_unit, + 0, inode.layout.fl_stripe_unit, &c->bl, c); diff --git a/branches/sage/mds/mds/IdAllocator.cc b/branches/sage/mds/mds/IdAllocator.cc index 3a490c48c263d..36a36ea9eb037 100644 --- a/branches/sage/mds/mds/IdAllocator.cc +++ b/branches/sage/mds/mds/IdAllocator.cc @@ -174,7 +174,7 @@ void IdAllocator::load(Context *onfinish) C_ID_Load *c = new C_ID_Load(this, onfinish); mds->filer->read(inode, - 0, inode.layout.stripe_unit, + 0, inode.layout.fl_stripe_unit, &c->bl, c); } diff --git a/branches/sage/mds/mds/MDLog.cc b/branches/sage/mds/mds/MDLog.cc index fc7cdffbe6e10..eeea99c721751 100644 --- a/branches/sage/mds/mds/MDLog.cc +++ b/branches/sage/mds/mds/MDLog.cc @@ -82,7 +82,7 @@ void MDLog::init_journaler() log_inode.layout = g_OSD_MDLogLayout; if (g_conf.mds_local_osd) - log_inode.layout.preferred = mds->get_nodeid() + g_conf.mds_local_osd_offset; // hack + log_inode.layout.fl_pg_preferred = mds->get_nodeid() + g_conf.mds_local_osd_offset; // hack // log streamer if (journaler) delete 
journaler; @@ -191,8 +191,8 @@ void MDLog::submit_entry( LogEvent *le, Context *c ) off_t last_seg = get_last_segment_offset(); if (!segments.empty() && !writing_subtree_map && - (journaler->get_write_pos() / log_inode.layout.period()) != (last_seg / log_inode.layout.period()) && - (journaler->get_write_pos() - last_seg > log_inode.layout.period()/2)) { + (journaler->get_write_pos() / ceph_file_layout_period(log_inode.layout) != (last_seg / ceph_file_layout_period(log_inode.layout)) && + (journaler->get_write_pos() - last_seg > ceph_file_layout_period(log_inode.layout)/2))) { dout(10) << "submit_entry also starting new segment: last = " << last_seg << ", cur pos = " << journaler->get_write_pos() << dendl; start_new_segment(); diff --git a/branches/sage/mds/msg/Message.h b/branches/sage/mds/msg/Message.h index a0de9a24ddab7..9f0175e7a7d1e 100644 --- a/branches/sage/mds/msg/Message.h +++ b/branches/sage/mds/msg/Message.h @@ -174,11 +174,11 @@ public: public: Message() { - env.source_port = env.dest_port = -1; + env.source_port = env.dest_port = 0; env.nchunks = 0; }; Message(int t) { - env.source_port = env.dest_port = -1; + env.source_port = env.dest_port = 0; env.nchunks = 0; env.type = t; } diff --git a/branches/sage/mds/msg/ceph_msg_types.h b/branches/sage/mds/msg/ceph_msg_types.h deleted file mode 100644 index 559c972a02bf8..0000000000000 --- a/branches/sage/mds/msg/ceph_msg_types.h +++ /dev/null @@ -1,49 +0,0 @@ -/* -*- mode:C++; tab-width:8; c-basic-offset:8; indent-tabs-mode:t -*- - * vim: ts=8 sw=8 smarttab - */ -#ifndef __CEPH_MSG_TYPES_H -#define __CEPH_MSG_TYPES_H - -/* - * entity_name - */ -struct ceph_entity_name { - __u32 type; - __u32 num; -}; - -#define CEPH_ENTITY_TYPE_MON 1 -#define CEPH_ENTITY_TYPE_MDS 2 -#define CEPH_ENTITY_TYPE_OSD 3 -#define CEPH_ENTITY_TYPE_CLIENT 4 -#define CEPH_ENTITY_TYPE_ADMIN 5 - - -/* - * entity_addr - * ipv4 only for now - */ -struct ceph_entity_addr { - __u8 ipq[4]; - __u32 port; - __u32 nonce; -}; - - -struct 
ceph_entity_inst { - struct ceph_entity_name name; - struct ceph_entity_addr addr; -}; - - -/* - * message header - */ -struct ceph_message_header { - __u32 type; - struct ceph_entity_inst src, dst; - __u32 source_port, dest_port; - __u32 nchunks; -}; - -#endif diff --git a/branches/sage/mds/msg/msg_types.h b/branches/sage/mds/msg/msg_types.h index 652525729cdfc..52b1e69c8886c 100644 --- a/branches/sage/mds/msg/msg_types.h +++ b/branches/sage/mds/msg/msg_types.h @@ -15,10 +15,6 @@ #ifndef __MSG_TYPES_H #define __MSG_TYPES_H -// raw C structs -#include "include/ceph_inttypes.h" -#include "ceph_msg_types.h" - #include "include/types.h" #include "include/blobhash.h" #include "tcp.h" @@ -97,11 +93,7 @@ namespace __gnu_cxx { }; } -// get rid of these -//#define MSG_ADDR_MDS(x) entity_name_t(entity_name_t::TYPE_MDS,x) -//#define MSG_ADDR_OSD(x) entity_name_t(entity_name_t::TYPE_OSD,x) -//#define MSG_ADDR_MON(x) entity_name_t(entity_name_t::TYPE_MON,x) -//#define MSG_ADDR_CLIENT(x) entity_name_t(entity_name_t::TYPE_CLIENT,x) + /* * an entity's network address. 
@@ -111,11 +103,9 @@ namespace __gnu_cxx { */ struct entity_addr_t { struct ceph_entity_addr v; - uint32_t _pad; - entity_addr_t() : _pad(0) { - v.port = v.nonce = 0; - v.ipq[0] = v.ipq[1] = v.ipq[2] = v.ipq[3] = 0; + entity_addr_t() { + memset(&v, 0, sizeof(v)); } void set_addr(tcpaddr_t a) { diff --git a/branches/sage/mds/osd/OSDMap.h b/branches/sage/mds/osd/OSDMap.h index b50f725687d23..fda57d73ef99e 100644 --- a/branches/sage/mds/osd/OSDMap.h +++ b/branches/sage/mds/osd/OSDMap.h @@ -316,7 +316,7 @@ private: // oid -> pg ObjectLayout file_to_object_layout(object_t oid, FileLayout& layout) { - return make_object_layout(oid, layout.pg_type, layout.pg_size, layout.preferred, layout.object_stripe_unit); + return make_object_layout(oid, layout.fl_pg_type, layout.fl_pg_size, layout.fl_pg_preferred, layout.fl_object_stripe_unit); } ObjectLayout make_object_layout(object_t oid, int pg_type, int pg_size, int preferred=-1, int object_stripe_unit = 0) { @@ -328,16 +328,16 @@ private: // calculate ps (placement seed) ps_t ps; switch (g_conf.osd_object_layout) { - case OBJECT_LAYOUT_LINEAR: + case CEPH_OBJECT_LAYOUT_LINEAR: ps = stable_mod(oid.bno + oid.ino, num, num_mask); break; - case OBJECT_LAYOUT_HASHINO: + case CEPH_OBJECT_LAYOUT_HASHINO: //ps = stable_mod(oid.bno + H(oid.bno+oid.ino)^H(oid.ino>>32), num, num_mask); ps = stable_mod(oid.bno + H(oid.ino)^H(oid.ino>>32), num, num_mask); break; - case OBJECT_LAYOUT_HASH: + case CEPH_OBJECT_LAYOUT_HASH: //ps = stable_mod(H( (oid.bno & oid.ino) ^ ((oid.bno^oid.ino) >> 32) ), num, num_mask); //ps = stable_mod(H(oid.bno) + H(oid.ino)^H(oid.ino>>32), num, num_mask); //ps = stable_mod(oid.bno + H(oid.bno+oid.ino)^H(oid.bno+oid.ino>>32), num, num_mask); @@ -361,7 +361,7 @@ private: vector& osds) { // list of osd addr's // map to osds[] switch (g_conf.osd_pg_layout) { - case PG_LAYOUT_CRUSH: + case CEPH_PG_LAYOUT_CRUSH: { // what crush rule? 
int rule; @@ -382,12 +382,12 @@ private: } break; - case PG_LAYOUT_LINEAR: + case CEPH_PG_LAYOUT_LINEAR: for (int i=0; i= 0 && - g_conf.osd_pg_layout != PG_LAYOUT_CRUSH) { + g_conf.osd_pg_layout != CEPH_PG_LAYOUT_CRUSH) { int osd = pg.preferred(); // already in there? diff --git a/branches/sage/mds/osd/PG.h b/branches/sage/mds/osd/PG.h index 21de9e8f00b0d..7c68ecec2b6e4 100644 --- a/branches/sage/mds/osd/PG.h +++ b/branches/sage/mds/osd/PG.h @@ -214,7 +214,6 @@ public: int op; // write, zero, trunc, remove object_t oid; eversion_t version; - objectrev_t rev; osdreqid_t reqid; // caller+tid to uniquely identify request diff --git a/branches/sage/mds/osd/osd_types.h b/branches/sage/mds/osd/osd_types.h index 08292252934ec..24dd9eca74234 100644 --- a/branches/sage/mds/osd/osd_types.h +++ b/branches/sage/mds/osd/osd_types.h @@ -82,41 +82,33 @@ typedef uint8_t pruleset_t; // placement group id struct pg_t { public: - static const int TYPE_REP = 1; - static const int TYPE_RAID4 = 2; + static const int TYPE_REP = CEPH_PG_TYPE_REP; + static const int TYPE_RAID4 = CEPH_PG_TYPE_RAID4; private: - union { - struct { - int32_t preferred; - uint8_t type; - uint8_t size; - uint16_t ps; - } fields; - uint64_t val; // 64 - } u; + union ceph_pg u; public: - pg_t() { u.val = 0; } - pg_t(const pg_t& o) { u.val = o.u.val; } + pg_t() { u.pg64 = 0; } + pg_t(const pg_t& o) { u.pg64 = o.u.pg64; } pg_t(int type, int size, ps_t seed, int pref) {//, pruleset_t r=0) { - u.fields.type = type; - u.fields.size = size; - u.fields.ps = seed; - u.fields.preferred = pref; // hack: avoid negative. - //u.fields.ruleset = r; - assert(sizeof(u.fields) == sizeof(u.val)); + u.pg.type = type; + u.pg.size = size; + u.pg.ps = seed; + u.pg.preferred = pref; // hack: avoid negative. 
+ //u.pg.ruleset = r; + assert(sizeof(u.pg) == sizeof(u.pg64)); } - pg_t(uint64_t v) { u.val = v; } + pg_t(uint64_t v) { u.pg64 = v; } - int type() { return u.fields.type; } + int type() { return u.pg.type; } bool is_rep() { return type() == TYPE_REP; } bool is_raid4() { return type() == TYPE_RAID4; } - int size() { return u.fields.size; } - ps_t ps() { return u.fields.ps; } - //pruleset_t ruleset() { return u.fields.ruleset; } - int preferred() { return u.fields.preferred; } // hack: avoid negative. + int size() { return u.pg.size; } + ps_t ps() { return u.pg.ps; } + //pruleset_t ruleset() { return u.pg.ruleset; } + int preferred() { return u.pg.preferred; } // hack: avoid negative. /* pg_t operator=(uint64_t v) { u.val = v; return *this; } @@ -125,9 +117,9 @@ public: pg_t operator-=(pg_t o) { u.val -= o.val; return *this; } pg_t operator++() { ++u.val; return *this; } */ - operator uint64_t() const { return u.val; } + operator uint64_t() const { return u.pg64; } - object_t to_object() const { return object_t(PG_INO, u.val >> 32, u.val & 0xffffffff); } + object_t to_object() const { return object_t(PG_INO, u.pg64 >> 32, u.pg64 & 0xffffffff); } }; inline ostream& operator<<(ostream& out, pg_t pg) @@ -282,14 +274,12 @@ class ObjectExtent { off_t start; // in object size_t length; // in object - objectrev_t rev; // which revision? - ObjectLayout layout; // object layout (pgid, etc.) map buffer_extents; // off -> len. extents in buffer being mapped (may be fragmented bc of striping!) 
- ObjectExtent() : start(0), length(0), rev(0) {} - ObjectExtent(object_t o, off_t s=0, size_t l=0) : oid(o), start(s), length(l), rev(0) { } + ObjectExtent() : start(0), length(0) {} + ObjectExtent(object_t o, off_t s=0, size_t l=0) : oid(o), start(s), length(l) { } }; inline ostream& operator<<(ostream& out, ObjectExtent &ex) diff --git a/branches/sage/mds/osdc/Filer.cc b/branches/sage/mds/osdc/Filer.cc index 5d13174d55952..193089d3915b1 100644 --- a/branches/sage/mds/osdc/Filer.cc +++ b/branches/sage/mds/osdc/Filer.cc @@ -57,7 +57,7 @@ int Filer::probe_fwd(inode_t& inode, Probe *probe = new Probe(inode, start_from, end, onfinish); // period (bytes before we jump unto a new set of object(s)) - off_t period = inode.layout.period(); + off_t period = ceph_file_layout_period(inode.layout); // start with 1+ periods. probe->probing_len = period; @@ -132,7 +132,7 @@ void Filer::_probed(Probe *probe, object_t oid, off_t size) if (end == 0) { // keep probing! dout(10) << "_probed didn't find end, probing further" << dendl; - off_t period = probe->inode.layout.object_size * probe->inode.layout.stripe_count; + off_t period = probe->inode.layout.fl_object_size * probe->inode.layout.fl_stripe_count; probe->from += probe->probing_len; probe->probing_len = period; _probe(probe); @@ -170,36 +170,35 @@ void Filer::file_to_extents(inode_t inode, */ map< object_t, ObjectExtent > object_extents; - assert(inode.layout.object_size >= inode.layout.stripe_unit); - off_t stripes_per_object = inode.layout.object_size / inode.layout.stripe_unit; + assert(inode.layout.fl_object_size >= inode.layout.fl_stripe_unit); + off_t stripes_per_object = inode.layout.fl_object_size / inode.layout.fl_stripe_unit; dout(20) << " stripes_per_object " << stripes_per_object << dendl; off_t cur = offset; off_t left = len; while (left > 0) { // layout into objects - off_t blockno = cur / inode.layout.stripe_unit; // which block - off_t stripeno = blockno / inode.layout.stripe_count; // which horizontal stripe 
(Y) - off_t stripepos = blockno % inode.layout.stripe_count; // which object in the object set (X) + off_t blockno = cur / inode.layout.fl_stripe_unit; // which block + off_t stripeno = blockno / inode.layout.fl_stripe_count; // which horizontal stripe (Y) + off_t stripepos = blockno % inode.layout.fl_stripe_count; // which object in the object set (X) off_t objectsetno = stripeno / stripes_per_object; // which object set - off_t objectno = objectsetno * inode.layout.stripe_count + stripepos; // object id + off_t objectno = objectsetno * inode.layout.fl_stripe_count + stripepos; // object id // find oid, extent ObjectExtent *ex = 0; - object_t oid( inode.ino, objectno ); + object_t oid( inode.ino, objectno, rev ); if (object_extents.count(oid)) ex = &object_extents[oid]; else { ex = &object_extents[oid]; ex->oid = oid; - ex->rev = rev; ex->layout = objecter->osdmap->file_to_object_layout( oid, inode.layout ); } // map range into object - off_t block_start = (stripeno % stripes_per_object)*inode.layout.stripe_unit; - off_t block_off = cur % inode.layout.stripe_unit; - off_t max = inode.layout.stripe_unit - block_off; + off_t block_start = (stripeno % stripes_per_object)*inode.layout.fl_stripe_unit; + off_t block_off = cur % inode.layout.fl_stripe_unit; + off_t max = inode.layout.fl_stripe_unit - block_off; off_t x_offset = block_start + block_off; off_t x_len; diff --git a/branches/sage/mds/osdc/Journaler.cc b/branches/sage/mds/osdc/Journaler.cc index c2719549e2247..363b7c60de9aa 100644 --- a/branches/sage/mds/osdc/Journaler.cc +++ b/branches/sage/mds/osdc/Journaler.cc @@ -31,7 +31,7 @@ void Journaler::reset() state = STATE_ACTIVE; write_pos = flush_pos = ack_pos = read_pos = requested_pos = received_pos = - expire_pos = trimming_pos = trimmed_pos = inode.layout.period(); + expire_pos = trimming_pos = trimmed_pos = ceph_file_layout_period(inode.layout); } @@ -239,7 +239,7 @@ off_t Journaler::append_entry(bufferlist& bl, Context *onsync) if 
(!g_conf.journaler_allow_split_entries) { // will we span a stripe boundary? - int p = inode.layout.stripe_unit; + int p = inode.layout.fl_stripe_unit; if (write_pos / p != (write_pos + (off_t)(bl.length() + sizeof(s))) / p) { // yes. // move write_pos forward. @@ -613,7 +613,7 @@ public: void Journaler::trim() { off_t trim_to = last_committed.expire_pos; - trim_to -= trim_to % inode.layout.period(); + trim_to -= trim_to % ceph_file_layout_period(inode.layout); dout(10) << "trim last_commited head was " << last_committed << ", can trim to " << trim_to << dendl; diff --git a/branches/sage/mds/osdc/Journaler.h b/branches/sage/mds/osdc/Journaler.h index 6463d9caf0e6f..a90ec5f9e348f 100644 --- a/branches/sage/mds/osdc/Journaler.h +++ b/branches/sage/mds/osdc/Journaler.h @@ -183,7 +183,7 @@ public: // prefetch intelligently. // (watch out, this is big if you use big objects or weird striping) if (!fetch_len) - fetch_len = inode.layout.object_size*inode.layout.stripe_count * + fetch_len = inode.layout.fl_object_size*inode.layout.fl_stripe_count * g_conf.journaler_prefetch_periods; if (!prefetch_from) prefetch_from = fetch_len / 2; diff --git a/branches/sage/mds/osdc/Objecter.cc b/branches/sage/mds/osdc/Objecter.cc index e6efee1aa4a33..84563b0af9720 100644 --- a/branches/sage/mds/osdc/Objecter.cc +++ b/branches/sage/mds/osdc/Objecter.cc @@ -320,13 +320,11 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m) // stat ----------------------------------- -tid_t Objecter::stat(object_t oid, off_t *size, ObjectLayout ol, Context *onfinish, - objectrev_t rev) +tid_t Objecter::stat(object_t oid, off_t *size, ObjectLayout ol, Context *onfinish) { OSDStat *st = new OSDStat(size); st->extents.push_back(ObjectExtent(oid, 0, 0)); st->extents.front().layout = ol; - st->extents.front().rev = rev; st->onfinish = onfinish; return stat_submit(st); @@ -424,14 +422,12 @@ void Objecter::handle_osd_stat_reply(MOSDOpReply *m) // read ----------------------------------- -tid_t 
Objecter::read(object_t oid, off_t off, size_t len, ObjectLayout ol, bufferlist *bl, - Context *onfinish, - objectrev_t rev) +tid_t Objecter::read(object_t oid, off_t off, size_t len, ObjectLayout ol, bufferlist *bl, + Context *onfinish) { OSDRead *rd = new OSDRead(bl); rd->extents.push_back(ObjectExtent(oid, off, len)); rd->extents.front().layout = ol; - rd->extents.front().rev = rev; readx(rd, onfinish); return last_tid; } @@ -665,14 +661,12 @@ void Objecter::handle_osd_read_reply(MOSDOpReply *m) // write ------------------------------------ tid_t Objecter::write(object_t oid, off_t off, size_t len, ObjectLayout ol, bufferlist &bl, - Context *onack, Context *oncommit, - objectrev_t rev) + Context *onack, Context *oncommit) { OSDWrite *wr = new OSDWrite(bl); wr->extents.push_back(ObjectExtent(oid, off, len)); wr->extents.front().layout = ol; wr->extents.front().buffer_extents[0] = len; - wr->extents.front().rev = rev; modifyx(wr, onack, oncommit); return last_tid; } @@ -681,13 +675,11 @@ tid_t Objecter::write(object_t oid, off_t off, size_t len, ObjectLayout ol, buff // zero tid_t Objecter::zero(object_t oid, off_t off, size_t len, ObjectLayout ol, - Context *onack, Context *oncommit, - objectrev_t rev) + Context *onack, Context *oncommit) { OSDModify *z = new OSDModify(OSD_OP_ZERO); z->extents.push_back(ObjectExtent(oid, off, len)); z->extents.front().layout = ol; - z->extents.front().rev = rev; modifyx(z, onack, oncommit); return last_tid; } @@ -760,7 +752,6 @@ tid_t Objecter::modifyx_submit(OSDModify *wr, ObjectExtent &ex, tid_t usetid) wr->op); m->set_length(ex.length); m->set_offset(ex.start); - m->set_rev(ex.rev); if (usetid > 0) m->set_retry_attempt(true); diff --git a/branches/sage/mds/osdc/Objecter.h b/branches/sage/mds/osdc/Objecter.h index ed5c44745604e..82a437aa04f8d 100644 --- a/branches/sage/mds/osdc/Objecter.h +++ b/branches/sage/mds/osdc/Objecter.h @@ -213,16 +213,12 @@ class Objecter { // even lazier tid_t read(object_t oid, off_t off, size_t len, 
ObjectLayout ol, bufferlist *bl, - Context *onfinish, - objectrev_t rev=0); + Context *onfinish); tid_t write(object_t oid, off_t off, size_t len, ObjectLayout ol, bufferlist &bl, - Context *onack, Context *oncommit, - objectrev_t rev=0); + Context *onack, Context *oncommit); tid_t zero(object_t oid, off_t off, size_t len, ObjectLayout ol, - Context *onack, Context *oncommit, - objectrev_t rev=0); - tid_t stat(object_t oid, off_t *size, ObjectLayout ol, Context *onfinish, - objectrev_t rev=0); + Context *onack, Context *oncommit); + tid_t stat(object_t oid, off_t *size, ObjectLayout ol, Context *onfinish); tid_t lock(int op, object_t oid, ObjectLayout ol, Context *onack, Context *oncommit); -- 2.39.5