From 189110389616454b433298842aefa0b27a7ef9e5 Mon Sep 17 00:00:00 2001 From: sageweil Date: Sat, 13 Oct 2007 20:15:39 +0000 Subject: [PATCH] more data type rework git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1938 29311d96-e01e-0410-9327-a35deaab8ce9 --- trunk/ceph/client/Client.cc | 6 +- trunk/ceph/client/SyntheticClient.cc | 4 +- trunk/ceph/config.cc | 85 ++++++++++++------- trunk/ceph/config.h | 8 +- trunk/ceph/include/ceph_fs.h | 119 +++++++++++++++++++++++++++ trunk/ceph/include/ceph_inttypes.h | 22 +++++ trunk/ceph/include/types.h | 79 ++---------------- trunk/ceph/kernel/ceph_fs.h | 35 ++------ trunk/ceph/kernel/mds_client.h | 43 ++++++++++ trunk/ceph/kernel/osd_client.h | 22 +++++ trunk/ceph/mds/ClientMap.cc | 2 +- trunk/ceph/mds/IdAllocator.cc | 2 +- trunk/ceph/mds/MDLog.cc | 6 +- trunk/ceph/msg/ceph_msg_types.h | 4 +- trunk/ceph/msg/msg_types.h | 6 +- trunk/ceph/osd/OSDMap.h | 18 ++-- trunk/ceph/osd/osd_types.h | 10 +-- trunk/ceph/osdc/Filer.cc | 25 +++--- trunk/ceph/osdc/Journaler.cc | 6 +- trunk/ceph/osdc/Journaler.h | 2 +- trunk/ceph/osdc/Objecter.cc | 19 ++--- trunk/ceph/osdc/Objecter.h | 12 +-- 22 files changed, 329 insertions(+), 206 deletions(-) create mode 100644 trunk/ceph/include/ceph_fs.h create mode 100644 trunk/ceph/kernel/mds_client.h create mode 100644 trunk/ceph/kernel/osd_client.h diff --git a/trunk/ceph/client/Client.cc b/trunk/ceph/client/Client.cc index 4a7e6baacaf5a..67c5af7101ed5 100644 --- a/trunk/ceph/client/Client.cc +++ b/trunk/ceph/client/Client.cc @@ -3847,21 +3847,21 @@ int Client::get_stripe_unit(int fd) { FileLayout layout; describe_layout(fd, &layout); - return layout.stripe_unit; + return layout.fl_stripe_unit; } int Client::get_stripe_width(int fd) { FileLayout layout; describe_layout(fd, &layout); - return layout.stripe_width(); + return ceph_file_layout_stripe_width(layout); } int Client::get_stripe_period(int fd) { FileLayout layout; describe_layout(fd, &layout); - return layout.period(); + return 
ceph_file_layout_period(layout); } int Client::enumerate_layout(int fd, list& result, diff --git a/trunk/ceph/client/SyntheticClient.cc b/trunk/ceph/client/SyntheticClient.cc index 931ea790625bb..8f16c6c2596ef 100644 --- a/trunk/ceph/client/SyntheticClient.cc +++ b/trunk/ceph/client/SyntheticClient.cc @@ -1817,7 +1817,7 @@ int SyntheticClient::create_objects(int nobj, int osize, int inflight) if (time_to_stop()) break; object_t oid(0x1000, i); - ObjectLayout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, g_OSD_FileLayout.pg_size); + ObjectLayout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, g_OSD_FileLayout.fl_pg_size); if (i % inflight == 0) { dout(6) << "create_objects " << i << "/" << (nobj+1) << dendl; @@ -1919,7 +1919,7 @@ int SyntheticClient::object_rw(int nobj, int osize, int wrpc, } object_t oid(0x1000, o); - ObjectLayout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, g_OSD_FileLayout.pg_size); + ObjectLayout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, g_OSD_FileLayout.fl_pg_size); client->client_lock.Lock(); utime_t start = g_clock.now(); diff --git a/trunk/ceph/config.cc b/trunk/ceph/config.cc index f9dea43f893a1..185fea8b9ce00 100644 --- a/trunk/ceph/config.cc +++ b/trunk/ceph/config.cc @@ -41,10 +41,37 @@ ostream *_dout = &std::cout; ostream *_derr = &std::cerr; // file layouts -FileLayout g_OSD_FileLayout( 1<<22, 1, 1<<22, pg_t::TYPE_REP, 2 ); // 4M objects, 2x replication -FileLayout g_OSD_MDDirLayout( 1<<22, 1, 1<<22, pg_t::TYPE_REP, 2 ); // 4M objects, 2x replication. (a lie, just object layout policy) -FileLayout g_OSD_MDLogLayout( 1<<20, 1, 1<<20, pg_t::TYPE_REP, 2 ); // 1M objects -FileLayout g_OSD_MDAnchorTableLayout( 1<<20, 1, 1<<20, pg_t::TYPE_REP, 2 ); // 1M objects. 
(a lie, just object layout policy) +struct ceph_file_layout g_OSD_FileLayout = { + fl_stripe_unit: 1<<22, + fl_stripe_count: 1, + fl_object_size: 1<<22, + fl_pg_type: CEPH_PG_TYPE_REP, + fl_pg_size: 2 +}; + +struct ceph_file_layout g_OSD_MDDirLayout = { + fl_stripe_unit: 1<<22, + fl_stripe_count: 1, + fl_object_size: 1<<22, + fl_pg_type: CEPH_PG_TYPE_REP, + fl_pg_size: 2 +}; + +struct ceph_file_layout g_OSD_MDLogLayout = { + fl_stripe_unit: 1<<20, + fl_stripe_count: 1, + fl_object_size: 1<<20, + fl_pg_type: CEPH_PG_TYPE_REP, + fl_pg_size: 2 +}; + +struct ceph_file_layout g_OSD_MDAnchorTableLayout = { + fl_stripe_unit: 1<<20, + fl_stripe_count: 1, + fl_object_size: 1<<20, + fl_pg_type: CEPH_PG_TYPE_REP, + fl_pg_size: 2 +}; #include @@ -266,8 +293,8 @@ md_config_t g_conf = { osd_stat_refresh_interval: .5, osd_pg_bits: 4, // bits per osd - osd_object_layout: OBJECT_LAYOUT_HASHINO,//LINEAR,//HASHINO, - osd_pg_layout: PG_LAYOUT_CRUSH,//LINEAR,//CRUSH, + osd_object_layout: CEPH_OBJECT_LAYOUT_HASHINO,//LINEAR,//HASHINO, + osd_pg_layout: CEPH_PG_LAYOUT_CRUSH,//LINEAR,//CRUSH, osd_max_rep: 4, osd_min_raid_width: 4, osd_max_raid_width: 3, //6, @@ -896,18 +923,18 @@ void parse_config_options(std::vector& args) else if (strcmp(args[i], "--osd_object_layout") == 0) { i++; - if (strcmp(args[i], "linear") == 0) g_conf.osd_object_layout = OBJECT_LAYOUT_LINEAR; - else if (strcmp(args[i], "hashino") == 0) g_conf.osd_object_layout = OBJECT_LAYOUT_HASHINO; - else if (strcmp(args[i], "hash") == 0) g_conf.osd_object_layout = OBJECT_LAYOUT_HASH; + if (strcmp(args[i], "linear") == 0) g_conf.osd_object_layout = CEPH_OBJECT_LAYOUT_LINEAR; + else if (strcmp(args[i], "hashino") == 0) g_conf.osd_object_layout = CEPH_OBJECT_LAYOUT_HASHINO; + else if (strcmp(args[i], "hash") == 0) g_conf.osd_object_layout = CEPH_OBJECT_LAYOUT_HASH; else assert(0); } else if (strcmp(args[i], "--osd_pg_layout") == 0) { i++; - if (strcmp(args[i], "linear") == 0) g_conf.osd_pg_layout = PG_LAYOUT_LINEAR; - else if 
(strcmp(args[i], "hash") == 0) g_conf.osd_pg_layout = PG_LAYOUT_HASH; - else if (strcmp(args[i], "hybrid") == 0) g_conf.osd_pg_layout = PG_LAYOUT_HYBRID; - else if (strcmp(args[i], "crush") == 0) g_conf.osd_pg_layout = PG_LAYOUT_CRUSH; + if (strcmp(args[i], "linear") == 0) g_conf.osd_pg_layout = CEPH_PG_LAYOUT_LINEAR; + else if (strcmp(args[i], "hash") == 0) g_conf.osd_pg_layout = CEPH_PG_LAYOUT_HASH; + else if (strcmp(args[i], "hybrid") == 0) g_conf.osd_pg_layout = CEPH_PG_LAYOUT_HYBRID; + else if (strcmp(args[i], "crush") == 0) g_conf.osd_pg_layout = CEPH_PG_LAYOUT_CRUSH; else assert(0); } @@ -917,38 +944,38 @@ void parse_config_options(std::vector& args) g_conf.tick = atoi(args[++i]); else if (strcmp(args[i], "--file_layout_unit") == 0) - g_OSD_FileLayout.stripe_unit = atoi(args[++i]); + g_OSD_FileLayout.fl_stripe_unit = atoi(args[++i]); else if (strcmp(args[i], "--file_layout_count") == 0) - g_OSD_FileLayout.stripe_count = atoi(args[++i]); + g_OSD_FileLayout.fl_stripe_count = atoi(args[++i]); else if (strcmp(args[i], "--file_layout_osize") == 0) - g_OSD_FileLayout.object_size = atoi(args[++i]); + g_OSD_FileLayout.fl_object_size = atoi(args[++i]); else if (strcmp(args[i], "--file_layout_pg_type") == 0) - g_OSD_FileLayout.pg_type = atoi(args[++i]); + g_OSD_FileLayout.fl_pg_type = atoi(args[++i]); else if (strcmp(args[i], "--file_layout_pg_size") == 0) - g_OSD_FileLayout.pg_size = atoi(args[++i]); + g_OSD_FileLayout.fl_pg_size = atoi(args[++i]); else if (strcmp(args[i], "--meta_dir_layout_unit") == 0) - g_OSD_MDDirLayout.stripe_unit = atoi(args[++i]); + g_OSD_MDDirLayout.fl_stripe_unit = atoi(args[++i]); else if (strcmp(args[i], "--meta_dir_layout_scount") == 0) - g_OSD_MDDirLayout.stripe_count = atoi(args[++i]); + g_OSD_MDDirLayout.fl_stripe_count = atoi(args[++i]); else if (strcmp(args[i], "--meta_dir_layout_osize") == 0) - g_OSD_MDDirLayout.object_size = atoi(args[++i]); + g_OSD_MDDirLayout.fl_object_size = atoi(args[++i]); else if (strcmp(args[i], 
"--meta_dir_layout_pg_type") == 0) - g_OSD_MDDirLayout.pg_type = atoi(args[++i]); + g_OSD_MDDirLayout.fl_pg_type = atoi(args[++i]); else if (strcmp(args[i], "--meta_dir_layout_pg_size") == 0) - g_OSD_MDDirLayout.pg_size = atoi(args[++i]); + g_OSD_MDDirLayout.fl_pg_size = atoi(args[++i]); else if (strcmp(args[i], "--meta_log_layout_unit") == 0) - g_OSD_MDLogLayout.stripe_unit = atoi(args[++i]); + g_OSD_MDLogLayout.fl_stripe_unit = atoi(args[++i]); else if (strcmp(args[i], "--meta_log_layout_scount") == 0) - g_OSD_MDLogLayout.stripe_count = atoi(args[++i]); + g_OSD_MDLogLayout.fl_stripe_count = atoi(args[++i]); else if (strcmp(args[i], "--meta_log_layout_osize") == 0) - g_OSD_MDLogLayout.object_size = atoi(args[++i]); + g_OSD_MDLogLayout.fl_object_size = atoi(args[++i]); else if (strcmp(args[i], "--meta_log_layout_pg_type") == 0) - g_OSD_MDLogLayout.pg_type = atoi(args[++i]); + g_OSD_MDLogLayout.fl_pg_type = atoi(args[++i]); else if (strcmp(args[i], "--meta_log_layout_pg_size") == 0) { - g_OSD_MDLogLayout.pg_size = atoi(args[++i]); - if (!g_OSD_MDLogLayout.pg_size) + g_OSD_MDLogLayout.fl_pg_size = atoi(args[++i]); + if (!g_OSD_MDLogLayout.fl_pg_size) g_conf.mds_log = false; } diff --git a/trunk/ceph/config.h b/trunk/ceph/config.h index 3c56f6af20941..b5cdf6cbd586d 100644 --- a/trunk/ceph/config.h +++ b/trunk/ceph/config.h @@ -15,10 +15,10 @@ #ifndef __CONFIG_H #define __CONFIG_H -extern class FileLayout g_OSD_FileLayout; -extern class FileLayout g_OSD_MDDirLayout; -extern class FileLayout g_OSD_MDLogLayout; -extern class FileLayout g_OSD_MDAnchorTableLayout; +extern struct ceph_file_layout g_OSD_FileLayout; +extern struct ceph_file_layout g_OSD_MDDirLayout; +extern struct ceph_file_layout g_OSD_MDLogLayout; +extern struct ceph_file_layout g_OSD_MDAnchorTableLayout; #include #include diff --git a/trunk/ceph/include/ceph_fs.h b/trunk/ceph/include/ceph_fs.h new file mode 100644 index 0000000000000..9c1b1faf4eab9 --- /dev/null +++ b/trunk/ceph/include/ceph_fs.h @@ -0,0 
+1,119 @@ +/* -*- mode:C++; tab-width:8; c-basic-offset:8; indent-tabs-mode:t -*- + * vim: ts=8 sw=8 smarttab + */ + +/* ceph_fs.h + * + * C data types to share between kernel and userspace + */ + +#ifndef _FS_CEPH_CEPH_FS_H +#define _FS_CEPH_CEPH_FS_H + +typedef u64 ceph_ino_t; + +/** + * object id + */ +struct ceph_object { + ceph_ino_t ino; /* inode "file" identifier */ + u32 bno; /* "block" (object) in that "file" */ + u32 rev; /* revision. normally ctime (as epoch). */ +}; +typedef struct ceph_object ceph_object_t; + + + + +/** object layout + * how objects are mapped into PGs + */ +#define CEPH_OBJECT_LAYOUT_HASH 1 +#define CEPH_OBJECT_LAYOUT_LINEAR 2 +#define CEPH_OBJECT_LAYOUT_HASHINO 3 + +/** + * pg layout -- how PGs are mapped into (sets of) OSDs + */ +#define CEPH_PG_LAYOUT_CRUSH 0 +#define CEPH_PG_LAYOUT_HASH 1 +#define CEPH_PG_LAYOUT_LINEAR 2 +#define CEPH_PG_LAYOUT_HYBRID 3 + + +/** + * ceph_file_layout - describe data layout for a file/inode + */ +struct ceph_file_layout { + /* file -> object mapping */ + __u32 fl_stripe_unit; /* stripe unit, in bytes. must be multiple of page size. */ + __u32 fl_stripe_count; /* over this many objects */ + __u32 fl_object_size; /* until objects are this big, then move to new objects */ + + /* object -> pg layout */ + __u8 fl_pg_type; /* pg type; see PG_TYPE_* */ + __u8 fl_pg_size; /* pg size (num replicas, raid stripe width, etc. 
*/ + __u16 __pad; + __u32 fl_preferred; /* preferred primary for pg */ + + /* pg -> disk layout */ + __u32 fl_object_stripe_unit; /* for per-object raid */ +}; +typedef struct ceph_file_layout ceph_file_layout_t; + +#define ceph_file_layout_stripe_width(l) ((l).fl_stripe_unit * (l).fl_stripe_count) + +/* period = bytes before i start on a new set of objects */ +#define ceph_file_layout_period(l) ((l).fl_object_size * (l).fl_stripe_count) + + + +/** + * placement group id + */ +#define CEPH_PG_TYPE_REP 1 +#define CEPH_PG_TYPE_RAID4 2 + +union ceph_pg { + u64 pg64; + struct { + s32 preferred; /* preferred primary osd */ + u16 ps; /* placement seed */ + u8 type; + u8 size; + } pg; +}; +typedef union ceph_pg ceph_pg_t; + +#define ceph_pg_is_rep(p) ((p).pg.type == CEPH_PG_TYPE_REP) +#define ceph_pg_is_raid4(p) ((p).pg.type == CEPH_PG_TYPE_RAID4) + +/** + * object layout + * + * describe how a given object should be stored. + */ +struct ceph_object_layout { + ceph_pg_t ol_pgid; + __u32 ol_stripe_unit; +}; +typedef struct ceph_object_layout ceph_object_layout_t; + + + +/** + * object extent + */ +struct ceph_object_extent { + ceph_object_t oe_oid; + u64 oe_start; + u64 oe_length; + ceph_object_layout_t oe_object_layout; + + /* buffer extent reverse mapping? */ +}; +typedef struct ceph_object_extent ceph_object_extent_t; + + + +#endif diff --git a/trunk/ceph/include/ceph_inttypes.h b/trunk/ceph/include/ceph_inttypes.h index c31c76ace1c5d..3c4679e728ad1 100644 --- a/trunk/ceph/include/ceph_inttypes.h +++ b/trunk/ceph/include/ceph_inttypes.h @@ -1,8 +1,30 @@ #ifndef __CEPH_INTTYPES_H #define __CEPH_INTTYPES_H +/* + * these are int types defined in the kernel. + * this header should be included prior to ceph_fs.h when used from userspace. + * i suspect kernel_compat.h (or whatever) serves a similar purpose? 
+ */ + +typedef uint64_t __u64; typedef uint32_t __u32; typedef uint16_t __u16; typedef uint8_t __u8; +typedef int64_t __s64; +typedef int32_t __s32; +typedef int16_t __s16; +typedef int8_t __s8; + +typedef uint64_t u64; +typedef uint32_t u32; +typedef uint16_t u16; +typedef uint8_t u8; + +typedef int64_t s64; +typedef int32_t s32; +typedef int16_t s16; +typedef int8_t s8; + #endif diff --git a/trunk/ceph/include/types.h b/trunk/ceph/include/types.h index 92bcb94c6dc5f..39a94c4dfb809 100644 --- a/trunk/ceph/include/types.h +++ b/trunk/ceph/include/types.h @@ -37,6 +37,10 @@ using namespace std; using namespace __gnu_cxx; +#include "ceph_inttypes.h" +#include "ceph_fs.h" + + #include "object.h" #include "utime.h" @@ -111,70 +115,14 @@ typedef uint32_t epoch_t; // map epoch (32bits -> 13 epochs/second for 10 #define O_LAZY 01000000 -/** object layout - * how objects are mapped into PGs - */ -#define OBJECT_LAYOUT_HASH 1 -#define OBJECT_LAYOUT_LINEAR 2 -#define OBJECT_LAYOUT_HASHINO 3 - -/** pg layout - * how PGs are mapped into (sets of) OSDs - */ -#define PG_LAYOUT_CRUSH 0 -#define PG_LAYOUT_HASH 1 -#define PG_LAYOUT_LINEAR 2 -#define PG_LAYOUT_HYBRID 3 - - - -// ----------------------- -// FileLayout - -/** FileLayout - * specifies a striping and replication strategy - */ - -//#define FILE_LAYOUT_CRUSH 0 // stripe via crush -//#define FILE_LAYOUT_LINEAR 1 // stripe linearly across cluster - -struct FileLayout { - // -- file -> object mapping -- - int32_t stripe_unit; // stripe unit, in bytes - int32_t stripe_count; // over this many objects - int32_t object_size; // until objects are this big, then move to new objects - - int stripe_width() { return stripe_unit * stripe_count; } - - // period = bytes before i start on a new set of objects. - int period() { return object_size * stripe_count; } - - // -- object -> pg layout -- - char pg_type; // pg type (replicated, raid, etc.) 
(see pg_t::TYPE_*) - char pg_size; // pg size (num replicas, or raid4 stripe width) - int32_t preferred; // preferred primary osd? - - // -- pg -> disk layout -- - int32_t object_stripe_unit; // for per-object raid - - FileLayout() { } - FileLayout(int su, int sc, int os, int pgt, int pgs, int o=-1) : - stripe_unit(su), stripe_count(sc), object_size(os), - pg_type(pgt), pg_size(pgs), preferred(o), - object_stripe_unit(su) // note: bad default, we pbly want su/(pgs-1) - { - assert(object_size % stripe_unit == 0); - } - -}; - +typedef ceph_file_layout FileLayout; // -------------------------------------- // inode -typedef uint64_t _inodeno_t; +typedef ceph_ino_t _inodeno_t; struct inodeno_t { _inodeno_t val; @@ -228,14 +176,6 @@ namespace __gnu_cxx { inline int DT_TO_MODE(int dt) { return dt << 12; - /* - switch (dt) { - case DT_REG: return INODE_MODE_FILE; - case DT_DIR: return INODE_MODE_DIR; - case DT_LNK: return INODE_MODE_SYMLINK; - default: assert(0); return 0; - } - */ } struct inode_t { @@ -278,13 +218,6 @@ struct inode_t { inline unsigned char MODE_TO_DT(int mode) { return mode >> 12; - /* - if (S_ISREG(mode)) return inode_t::DT_REG; - if (S_ISLNK(mode)) return inode_t::DT_LNK; - if (S_ISDIR(mode)) return inode_t::DT_DIR; - assert(0); - return 0; - */ } diff --git a/trunk/ceph/kernel/ceph_fs.h b/trunk/ceph/kernel/ceph_fs.h index 5804b495a3907..94ec93814f65d 100644 --- a/trunk/ceph/kernel/ceph_fs.h +++ b/trunk/ceph/kernel/ceph_fs.h @@ -8,29 +8,12 @@ /* #include */ #include "kmsg.h" - -#include "mdsmap.h" #include "monmap.h" - -/* do these later -#include "osdmap.h" -*/ -struct ceph_osdmap; +#include "mds_client.h" +#include "osd_client.h" -/* - * state associated with an individual MDS<->client session - */ -struct ceph_mds_session { - __u64 s_push_seq; - /* wait queue? 
*/ -}; - -struct ceph_mds_request { - -}; - /* * CEPH file system in-core superblock info */ @@ -39,25 +22,17 @@ struct ceph_sb_info { struct ceph_kmsg *s_kmsg; /* messenger instance */ struct ceph_monmap *s_monmap; /* monitor map */ - struct ceph_mdsmap *s_mdsmap; /* mds map */ - struct ceph_osdmap *s_osdmap; /* osd map */ - /* mds sessions */ - struct ceph_mds_session **s_mds_sessions; /* sparse array; elements NULL if no session */ - int s_max_mds_sessions; /* size of s_mds_sessions array */ + struct ceph_mds_client *s_mds_client; + struct ceph_osd_client *s_osd_client; - - - /* current requests */ - /* ... */ - __u64 last_tid; }; /* * CEPH file system in-core inode info */ struct ceph_inode_info { - unsigned long val; /* inode from types.h is uint64_t */ + struct ceph_file_layout i_layout; struct inode vfs_inode; }; diff --git a/trunk/ceph/kernel/mds_client.h b/trunk/ceph/kernel/mds_client.h new file mode 100644 index 0000000000000..b6860481c1769 --- /dev/null +++ b/trunk/ceph/kernel/mds_client.h @@ -0,0 +1,43 @@ +/* -*- mode:C++; tab-width:8; c-basic-offset:8; indent-tabs-mode:t -*- + * vim: ts=8 sw=8 smarttab + */ + +#ifndef _FS_CEPH_MDS_CLIENT_H +#define _FS_CEPH_MDS_CLIENT_H + +/* + * state associated with an individual MDS<->client session + */ +struct ceph_mds_session { + __u64 s_push_seq; + /* wait queue? */ +}; + +struct ceph_mds_request { + __u64 r_tid; + ceph_message *r_msg; + __u8 r_idempotent; + + __u32 r_mds[4]; /* set of mds's with whom request may be outstanding */ + __u32 r_num_mds; /* items in r_mds */ + + __u32 r_num_fwd; /* number of forward attempts */ + __s32 r_resend_mds; /* mds to resend to next, if any*/ + + /* waiter/callback? 
*/ +}; + + +struct ceph_mds_client { + struct ceph_mdsmap *s_mdsmap; /* mds map */ + + /* mds sessions */ + struct ceph_mds_session **s_mds_sessions; /* sparse array; elements NULL if no session */ + int s_max_mds_sessions; /* size of s_mds_sessions array */ + + __u64 s_last_mds_tid; /* id of last mds request */ + struct radix_tree s_mds_requests; /* in-flight mds requests */ + +}; + +#endif diff --git a/trunk/ceph/kernel/osd_client.h b/trunk/ceph/kernel/osd_client.h new file mode 100644 index 0000000000000..b2031cf4e9783 --- /dev/null +++ b/trunk/ceph/kernel/osd_client.h @@ -0,0 +1,22 @@ +/* -*- mode:C++; tab-width:8; c-basic-offset:8; indent-tabs-mode:t -*- + * vim: ts=8 sw=8 smarttab + */ + +#ifndef _FS_CEPH_OSD_CLIENT_H +#define _FS_CEPH_OSD_CLIENT_H + +/* this will be equivalent to osdc/Objecter.h */ + + +/* do these later +#include "osdmap.h" +*/ +struct ceph_osdmap; + + +struct ceph_osd_client { + struct ceph_osdmap *s_osdmap; /* osd map */ + +}; + +#endif diff --git a/trunk/ceph/mds/ClientMap.cc b/trunk/ceph/mds/ClientMap.cc index 5170f3fe9b3eb..1d781b9ba48c3 100644 --- a/trunk/ceph/mds/ClientMap.cc +++ b/trunk/ceph/mds/ClientMap.cc @@ -61,7 +61,7 @@ void ClientMap::load(Context *onload) C_CM_Load *c = new C_CM_Load(this); mds->filer->read(inode, - 0, inode.layout.stripe_unit, + 0, inode.layout.fl_stripe_unit, &c->bl, c); diff --git a/trunk/ceph/mds/IdAllocator.cc b/trunk/ceph/mds/IdAllocator.cc index 3a490c48c263d..36a36ea9eb037 100644 --- a/trunk/ceph/mds/IdAllocator.cc +++ b/trunk/ceph/mds/IdAllocator.cc @@ -174,7 +174,7 @@ void IdAllocator::load(Context *onfinish) C_ID_Load *c = new C_ID_Load(this, onfinish); mds->filer->read(inode, - 0, inode.layout.stripe_unit, + 0, inode.layout.fl_stripe_unit, &c->bl, c); } diff --git a/trunk/ceph/mds/MDLog.cc b/trunk/ceph/mds/MDLog.cc index fc7cdffbe6e10..5dd19ff5ee652 100644 --- a/trunk/ceph/mds/MDLog.cc +++ b/trunk/ceph/mds/MDLog.cc @@ -82,7 +82,7 @@ void MDLog::init_journaler() log_inode.layout = 
g_OSD_MDLogLayout; if (g_conf.mds_local_osd) - log_inode.layout.preferred = mds->get_nodeid() + g_conf.mds_local_osd_offset; // hack + log_inode.layout.fl_preferred = mds->get_nodeid() + g_conf.mds_local_osd_offset; // hack // log streamer if (journaler) delete journaler; @@ -191,8 +191,8 @@ void MDLog::submit_entry( LogEvent *le, Context *c ) off_t last_seg = get_last_segment_offset(); if (!segments.empty() && !writing_subtree_map && - (journaler->get_write_pos() / log_inode.layout.period()) != (last_seg / log_inode.layout.period()) && - (journaler->get_write_pos() - last_seg > log_inode.layout.period()/2)) { + (journaler->get_write_pos() / ceph_file_layout_period(log_inode.layout) != (last_seg / ceph_file_layout_period(log_inode.layout)) && + (journaler->get_write_pos() - last_seg > ceph_file_layout_period(log_inode.layout)/2))) { dout(10) << "submit_entry also starting new segment: last = " << last_seg << ", cur pos = " << journaler->get_write_pos() << dendl; start_new_segment(); diff --git a/trunk/ceph/msg/ceph_msg_types.h b/trunk/ceph/msg/ceph_msg_types.h index 559c972a02bf8..35e046d00aa9e 100644 --- a/trunk/ceph/msg/ceph_msg_types.h +++ b/trunk/ceph/msg/ceph_msg_types.h @@ -24,9 +24,9 @@ struct ceph_entity_name { * ipv4 only for now */ struct ceph_entity_addr { - __u8 ipq[4]; + __u64 nonce; __u32 port; - __u32 nonce; + __u8 ipq[4]; }; diff --git a/trunk/ceph/msg/msg_types.h b/trunk/ceph/msg/msg_types.h index 652525729cdfc..440f7964288fb 100644 --- a/trunk/ceph/msg/msg_types.h +++ b/trunk/ceph/msg/msg_types.h @@ -111,11 +111,9 @@ namespace __gnu_cxx { */ struct entity_addr_t { struct ceph_entity_addr v; - uint32_t _pad; - entity_addr_t() : _pad(0) { - v.port = v.nonce = 0; - v.ipq[0] = v.ipq[1] = v.ipq[2] = v.ipq[3] = 0; + entity_addr_t() { + memset(&v, 0, sizeof(v)); } void set_addr(tcpaddr_t a) { diff --git a/trunk/ceph/osd/OSDMap.h b/trunk/ceph/osd/OSDMap.h index b50f725687d23..b644c42fe9cc9 100644 --- a/trunk/ceph/osd/OSDMap.h +++ b/trunk/ceph/osd/OSDMap.h 
@@ -316,7 +316,7 @@ private: // oid -> pg ObjectLayout file_to_object_layout(object_t oid, FileLayout& layout) { - return make_object_layout(oid, layout.pg_type, layout.pg_size, layout.preferred, layout.object_stripe_unit); + return make_object_layout(oid, layout.fl_pg_type, layout.fl_pg_size, layout.fl_preferred, layout.fl_object_stripe_unit); } ObjectLayout make_object_layout(object_t oid, int pg_type, int pg_size, int preferred=-1, int object_stripe_unit = 0) { @@ -328,16 +328,16 @@ private: // calculate ps (placement seed) ps_t ps; switch (g_conf.osd_object_layout) { - case OBJECT_LAYOUT_LINEAR: + case CEPH_OBJECT_LAYOUT_LINEAR: ps = stable_mod(oid.bno + oid.ino, num, num_mask); break; - case OBJECT_LAYOUT_HASHINO: + case CEPH_OBJECT_LAYOUT_HASHINO: //ps = stable_mod(oid.bno + H(oid.bno+oid.ino)^H(oid.ino>>32), num, num_mask); ps = stable_mod(oid.bno + H(oid.ino)^H(oid.ino>>32), num, num_mask); break; - case OBJECT_LAYOUT_HASH: + case CEPH_OBJECT_LAYOUT_HASH: //ps = stable_mod(H( (oid.bno & oid.ino) ^ ((oid.bno^oid.ino) >> 32) ), num, num_mask); //ps = stable_mod(H(oid.bno) + H(oid.ino)^H(oid.ino>>32), num, num_mask); //ps = stable_mod(oid.bno + H(oid.bno+oid.ino)^H(oid.bno+oid.ino>>32), num, num_mask); @@ -361,7 +361,7 @@ private: vector& osds) { // list of osd addr's // map to osds[] switch (g_conf.osd_pg_layout) { - case PG_LAYOUT_CRUSH: + case CEPH_PG_LAYOUT_CRUSH: { // what crush rule? int rule; @@ -382,12 +382,12 @@ private: } break; - case PG_LAYOUT_LINEAR: + case CEPH_PG_LAYOUT_LINEAR: for (int i=0; i= 0 && - g_conf.osd_pg_layout != PG_LAYOUT_CRUSH) { + g_conf.osd_pg_layout != CEPH_PG_LAYOUT_CRUSH) { int osd = pg.preferred(); // already in there? 
diff --git a/trunk/ceph/osd/osd_types.h b/trunk/ceph/osd/osd_types.h index 08292252934ec..c46797d950f10 100644 --- a/trunk/ceph/osd/osd_types.h +++ b/trunk/ceph/osd/osd_types.h @@ -82,8 +82,8 @@ typedef uint8_t pruleset_t; // placement group id struct pg_t { public: - static const int TYPE_REP = 1; - static const int TYPE_RAID4 = 2; + static const int TYPE_REP = CEPH_PG_TYPE_REP; + static const int TYPE_RAID4 = CEPH_PG_TYPE_RAID4; private: union { @@ -282,14 +282,12 @@ class ObjectExtent { off_t start; // in object size_t length; // in object - objectrev_t rev; // which revision? - ObjectLayout layout; // object layout (pgid, etc.) map buffer_extents; // off -> len. extents in buffer being mapped (may be fragmented bc of striping!) - ObjectExtent() : start(0), length(0), rev(0) {} - ObjectExtent(object_t o, off_t s=0, size_t l=0) : oid(o), start(s), length(l), rev(0) { } + ObjectExtent() : start(0), length(0) {} + ObjectExtent(object_t o, off_t s=0, size_t l=0) : oid(o), start(s), length(l) { } }; inline ostream& operator<<(ostream& out, ObjectExtent &ex) diff --git a/trunk/ceph/osdc/Filer.cc b/trunk/ceph/osdc/Filer.cc index 5d13174d55952..193089d3915b1 100644 --- a/trunk/ceph/osdc/Filer.cc +++ b/trunk/ceph/osdc/Filer.cc @@ -57,7 +57,7 @@ int Filer::probe_fwd(inode_t& inode, Probe *probe = new Probe(inode, start_from, end, onfinish); // period (bytes before we jump unto a new set of object(s)) - off_t period = inode.layout.period(); + off_t period = ceph_file_layout_period(inode.layout); // start with 1+ periods. probe->probing_len = period; @@ -132,7 +132,7 @@ void Filer::_probed(Probe *probe, object_t oid, off_t size) if (end == 0) { // keep probing! 
dout(10) << "_probed didn't find end, probing further" << dendl; - off_t period = probe->inode.layout.object_size * probe->inode.layout.stripe_count; + off_t period = probe->inode.layout.fl_object_size * probe->inode.layout.fl_stripe_count; probe->from += probe->probing_len; probe->probing_len = period; _probe(probe); @@ -170,36 +170,35 @@ void Filer::file_to_extents(inode_t inode, */ map< object_t, ObjectExtent > object_extents; - assert(inode.layout.object_size >= inode.layout.stripe_unit); - off_t stripes_per_object = inode.layout.object_size / inode.layout.stripe_unit; + assert(inode.layout.fl_object_size >= inode.layout.fl_stripe_unit); + off_t stripes_per_object = inode.layout.fl_object_size / inode.layout.fl_stripe_unit; dout(20) << " stripes_per_object " << stripes_per_object << dendl; off_t cur = offset; off_t left = len; while (left > 0) { // layout into objects - off_t blockno = cur / inode.layout.stripe_unit; // which block - off_t stripeno = blockno / inode.layout.stripe_count; // which horizontal stripe (Y) - off_t stripepos = blockno % inode.layout.stripe_count; // which object in the object set (X) + off_t blockno = cur / inode.layout.fl_stripe_unit; // which block + off_t stripeno = blockno / inode.layout.fl_stripe_count; // which horizontal stripe (Y) + off_t stripepos = blockno % inode.layout.fl_stripe_count; // which object in the object set (X) off_t objectsetno = stripeno / stripes_per_object; // which object set - off_t objectno = objectsetno * inode.layout.stripe_count + stripepos; // object id + off_t objectno = objectsetno * inode.layout.fl_stripe_count + stripepos; // object id // find oid, extent ObjectExtent *ex = 0; - object_t oid( inode.ino, objectno ); + object_t oid( inode.ino, objectno, rev ); if (object_extents.count(oid)) ex = &object_extents[oid]; else { ex = &object_extents[oid]; ex->oid = oid; - ex->rev = rev; ex->layout = objecter->osdmap->file_to_object_layout( oid, inode.layout ); } // map range into object - off_t 
block_start = (stripeno % stripes_per_object)*inode.layout.stripe_unit; - off_t block_off = cur % inode.layout.stripe_unit; - off_t max = inode.layout.stripe_unit - block_off; + off_t block_start = (stripeno % stripes_per_object)*inode.layout.fl_stripe_unit; + off_t block_off = cur % inode.layout.fl_stripe_unit; + off_t max = inode.layout.fl_stripe_unit - block_off; off_t x_offset = block_start + block_off; off_t x_len; diff --git a/trunk/ceph/osdc/Journaler.cc b/trunk/ceph/osdc/Journaler.cc index c2719549e2247..363b7c60de9aa 100644 --- a/trunk/ceph/osdc/Journaler.cc +++ b/trunk/ceph/osdc/Journaler.cc @@ -31,7 +31,7 @@ void Journaler::reset() state = STATE_ACTIVE; write_pos = flush_pos = ack_pos = read_pos = requested_pos = received_pos = - expire_pos = trimming_pos = trimmed_pos = inode.layout.period(); + expire_pos = trimming_pos = trimmed_pos = ceph_file_layout_period(inode.layout); } @@ -239,7 +239,7 @@ off_t Journaler::append_entry(bufferlist& bl, Context *onsync) if (!g_conf.journaler_allow_split_entries) { // will we span a stripe boundary? - int p = inode.layout.stripe_unit; + int p = inode.layout.fl_stripe_unit; if (write_pos / p != (write_pos + (off_t)(bl.length() + sizeof(s))) / p) { // yes. // move write_pos forward. @@ -613,7 +613,7 @@ public: void Journaler::trim() { off_t trim_to = last_committed.expire_pos; - trim_to -= trim_to % inode.layout.period(); + trim_to -= trim_to % ceph_file_layout_period(inode.layout); dout(10) << "trim last_commited head was " << last_committed << ", can trim to " << trim_to << dendl; diff --git a/trunk/ceph/osdc/Journaler.h b/trunk/ceph/osdc/Journaler.h index 6463d9caf0e6f..a90ec5f9e348f 100644 --- a/trunk/ceph/osdc/Journaler.h +++ b/trunk/ceph/osdc/Journaler.h @@ -183,7 +183,7 @@ public: // prefetch intelligently. 
// (watch out, this is big if you use big objects or weird striping) if (!fetch_len) - fetch_len = inode.layout.object_size*inode.layout.stripe_count * + fetch_len = inode.layout.fl_object_size*inode.layout.fl_stripe_count * g_conf.journaler_prefetch_periods; if (!prefetch_from) prefetch_from = fetch_len / 2; diff --git a/trunk/ceph/osdc/Objecter.cc b/trunk/ceph/osdc/Objecter.cc index e6efee1aa4a33..84563b0af9720 100644 --- a/trunk/ceph/osdc/Objecter.cc +++ b/trunk/ceph/osdc/Objecter.cc @@ -320,13 +320,11 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m) // stat ----------------------------------- -tid_t Objecter::stat(object_t oid, off_t *size, ObjectLayout ol, Context *onfinish, - objectrev_t rev) +tid_t Objecter::stat(object_t oid, off_t *size, ObjectLayout ol, Context *onfinish) { OSDStat *st = new OSDStat(size); st->extents.push_back(ObjectExtent(oid, 0, 0)); st->extents.front().layout = ol; - st->extents.front().rev = rev; st->onfinish = onfinish; return stat_submit(st); @@ -424,14 +422,12 @@ void Objecter::handle_osd_stat_reply(MOSDOpReply *m) // read ----------------------------------- -tid_t Objecter::read(object_t oid, off_t off, size_t len, ObjectLayout ol, bufferlist *bl, - Context *onfinish, - objectrev_t rev) +tid_t Objecter::read(object_t oid, off_t off, size_t len, ObjectLayout ol, bufferlist *bl, + Context *onfinish) { OSDRead *rd = new OSDRead(bl); rd->extents.push_back(ObjectExtent(oid, off, len)); rd->extents.front().layout = ol; - rd->extents.front().rev = rev; readx(rd, onfinish); return last_tid; } @@ -665,14 +661,12 @@ void Objecter::handle_osd_read_reply(MOSDOpReply *m) // write ------------------------------------ tid_t Objecter::write(object_t oid, off_t off, size_t len, ObjectLayout ol, bufferlist &bl, - Context *onack, Context *oncommit, - objectrev_t rev) + Context *onack, Context *oncommit) { OSDWrite *wr = new OSDWrite(bl); wr->extents.push_back(ObjectExtent(oid, off, len)); wr->extents.front().layout = ol; 
wr->extents.front().buffer_extents[0] = len; - wr->extents.front().rev = rev; modifyx(wr, onack, oncommit); return last_tid; } @@ -681,13 +675,11 @@ tid_t Objecter::write(object_t oid, off_t off, size_t len, ObjectLayout ol, buff // zero tid_t Objecter::zero(object_t oid, off_t off, size_t len, ObjectLayout ol, - Context *onack, Context *oncommit, - objectrev_t rev) + Context *onack, Context *oncommit) { OSDModify *z = new OSDModify(OSD_OP_ZERO); z->extents.push_back(ObjectExtent(oid, off, len)); z->extents.front().layout = ol; - z->extents.front().rev = rev; modifyx(z, onack, oncommit); return last_tid; } @@ -760,7 +752,6 @@ tid_t Objecter::modifyx_submit(OSDModify *wr, ObjectExtent &ex, tid_t usetid) wr->op); m->set_length(ex.length); m->set_offset(ex.start); - m->set_rev(ex.rev); if (usetid > 0) m->set_retry_attempt(true); diff --git a/trunk/ceph/osdc/Objecter.h b/trunk/ceph/osdc/Objecter.h index ed5c44745604e..82a437aa04f8d 100644 --- a/trunk/ceph/osdc/Objecter.h +++ b/trunk/ceph/osdc/Objecter.h @@ -213,16 +213,12 @@ class Objecter { // even lazier tid_t read(object_t oid, off_t off, size_t len, ObjectLayout ol, bufferlist *bl, - Context *onfinish, - objectrev_t rev=0); + Context *onfinish); tid_t write(object_t oid, off_t off, size_t len, ObjectLayout ol, bufferlist &bl, - Context *onack, Context *oncommit, - objectrev_t rev=0); + Context *onack, Context *oncommit); tid_t zero(object_t oid, off_t off, size_t len, ObjectLayout ol, - Context *onack, Context *oncommit, - objectrev_t rev=0); - tid_t stat(object_t oid, off_t *size, ObjectLayout ol, Context *onfinish, - objectrev_t rev=0); + Context *onack, Context *oncommit); + tid_t stat(object_t oid, off_t *size, ObjectLayout ol, Context *onfinish); tid_t lock(int op, object_t oid, ObjectLayout ol, Context *onack, Context *oncommit); -- 2.39.5