git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: more pg_pool, pg changes
author Sage Weil <sage@newdream.net>
Thu, 30 Apr 2009 16:44:39 +0000 (09:44 -0700)
committer Sage Weil <sage@newdream.net>
Thu, 30 Apr 2009 16:44:39 +0000 (09:44 -0700)
Remove 'type' from pg_t.  It's implied based on the pool.

Clean out file_layout.  The pool implies a type and namespace.

src/client/SyntheticClient.cc
src/config.cc
src/include/ceph_fs.h
src/kernel/osd_client.c
src/kernel/osdmap.c
src/mds/mdstypes.h
src/mon/PGMonitor.cc
src/osd/OSD.cc
src/osd/OSDMap.h
src/osd/osd_types.h

index 53be64de6b27cd3c315ab3de6489b094581af404..db10860889ff7127e1cb54c3401402f5aa6c95ad 100644 (file)
@@ -1345,7 +1345,7 @@ int SyntheticClient::play_trace(Trace& t, string& prefix, bool metadata_only)
       int64_t ol = t.get_int();
       object_t oid(oh, ol);
       lock.Lock();
-      ceph_object_layout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 2, 0);
+      ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE);
       __u64 size;
       client->objecter->stat(oid, layout, &size, 0, new C_SafeCond(&lock, &cond, &ack));
       while (!ack) cond.Wait(lock);
@@ -1358,7 +1358,7 @@ int SyntheticClient::play_trace(Trace& t, string& prefix, bool metadata_only)
       int64_t len = t.get_int();
       object_t oid(oh, ol);
       lock.Lock();
-      ceph_object_layout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 2, 0);
+      ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE);
       bufferlist bl;
       client->objecter->read(oid, layout, off, len, &bl, 0, new C_SafeCond(&lock, &cond, &ack));
       while (!ack) cond.Wait(lock);
@@ -1371,7 +1371,7 @@ int SyntheticClient::play_trace(Trace& t, string& prefix, bool metadata_only)
       int64_t len = t.get_int();
       object_t oid(oh, ol);
       lock.Lock();
-      ceph_object_layout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 2, 0);
+      ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE);
       bufferptr bp(len);
       bufferlist bl;
       bl.push_back(bp);
@@ -1389,7 +1389,7 @@ int SyntheticClient::play_trace(Trace& t, string& prefix, bool metadata_only)
       int64_t len = t.get_int();
       object_t oid(oh, ol);
       lock.Lock();
-      ceph_object_layout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 2, 0);
+      ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE);
       SnapContext snapc;
       client->objecter->zero(oid, layout, off, len, snapc, g_clock.now(), 0,
                             new C_SafeCond(&lock, &cond, &ack),
@@ -2146,7 +2146,7 @@ int SyntheticClient::create_objects(int nobj, int osize, int inflight)
     if (time_to_stop()) break;
 
     object_t oid(0x1000, i);
-    ceph_object_layout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 0);
+    ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE);
     SnapContext snapc;
     
     if (i % inflight == 0) {
@@ -2249,7 +2249,7 @@ int SyntheticClient::object_rw(int nobj, int osize, int wrpc,
     }
     object_t oid(0x1000, o);
 
-    ceph_object_layout layout = client->osdmap->make_object_layout(oid, pg_t::TYPE_REP, 0);
+    ceph_object_layout layout = client->osdmap->make_object_layout(oid, CEPH_CASDATA_RULE);
     SnapContext snapc;
     
     client->client_lock.Lock();
index edf553409911da941521cfbaa3f59b9dc1786489..21ee85b8f6c237499371d8c32ba602b74d1e34a6 100644 (file)
@@ -85,7 +85,6 @@ struct ceph_file_layout g_default_file_layout = {
  fl_cas_hash: init_le32(0),
  fl_object_stripe_unit: init_le32(0),
  fl_pg_preferred: init_le32(-1),
- fl_pg_type: CEPH_PG_TYPE_REP,
  fl_pg_pool: {CEPH_DATA_RULE},
 };
 
@@ -96,7 +95,6 @@ struct ceph_file_layout g_default_casdata_layout = {
  fl_cas_hash: init_le32(0),
  fl_object_stripe_unit: init_le32(0),
  fl_pg_preferred: init_le32(-1),
- fl_pg_type: CEPH_PG_TYPE_REP,
  fl_pg_pool: {CEPH_CASDATA_RULE},
 };
 
@@ -107,7 +105,6 @@ struct ceph_file_layout g_default_mds_dir_layout = {
  fl_cas_hash: init_le32(0),
  fl_object_stripe_unit: init_le32(0),
  fl_pg_preferred: init_le32(-1),
- fl_pg_type: CEPH_PG_TYPE_REP,
  fl_pg_pool: {CEPH_METADATA_RULE},
 };
 
@@ -118,7 +115,6 @@ struct ceph_file_layout g_default_mds_log_layout = {
  fl_cas_hash: init_le32(0),
  fl_object_stripe_unit: init_le32(0),
  fl_pg_preferred: init_le32(-1),
- fl_pg_type: CEPH_PG_TYPE_REP,
  fl_pg_pool: {CEPH_METADATA_RULE},
 };
 
@@ -129,7 +125,6 @@ struct ceph_file_layout g_default_mds_anchortable_layout = {
  fl_cas_hash: init_le32(0),
  fl_object_stripe_unit: init_le32(0),
  fl_pg_preferred: init_le32(-1),
- fl_pg_type: CEPH_PG_TYPE_REP,
  fl_pg_pool: {CEPH_METADATA_RULE},
 };
 
index e79f583611fb126c16a05b542a0fd6da9ac170d1..122c9b01770cc96ef42c90b235da4f2d32f18d44 100644 (file)
@@ -43,7 +43,7 @@
 #define CEPH_OSD_PROTOCOL     5 /* cluster internal */
 #define CEPH_MDS_PROTOCOL     9 /* cluster internal */
 #define CEPH_MON_PROTOCOL     4 /* cluster internal */
-#define CEPH_OSDC_PROTOCOL    7 /* public/client */
+#define CEPH_OSDC_PROTOCOL    8 /* public/client */
 #define CEPH_MDSC_PROTOCOL   18 /* public/client */
 #define CEPH_MONC_PROTOCOL   11 /* public/client */
 
@@ -264,9 +264,7 @@ struct ceph_file_layout {
 
        /* object -> pg layout */
        __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */
-       __u8   fl_pg_type;
-       __le16 fl_pg_pool;      /* implies crush ruleset, rep level */
-       __le16 fl_pg_ns;        /* object namespace */
+       __le32 fl_pg_pool;      /* namespace, crush ruleset, rep level */
 } __attribute__ ((packed));
 
 #define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit))
@@ -297,9 +295,7 @@ union ceph_pg {
        struct {
                __s16 preferred; /* preferred primary osd */
                __u16 ps;        /* placement seed */
-               __u16 pool;      /* implies crush ruleset */
-               __u8 type;
-               __u8 __pad;
+               __u32 pool;      /* implies crush ruleset */
        } pg;
 } __attribute__ ((packed));
 
index 1fc0e0958dd813ab7fda3b1dbae236a5285a3b1a..2022b8cf47ec082bd30e790f5b6b78e5838f7f87 100644 (file)
@@ -333,10 +333,10 @@ static int map_osds(struct ceph_osd_client *osdc,
                return -1;
        pool = &osdc->osdmap->pg_pool[req->r_pgid.pg.pool];
        ruleno = crush_find_rule(osdc->osdmap->crush, pool->v.crush_ruleset,
-                                req->r_pgid.pg.type, pool->v.size);
+                                pool->v.type, pool->v.size);
        if (ruleno < 0) {
                derr(0, "map_osds no crush rule for pool %d type %d size %d\n",
-                    req->r_pgid.pg.pool, req->r_pgid.pg.type, pool->v.size);
+                    req->r_pgid.pg.pool, pool->v.type, pool->v.size);
                return -1;
        }
 
index 7030ef80f17a44a544b839b96956ed22e78ce534..4b3c43573feac673e0d1715da5ddbeea9be89869 100644 (file)
@@ -668,7 +668,6 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
        pgid.pg64 = 0;   /* start with it zeroed out */
        pgid.pg.ps = bno + crush_hash32_2(ino, ino>>32);
        pgid.pg.preferred = preferred;
-       pgid.pg.type = fl->fl_pg_type;
        pgid.pg.pool = fl->fl_pg_pool;
 
        ol->ol_pgid = cpu_to_le64(pgid.pg64);
index 005d920444d2cf436be5305b9d19670fb0548ed8..5cde0b26c0b38a9fa2a5dbb516c78115a8786445 100644 (file)
@@ -18,7 +18,7 @@ using namespace std;
 #include "include/xlist.h"
 #include "include/nstring.h"
 
-#define CEPH_FS_ONDISK_MAGIC "ceph fs volume v002"
+#define CEPH_FS_ONDISK_MAGIC "ceph fs volume v003"
 
 
 #define MDS_REF_SET      // define me for improved debug output, sanity checking
index e606932b927ec1527c1e0858b566272f7855e652..0738e51cfa05ae886169c46964e8045cfb77ac9a 100644 (file)
@@ -435,7 +435,6 @@ bool PGMonitor::register_new_pgs()
        p++) {
     int poolid = p->first;
     pg_pool_t &pool = p->second;
-    int type = pool.get_type();
     int ruleno = pool.get_crush_ruleset();
     if (!mon->osdmon()->osdmap.crush.rule_exists(ruleno)) 
       continue;
@@ -449,7 +448,7 @@ bool PGMonitor::register_new_pgs()
     dout(10) << "register_new_pgs scanning " << pool << dendl;
 
     for (ps_t ps = 0; ps < pool.get_pg_num(); ps++) {
-      pg_t pgid(type, ps, poolid, -1);
+      pg_t pgid(ps, poolid, -1);
       if (pg_map.pg_stat.count(pgid)) {
        dout(20) << "register_new_pgs  have " << pgid << dendl;
        continue;
index 461586e573e20136d6119b95133b6590f39352c4..11149fc7ef092621be365d4a73da27f1595d374e 100644 (file)
@@ -552,7 +552,7 @@ PG *OSD::_open_lock_pg(pg_t pgid)
 
   // create
   PG *pg;
-  if (pgid.is_rep())
+  if (osdmap->get_pg_type(pgid) == CEPH_PG_TYPE_REP)
     pg = new ReplicatedPG(this, pgid);
   //else if (pgid.is_raid4())
   //pg = new RAID4PG(this, pgid);
@@ -2457,8 +2457,7 @@ void OSD::split_pg(PG *parent, map<pg_t,PG*>& children, ObjectStore::Transaction
 
   for (vector<pobject_t>::iterator p = olist.begin(); p != olist.end(); p++) {
     pobject_t poid = *p;
-    ceph_object_layout l = osdmap->make_object_layout(poid.oid, parentid.type(), 
-                                                     parentid.pool(), parentid.preferred());
+    ceph_object_layout l = osdmap->make_object_layout(poid.oid, parentid.pool(), parentid.preferred());
     if (le64_to_cpu(l.ol_pgid) != parentid.u.pg64) {
       pg_t pgid(le64_to_cpu(l.ol_pgid));
       dout(20) << "  moving " << poid << " from " << parentid << " -> " << pgid << dendl;
index 34c4e8375b9c0cc746f34a3f0c8102f1f92e38cc..58264419c45e885ff8285460e441d7198b8f517a 100644 (file)
@@ -620,13 +620,12 @@ private:
 
   // oid -> pg
   ceph_object_layout file_to_object_layout(object_t oid, ceph_file_layout& layout) {
-    return make_object_layout(oid, layout.fl_pg_type,
-                             layout.fl_pg_pool,
+    return make_object_layout(oid, layout.fl_pg_pool,
                              ceph_file_layout_pg_preferred(layout),
                              ceph_file_layout_object_su(layout));
   }
 
-  ceph_object_layout make_object_layout(object_t oid, int pg_type, int pg_pool, int preferred=-1, int object_stripe_unit = 0) {
+  ceph_object_layout make_object_layout(object_t oid, int pg_pool, int preferred=-1, int object_stripe_unit = 0) {
     // calculate ps (placement seed)
     ps_t ps;  // NOTE: keep full precision, here!
     switch (g_conf.osd_object_layout) {
@@ -653,7 +652,7 @@ private:
     //cout << "preferred " << preferred << " num " << num << " mask " << num_mask << " ps " << ps << endl;
 
     // construct object layout
-    pg_t pgid = pg_t(pg_type, ps, pg_pool, preferred);
+    pg_t pgid = pg_t(ps, pg_pool, preferred);
     ceph_object_layout layout;
     layout.ol_pgid = pgid.u.pg64;
     layout.ol_stripe_unit = object_stripe_unit;
@@ -675,7 +674,7 @@ private:
     case CEPH_PG_LAYOUT_CRUSH:
       {
        // what crush rule?
-       int ruleno = crush.find_rule(pool.get_crush_ruleset(), pg.type(), size);
+       int ruleno = crush.find_rule(pool.get_crush_ruleset(), pool.get_type(), size);
        if (ruleno >= 0)
          crush.do_rule(ruleno, pps, osds, size, pg.preferred(), osd_weight);
       }
@@ -769,11 +768,21 @@ private:
 
 
 
+  const pg_pool_t& get_pg_pool(int p) {
+    assert(pools.count(p));
+    return pools[p];
+  }
   unsigned get_pg_size(pg_t pg) {
     assert(pools.count(pg.pool()));
     pg_pool_t &pool = pools[pg.pool()];
     return pool.get_size();
   }
+  int get_pg_type(pg_t pg) {
+    assert(pools.count(pg.pool()));
+    pg_pool_t &pool = pools[pg.pool()];
+    return pool.get_type();
+  }
+
 
   pg_t raw_pg_to_pg(pg_t pg) {
     assert(pools.count(pg.pool()));
index 9d074b8170377ba94f922c78060664f57b894118..190ba850f1219ef66ac17c5a53fa18a670cb71a8 100644 (file)
@@ -25,7 +25,7 @@
 
 
 
-#define CEPH_OSD_ONDISK_MAGIC "ceph osd volume v012"
+#define CEPH_OSD_ONDISK_MAGIC "ceph osd volume v013"
 
 
 
@@ -109,19 +109,12 @@ enum {
 
 // placement group id
 struct pg_t {
-public:
-  static const int TYPE_REP   = CEPH_PG_TYPE_REP;
-  static const int TYPE_RAID4 = CEPH_PG_TYPE_RAID4;
-
-  //private:
   union ceph_pg u;
 
-public:
   pg_t() { u.pg64 = 0; }
   pg_t(const pg_t& o) { u.pg64 = o.u.pg64; }
-  pg_t(int type, ps_t seed, int pool, int pref) {
+  pg_t(ps_t seed, int pool, int pref) {
     u.pg64 = 0;
-    u.pg.type = type;
     u.pg.ps = seed;
     u.pg.pool = pool;
     u.pg.preferred = pref;   // hack: avoid negative.
@@ -132,10 +125,6 @@ public:
     u = cpg;
   }
 
-  int type()      { return u.pg.type; }
-  bool is_rep()   { return type() == TYPE_REP; }
-  bool is_raid4() { return type() == TYPE_RAID4; }
-
   ps_t ps() { return u.pg.ps; }
   int pool() { return u.pg.pool; }
   int preferred() { return u.pg.preferred; }   // hack: avoid negative.
@@ -161,7 +150,6 @@ public:
     int r = sscanf(s, "%d.%x", &pool, &ps);
     if (r < 3)
       return false;
-    u.pg.type = TYPE_REP;
     u.pg.pool = pool;
     u.pg.ps = ps;
     u.pg.preferred = -1;
@@ -431,6 +419,9 @@ struct pg_pool_t {
   int get_crush_ruleset() const { return v.crush_ruleset; }
   epoch_t get_last_change() const { return v.last_change; }
 
+  bool is_rep()   const { return get_type() == CEPH_PG_TYPE_REP; }
+  bool is_raid4() const { return get_type() == CEPH_PG_TYPE_RAID4; }
+
   void calc_pg_masks() {
     pg_num_mask = (1 << calc_bits_of(v.pg_num-1)) - 1;
     pgp_num_mask = (1 << calc_bits_of(v.pgp_num-1)) - 1;
@@ -470,15 +461,20 @@ struct pg_pool_t {
 WRITE_CLASS_ENCODER(pg_pool_t)
 
 inline ostream& operator<<(ostream& out, const pg_pool_t& p) {
-  return out << "pg_pool(type " << p.get_type()
-            << " size " << p.get_size()
-            << " ruleset " << p.get_crush_ruleset()
-            << " pg_num " << p.get_pg_num()
-            << " pgp_num " << p.get_pgp_num()
-            << " lpg_num " << p.get_lpg_num()
-            << " lpgp_num " << p.get_lpgp_num()
-            << " last_change " << p.get_last_change()
-            << ")";
+  out << "pg_pool(";
+  switch (p.get_type()) {
+  case CEPH_PG_TYPE_REP: out << "rep"; break;
+  default: out << "type " << p.get_type();
+  }
+  out << " size " << p.get_size()
+      << " ruleset " << p.get_crush_ruleset()
+      << " pg_num " << p.get_pg_num()
+      << " pgp_num " << p.get_pgp_num()
+      << " lpg_num " << p.get_lpg_num()
+      << " lpgp_num " << p.get_lpgp_num()
+      << " last_change " << p.get_last_change()
+      << ")";
+  return out;
 }
 
 /** pg_stat