store data and metadata in separate pg_pools; name crush rules
author    Sage Weil <sage@newdream.net>
          Thu, 12 Jun 2008 04:26:55 +0000 (21:26 -0700)
committer Sage Weil <sage@newdream.net>
          Thu, 12 Jun 2008 04:26:55 +0000 (21:26 -0700)
src/TODO
src/config.cc
src/config.h
src/osd/OSDMap.cc

index d1231af1eede5ccb0feb35636d40cc9ea1847068..7b936be8e9076b44d892591499fe003f41e5f89f 100644
--- a/src/TODO
+++ b/src/TODO
@@ -8,7 +8,6 @@ big items
 - client, user authentication
 - cas
 
-- meta vs data crush rules
 - use libuuid
 
 userspace client
@@ -20,6 +19,7 @@ userspace client
 - fix lease validation to check session ttl
 - clean up ll_ interface, now that we have leases!
 - clean up client mds session vs mdsmap behavior?
+- stop using mds's inode_t?
 
 kernel client
 - flush caps on sync, fsync, etc.
index c9a3591649027b718280617fdc1fb1b84c418530..8a560fc6f64dc0b1d87850e6366bec6eedbdbb3d 100644
--- a/src/config.cc
+++ b/src/config.cc
@@ -107,7 +107,19 @@ struct ceph_file_layout g_default_file_layout = {
  fl_pg_preferred: init_le32(-1),
  fl_pg_type: CEPH_PG_TYPE_REP,
  fl_pg_size: 2,
- fl_pg_pool: 0
+ fl_pg_pool: 1
+};
+
+struct ceph_file_layout g_default_casdata_layout = {
+ fl_stripe_unit: init_le32(1<<22),
+ fl_stripe_count: init_le32(1),
+ fl_object_size: init_le32(1<<22),
+ fl_cas_hash: init_le32(0),
+ fl_object_stripe_unit: init_le32(0),
+ fl_pg_preferred: init_le32(-1),
+ fl_pg_type: CEPH_PG_TYPE_REP,
+ fl_pg_size: 2,
+ fl_pg_pool: 2
 };
 
 struct ceph_file_layout g_default_mds_dir_layout = {
@@ -146,6 +158,16 @@ struct ceph_file_layout g_default_mds_anchortable_layout = {
  fl_pg_pool: 0
 };
 
+const char *get_pool_name(int pool) 
+{
+  switch (pool) {
+  case 0: return "metadata";
+  case 1: return "data";
+  case 2: return "casdata";
+  default: return "";
+  }
+}
+
 #include <msg/msg_types.h>
 
 // fake osd failures: osd -> time
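For illustration, a minimal standalone sketch (not part of the commit) of the default pool assignments after this change; the pool-id-to-name mapping mirrors the get_pool_name() helper added in src/config.cc, and the printed fl_pg_pool values come from the default layouts above:

#include <cstdio>

// Mirrors the get_pool_name() helper added in src/config.cc.
static const char *pool_name(int pool)
{
  switch (pool) {
  case 0: return "metadata";
  case 1: return "data";
  case 2: return "casdata";
  default: return "";
  }
}

int main()
{
  // Defaults after this commit: file data goes to pool 1, CAS data to
  // pool 2, while the MDS anchortable layout stays in pool 0.
  std::printf("g_default_file_layout.fl_pg_pool            = 1 (%s)\n", pool_name(1));
  std::printf("g_default_casdata_layout.fl_pg_pool         = 2 (%s)\n", pool_name(2));
  std::printf("g_default_mds_anchortable_layout.fl_pg_pool = 0 (%s)\n", pool_name(0));
  return 0;
}
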
index 293ec25c756ea61aed9e0ab74a0151f606e59b53..be12340b75547d0388730f9a91269d734546fd12 100644
--- a/src/config.h
+++ b/src/config.h
 #define __CEPH_CONFIG_H
 
 extern struct ceph_file_layout g_default_file_layout;
+extern struct ceph_file_layout g_default_casdata_layout;
 extern struct ceph_file_layout g_default_mds_dir_layout;
 extern struct ceph_file_layout g_default_mds_log_layout;
 extern struct ceph_file_layout g_default_mds_anchortable_layout;
 
+extern const char *get_pool_name(int pool);
+
 #include <vector>
 #include <map>
 
index 9ff5962e0b6b10fd7e566ccd83e27a7eb923a3bf..e08f9199775a5a229a81f4a8e3635cf9392e0560 100644
--- a/src/osd/OSDMap.cc
+++ b/src/osd/OSDMap.cc
@@ -60,6 +60,8 @@ void OSDMap::build_simple_crush_map(CrushWrapper& crush, int num_osd, map<int,do
   crush.set_type_name(1, "domain");
   crush.set_type_name(2, "pool");
 
+  int npools = 3;
+
   int minrep = g_conf.osd_min_rep;
   int ndom = MAX(g_conf.osd_max_rep, g_conf.osd_max_raid_width);
   if (num_osd >= ndom*3 &&
@@ -101,25 +103,27 @@ void OSDMap::build_simple_crush_map(CrushWrapper& crush, int num_osd, map<int,do
 
     // rules
     // replication
-    for (int pool=0; pool<1; pool++) {
+    for (int pool=0; pool<npools; pool++) {
       // size minrep..ndom
       crush_rule *rule = crush_make_rule(4, pool, CEPH_PG_TYPE_REP, minrep, ndom);
       crush_rule_set_step(rule, 0, CRUSH_RULE_TAKE, rootid, 0);
       crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_FIRSTN, CRUSH_CHOOSE_N, 1); // choose N domains
       crush_rule_set_step(rule, 2, CRUSH_RULE_CHOOSE_FIRSTN, 1, 0);  // and 1 device in each
       crush_rule_set_step(rule, 3, CRUSH_RULE_EMIT, 0, 0);
-      crush_add_rule(crush.crush, rule, -1);
+      int rno = crush_add_rule(crush.crush, rule, -1);
+      crush.set_rule_name(rno, get_pool_name(pool));
     }
 
     // raid
-    for (int pool=0; pool<1; pool++) {
-      crush_rule *rule = crush_make_rule(4, pool, CEPH_PG_TYPE_RAID4, g_conf.osd_min_raid_width, g_conf.osd_max_raid_width);
-      crush_rule_set_step(rule, 0, CRUSH_RULE_TAKE, rootid, 0);
-      crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_INDEP, CRUSH_CHOOSE_N, 1);
-      crush_rule_set_step(rule, 2, CRUSH_RULE_CHOOSE_INDEP, 1, 0);
-      crush_rule_set_step(rule, 3, CRUSH_RULE_EMIT, 0, 0);
-      crush_add_rule(crush.crush, rule, -1);
-    }
+    if (g_conf.osd_min_raid_width <= g_conf.osd_max_raid_width)
+      for (int pool=0; pool<npools; pool++) {
+       crush_rule *rule = crush_make_rule(4, pool, CEPH_PG_TYPE_RAID4, g_conf.osd_min_raid_width, g_conf.osd_max_raid_width);
+       crush_rule_set_step(rule, 0, CRUSH_RULE_TAKE, rootid, 0);
+       crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_INDEP, CRUSH_CHOOSE_N, 1);
+       crush_rule_set_step(rule, 2, CRUSH_RULE_CHOOSE_INDEP, 1, 0);
+       crush_rule_set_step(rule, 3, CRUSH_RULE_EMIT, 0, 0);
+       crush_add_rule(crush.crush, rule, -1);
+      }
     
   } else {
     // one bucket
@@ -133,22 +137,24 @@ void OSDMap::build_simple_crush_map(CrushWrapper& crush, int num_osd, map<int,do
     crush.set_item_name(rootid, "root");
 
     // replication
-    for (int pool=0; pool<1; pool++) {
+    for (int pool=0; pool<npools; pool++) {
       crush_rule *rule = crush_make_rule(3, pool, CEPH_PG_TYPE_REP, g_conf.osd_min_rep, g_conf.osd_max_rep);
       crush_rule_set_step(rule, 0, CRUSH_RULE_TAKE, rootid, 0);
       crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_FIRSTN, CRUSH_CHOOSE_N, 0);
       crush_rule_set_step(rule, 2, CRUSH_RULE_EMIT, 0, 0);
-      crush_add_rule(crush.crush, rule, -1);
+      int rno = crush_add_rule(crush.crush, rule, -1);
+      crush.set_rule_name(rno, get_pool_name(pool));
     }
 
     // raid4
-    for (int pool=0; pool<1; pool++) {
-      crush_rule *rule = crush_make_rule(3, pool, CEPH_PG_TYPE_RAID4, g_conf.osd_min_raid_width, g_conf.osd_max_raid_width);
-      crush_rule_set_step(rule, 0, CRUSH_RULE_TAKE, rootid, 0);
-      crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_INDEP, CRUSH_CHOOSE_N, 0);
-      crush_rule_set_step(rule, 2, CRUSH_RULE_EMIT, 0, 0);
-      crush_add_rule(crush.crush, rule, -1);
-    }
+    if (g_conf.osd_min_raid_width <= g_conf.osd_max_raid_width)
+      for (int pool=0; pool<npools; pool++) {
+       crush_rule *rule = crush_make_rule(3, pool, CEPH_PG_TYPE_RAID4, g_conf.osd_min_raid_width, g_conf.osd_max_raid_width);
+       crush_rule_set_step(rule, 0, CRUSH_RULE_TAKE, rootid, 0);
+       crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_INDEP, CRUSH_CHOOSE_N, 0);
+       crush_rule_set_step(rule, 2, CRUSH_RULE_EMIT, 0, 0);
+       crush_add_rule(crush.crush, rule, -1);
+      }
   }
 
   crush.finalize();
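
For illustration, a rough standalone sketch of the rule-naming pattern used in OSDMap::build_simple_crush_map() above: each of the three pools now gets its own replication rule, and the rule number returned by crush_add_rule() is labeled with the pool's name. A std::map stands in for the real CRUSH rule table so the example compiles on its own:

#include <iostream>
#include <map>
#include <string>

// Same mapping as the get_pool_name() helper added in src/config.cc.
static const char *pool_name(int pool)
{
  switch (pool) {
  case 0: return "metadata";
  case 1: return "data";
  case 2: return "casdata";
  default: return "";
  }
}

int main()
{
  const int npools = 3;                       // metadata, data, casdata
  std::map<int, std::string> rule_names;      // stand-in for crush.set_rule_name()

  for (int pool = 0; pool < npools; pool++) {
    // In build_simple_crush_map() the equivalent steps are:
    //   int rno = crush_add_rule(crush.crush, rule, -1);
    //   crush.set_rule_name(rno, get_pool_name(pool));
    int rno = pool;                           // assume rules are added in pool order
    rule_names[rno] = pool_name(pool);
  }

  for (std::map<int, std::string>::const_iterator p = rule_names.begin();
       p != rule_names.end(); ++p)
    std::cout << "crush rule " << p->first << " -> " << p->second << std::endl;
  return 0;
}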