git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
more stuff
author sageweil <sageweil@29311d96-e01e-0410-9327-a35deaab8ce9>
Thu, 1 Mar 2007 03:21:08 +0000 (03:21 +0000)
committer sageweil <sageweil@29311d96-e01e-0410-9327-a35deaab8ce9>
Thu, 1 Mar 2007 03:21:08 +0000 (03:21 +0000)
git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1144 29311d96-e01e-0410-9327-a35deaab8ce9

branches/sage/pgs/mon/OSDMonitor.cc
branches/sage/pgs/osd/OSD.cc
branches/sage/pgs/osd/PG.h
branches/sage/pgs/osd/ReplicatedPG.cc
branches/sage/pgs/osd/ReplicatedPG.h

index 43ec4eddf2eca8fbde1b6e53bce523415509c757..270f261b1d702dde2919ffdf4bc647259585374c 100644 (file)
@@ -173,11 +173,27 @@ void OSDMonitor::create_initial()
     int nroot = osdmap.crush.add_bucket(root);    
     
     // rules
+    // replication
     for (int i=1; i<=ndom; i++) {
-      osdmap.crush.rules[i].steps.push_back(RuleStep(CRUSH_RULE_TAKE, nroot));
-      osdmap.crush.rules[i].steps.push_back(RuleStep(CRUSH_RULE_CHOOSE, i, 1));
-      osdmap.crush.rules[i].steps.push_back(RuleStep(CRUSH_RULE_CHOOSE, 1, 0));      
-      osdmap.crush.rules[i].steps.push_back(RuleStep(CRUSH_RULE_EMIT));
+      int r = CRUSH_REP_RULE(i);
+      osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_TAKE, nroot));
+      osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_CHOOSE, i, 1));
+      osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_CHOOSE, 1, 0));      
+      osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_EMIT));
+    }
+    // raid
+    for (int i=g_conf.osd_min_raid_width; i <= g_conf.osd_max_raid_width; i++) {
+      int r = CRUSH_RAID_RULE(i);      
+      if (ndom >= i) {
+       osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_TAKE, nroot));
+       osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_CHOOSE_INDEP, i, 1));
+       osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_CHOOSE_INDEP, 1, 0));      
+       osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_EMIT));
+      } else {
+       osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_TAKE, nroot));
+       osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_CHOOSE_INDEP, i, 0));
+       osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_EMIT));
+      }
     }
     
     // test
@@ -193,10 +209,20 @@ void OSDMonitor::create_initial()
       b->add_item(i, 1.0);
     }
     
+    // rules
+    // replication
     for (int i=1; i<=g_conf.osd_max_rep; i++) {
-      osdmap.crush.rules[i].steps.push_back(RuleStep(CRUSH_RULE_TAKE, root));
-      osdmap.crush.rules[i].steps.push_back(RuleStep(CRUSH_RULE_CHOOSE, i, 0));
-      osdmap.crush.rules[i].steps.push_back(RuleStep(CRUSH_RULE_EMIT));
+      int r = CRUSH_REP_RULE(i);
+      osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_TAKE, root));
+      osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_CHOOSE, i, 0));
+      osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_EMIT));
+    }
+    // raid
+    for (int i=g_conf.osd_min_raid_width; i <= g_conf.osd_max_raid_width; i++) {
+      int r = CRUSH_RAID_RULE(i);      
+      osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_TAKE, root));
+      osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_CHOOSE_INDEP, i, 0));
+      osdmap.crush.rules[r].steps.push_back(RuleStep(CRUSH_RULE_EMIT));
     }
   }
   
index 83576b12babdccd879dc693f1d62088421d7bae9..beed4e69f504cd9ade40dca4a602868a712b4b74 100644 (file)
 #include "osbdb/OSBDB.h"
 #endif // USE_OSBDB
 
+
+#include "ReplicatedPG.h"
+#include "RAID4PG.h"
+
 #include "Ager.h"
 
 
@@ -628,14 +632,6 @@ void OSD::dispatch(Message *m)
 
     // -- don't need OSDMap --
 
-    /*
-    // host monitor
-  case MSG_PING_ACK:
-  case MSG_FAILURE_ACK:
-    monitor->proc_message(m);
-    break;
-    */
-
     // map and replication
   case MSG_OSD_MAP:
     handle_osd_map((MOSDMap*)m);
@@ -1479,7 +1475,13 @@ void OSD::load_pgs()
        it++) {
     pg_t pgid = *it;
 
-    PG *pg = new PG(this, pgid);
+    PG *pg = 0;
+    if (pgid->is_rep())
+      new ReplicatedPG(this, pgid);
+    else if (pgid->is_raid())
+      new RAID4PG(this, pgid);
+    else 
+      assert(0);
     pg_map[pgid] = pg;
 
     // read pg info
@@ -2201,32 +2203,26 @@ void OSD::handle_op(MOSDOp *op)
       waiting_for_pg[pgid].push_back(op);
       return;
     }
-    
-    if (read) {
-      // read. am i the (same) acker?
-      if (//pg->get_acker() != whoami ||
-          op->get_map_epoch() < pg->info.history.same_acker_since) {
-        dout(7) << "acting acker is osd" << pg->get_acker()
-                << " since " << pg->info.history.same_acker_since 
-                << ", dropping" << endl;
-        assert(op->get_map_epoch() < osdmap->get_epoch());
-        delete op;
-        return;
-      }
-    } else {
-      // write. am i the (same) primary?
-      if (pg->get_primary() != whoami ||
-          op->get_map_epoch() < pg->info.history.same_primary_since) {
-        dout(7) << "acting primary is osd" << pg->get_primary()
-                << " since " << pg->info.history.same_primary_since 
-                << ", dropping" << endl;
-        assert(op->get_map_epoch() < osdmap->get_epoch());
-        delete op;
-        return;
-      }
+
+    // pg must be same-ish...
+    if (read && !pg->same_for_read_since(op->get_map_epoch())) {
+      dout(7) << "handle_rep_op pg changed " << pg->info.history
+             << " after " << op->get_map_epoch() 
+             << ", dropping" << endl;
+      assert(op->get_map_epoch() < osdmap->get_epoch());
+      delete op;
+      return;
+    }
+    if (!read && !pg->same_for_modify_since(op->get_map_epoch())) {
+      dout(7) << "handle_rep_op pg changed " << pg->info.history
+             << " after " << op->get_map_epoch() 
+             << ", dropping" << endl;
+      assert(op->get_map_epoch() < osdmap->get_epoch());
+      delete op;
+      return;
     }
     
-    // must be active.
+    // pg must be active.
     if (!pg->is_active()) {
       // replay?
       if (op->get_version().version > 0) {
@@ -2334,23 +2330,13 @@ void OSD::handle_op(MOSDOp *op)
     }
     
     // check osd map: same set, or primary+acker?
-    if (g_conf.osd_rep == OSD_REP_CHAIN &&
-        op->get_map_epoch() < pg->info.history.same_since) {
+    if (!pg->same_for_rep_modify_since(op->get_map_epoch())) {
       dout(10) << "handle_rep_op pg changed " << pg->info.history
                << " after " << op->get_map_epoch() 
                << ", dropping" << endl;
       delete op;
       return;
     }
-    if (g_conf.osd_rep != OSD_REP_CHAIN &&
-        (op->get_map_epoch() < pg->info.history.same_primary_since ||
-         op->get_map_epoch() < pg->info.history.same_acker_since)) {
-      dout(10) << "handle_rep_op pg primary|acker changed " << pg->info.history
-               << " after " << op->get_map_epoch() 
-               << ", dropping" << endl;
-      delete op;
-      return;
-    }
 
     assert(pg->get_role() >= 0);
     dout(7) << "handle_rep_op " << op << " in " << *pg << endl;
index 3273ffc9324753b64bbd29144954598a729daa22..b5a245ca96c173df301868867ca8465407161c65 100644 (file)
@@ -553,6 +553,13 @@ public:
   int op_read(MOSDOp *op) = 0;
   void op_modify(MOSDOp *op) = 0;
 
+  bool same_for_read_since(epoch_t e);
+  bool same_for_modify_since(epoch_t e);
+  bool same_for_rep_modify_since(epoch_t e);
+
+  bool is_missing_object(object_t oid);
+  void wait_for_missing_object(object_t oid, op);
+
 };
 
 
index 112bd1f99203a95f1a63c89c6aff832b1570d217..931f4d67698ac7e8328cec19b00ba33056c29f5b 100644 (file)
 #define  dout(l)    if (l<=g_conf.debug || l<=g_conf.debug_osd) cout << g_clock.now() << " osd" << osd->whoami << " " << (osd->osdmap ? osd->osdmap->get_epoch():0) << " " << *this << " "
 
 
+
+// Read-path staleness check for an op tagged with map epoch `e`.
+// Returns true when `e` is not older than info.history.same_acker_since,
+// false otherwise.
+bool ReplicatedPG::same_for_read_since(epoch_t e)
+{
+  if (e < info.history.same_acker_since)
+    return false;   // op predates the recorded acker epoch
+  return true;
+}
+
+// Modify-path staleness check for an op tagged with map epoch `e`.
+// Returns true only when this OSD is the PG's current primary and `e` is
+// not older than info.history.same_primary_since.
+bool ReplicatedPG::same_for_modify_since(epoch_t e)
+{
+  // The local OSD id is reached through the owning OSD object, the same way
+  // the dout macro in this file does; a bare `whoami` is how code inside the
+  // OSD class itself spells it.
+  return (get_primary() == osd->whoami &&
+          e >= info.history.same_primary_since);
+}
+
+// Staleness check for a replicated modify tagged with map epoch `e`.
+// Which history field is consulted depends on g_conf.osd_rep:
+//   - OSD_REP_CHAIN: `e` must not be older than info.history.same_since.
+//   - otherwise:     `e` must not be older than either
+//                    info.history.same_primary_since or
+//                    info.history.same_acker_since.
+bool ReplicatedPG::same_for_rep_modify_since(epoch_t e)
+{
+  // check osd map: same set, or primary+acker?
+
+  if (g_conf.osd_rep == OSD_REP_CHAIN) {
+    return e >= info.history.same_since;   // whole pg set same
+  } else {
+    // primary, splay
+    return (e >= info.history.same_primary_since &&
+            e >= info.history.same_acker_since);
+  }
+}
+
+
+// Returns true when `oid` has at least one entry in missing.missing.
+bool ReplicatedPG::is_missing_object(object_t oid)
+{
+  const bool is_missing = (missing.missing.count(oid) != 0);
+  return is_missing;
+}
+
+
+// Stub: the body is empty in this revision, so calling it has no effect.
+// NOTE(review): presumably meant to hold `op` until `oid` is no longer
+// missing -- confirm the intended behavior before relying on it.
+// NOTE(review): the declarations of this method in PG.h and ReplicatedPG.h
+// spell the second parameter as an untyped `op`; they need the same
+// `MOSDOp *op` type used here.
+void ReplicatedPG::wait_for_missing_object(object_t oid, MOSDOp *op)
+{
+  // TODO: not implemented yet
+}
+
+
+
+
 // ========================================================================
 // READS
 
index 23823eefe294e6f7e1d8395c180d8d8002ca8bf5..dfb52a6cb556d4ac62a596277e8f43c55089859d 100644 (file)
@@ -83,6 +83,13 @@ public:
   int op_read(MOSDOp *op);
   void op_modify(MOSDOp *op);
   
+  bool same_for_read_since(epoch_t e);
+  bool same_for_modify_since(epoch_t e);
+  bool same_for_rep_modify_since(epoch_t e);
+
+  bool is_missing_object(object_t oid);
+  void wait_for_missing_object(object_t oid, op);
+
 };