]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: use MOSDScrubReserve instead of MOSDSubOp for scrub reservations
authorSage Weil <sage@redhat.com>
Mon, 13 Feb 2017 21:36:37 +0000 (16:36 -0500)
committerSage Weil <sage@redhat.com>
Fri, 31 Mar 2017 18:42:11 +0000 (14:42 -0400)
This is the last MOSDSubOp user.

Note that while the next step is to move to AsyncReserver internally,
this isn't quite yet possible since AsyncReserve "blocks" indefinitely
so we wouldn't generate a REJECT.  Changing how we schdule scrubs
internally will take a bit more work.

Signed-off-by: Sage Weil <sage@redhat.com>
src/messages/MOSDScrubReserve.h [new file with mode: 0644]
src/messages/MRecoveryReserve.h
src/msg/Message.cc
src/msg/Message.h
src/osd/OSD.cc
src/osd/OSD.h
src/osd/PG.cc
src/osd/PG.h
src/osd/PrimaryLogPG.cc

diff --git a/src/messages/MOSDScrubReserve.h b/src/messages/MOSDScrubReserve.h
new file mode 100644 (file)
index 0000000..6eb3996
--- /dev/null
@@ -0,0 +1,93 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MOSDSCRUBRESERVE_H
+#define CEPH_MOSDSCRUBRESERVE_H
+
+#include "MOSDFastDispatchOp.h"
+
+class MOSDScrubReserve : public MOSDFastDispatchOp {
+  static const int HEAD_VERSION = 1;
+  static const int COMPAT_VERSION = 1;
+public:
+  spg_t pgid;
+  epoch_t map_epoch;
+  enum {
+    REQUEST = 0,
+    GRANT = 1,
+    RELEASE = 2,
+    REJECT = 3,
+  };
+  int32_t type;
+  pg_shard_t from;
+
+  epoch_t get_map_epoch() const override {
+    return map_epoch;
+  }
+  spg_t get_spg() const override {
+    return pgid;
+  }
+
+  MOSDScrubReserve()
+    : MOSDFastDispatchOp(MSG_OSD_SCRUB_RESERVE, HEAD_VERSION, COMPAT_VERSION),
+      map_epoch(0), type(-1) {}
+  MOSDScrubReserve(spg_t pgid,
+                  epoch_t map_epoch,
+                  int type,
+                  pg_shard_t from)
+    : MOSDFastDispatchOp(MSG_OSD_SCRUB_RESERVE, HEAD_VERSION, COMPAT_VERSION),
+      pgid(pgid), map_epoch(map_epoch),
+      type(type), from(from) {}
+
+  const char *get_type_name() const {
+    return "MOSDScrubReserve";
+  }
+
+  void print(ostream& out) const {
+    out << "MOSDScrubReserve(" << pgid << " ";
+    switch (type) {
+    case REQUEST:
+      out << "REQUEST ";
+      break;
+    case GRANT:
+      out << "GRANT ";
+      break;
+    case REJECT:
+      out << "REJECT ";
+      break;
+    case RELEASE:
+      out << "RELEASE ";
+      break;
+    }
+    out << "e" << map_epoch << ")";
+    return;
+  }
+
+  void decode_payload() {
+    bufferlist::iterator p = payload.begin();
+    ::decode(pgid, p);
+    ::decode(map_epoch, p);
+    ::decode(type, p);
+    ::decode(from, p);
+  }
+
+  void encode_payload(uint64_t features) {
+    ::encode(pgid, payload);
+    ::encode(map_epoch, payload);
+    ::encode(type, payload);
+    ::encode(from, payload);
+  }
+};
+
+#endif
index 66cce149af669915cf3241564e4171d0d04ff68d..c0e975004d84dacc9690b44ae1642ef3d0e7d0c1 100644 (file)
@@ -45,19 +45,19 @@ public:
   }
 
   void print(ostream& out) const override {
-    out << "MRecoveryReserve ";
+    out << "MRecoveryReserve(" << pgid;
     switch (type) {
     case REQUEST:
-      out << "REQUEST ";
+      out << " REQUEST";
       break;
     case GRANT:
-      out << "GRANT ";
+      out << " GRANT";
       break;
     case RELEASE:
-      out << "RELEASE ";
+      out << " RELEASE";
       break;
     }
-    out << " pgid: " << pgid << ", query_epoch: " << query_epoch;
+    out << " e" << query_epoch << ")";
     return;
   }
 
index 39b0575ff655cd8beca31ea96e893010bf0275cf..e19fd017b1c0963502620e0eedd17fdd1bd0e516 100644 (file)
@@ -81,6 +81,7 @@ using namespace std;
 #include "messages/MOSDPGCreate.h"
 #include "messages/MOSDPGTrim.h"
 #include "messages/MOSDScrub.h"
+#include "messages/MOSDScrubReserve.h"
 #include "messages/MOSDRepScrub.h"
 #include "messages/MOSDRepScrubMap.h"
 #include "messages/MOSDPGScan.h"
@@ -511,6 +512,9 @@ Message *decode_message(CephContext *cct, int crcflags,
   case MSG_OSD_SCRUB:
     m = new MOSDScrub;
     break;
+  case MSG_OSD_SCRUB_RESERVE:
+    m = new MOSDScrubReserve;
+    break;
   case MSG_REMOVE_SNAPS:
     m = new MRemoveSnaps;
     break;
index f5e5acc686631198b8ca13428db8d6df6f7403dc..67c1edce82d07bf0655838bcd29ad08c931f8d6f 100644 (file)
@@ -88,7 +88,7 @@
 #define MSG_REMOVE_SNAPS       90
 
 #define MSG_OSD_SCRUB          91
-//#define MSG_OSD_PG_MISSING     92  // obsolete
+#define MSG_OSD_SCRUB_RESERVE  92  // previous PG_MISSING
 #define MSG_OSD_REP_SCRUB      93
 
 #define MSG_OSD_PG_SCAN        94
index 4e3afa6e17415f2d1b99efa90c263757f41d8240..a7ce5da7bb10423c534f621369a99f9dee161e62 100644 (file)
 #include "messages/MOSDAlive.h"
 
 #include "messages/MOSDScrub.h"
+#include "messages/MOSDScrubReserve.h"
 #include "messages/MOSDRepScrub.h"
 
 #include "messages/MMonCommand.h"
index 2f14e049f6d8f49c0ed69a279b9688ecae8398d7..33f54765e73ec273dc891ce1704bb183227223b2 100644 (file)
@@ -2336,6 +2336,7 @@ protected:
     case MSG_OSD_EC_WRITE_REPLY:
     case MSG_OSD_EC_READ:
     case MSG_OSD_EC_READ_REPLY:
+    case MSG_OSD_SCRUB_RESERVE:
     case MSG_OSD_REP_SCRUB:
     case MSG_OSD_REP_SCRUBMAP:
     case MSG_OSD_PG_UPDATE_LOG_MISSING:
index 477e00e9c372fce40f6db7e630756efab5458ede..a017e8b0045aa04651eb3ccd01c27c1899dda4d2 100644 (file)
@@ -49,7 +49,7 @@
 #include "messages/MOSDPGUpdateLogMissing.h"
 #include "messages/MOSDPGUpdateLogMissingReply.h"
 #include "messages/MOSDBackoff.h"
-
+#include "messages/MOSDScrubReserve.h"
 #include "messages/MOSDSubOp.h"
 #include "messages/MOSDRepOp.h"
 #include "messages/MOSDSubOpReply.h"
@@ -3724,67 +3724,75 @@ void PG::_request_scrub_map(
     replica.osd, repscrubop, get_osdmap()->get_epoch());
 }
 
-void PG::sub_op_scrub_reserve(OpRequestRef op)
+void PG::handle_scrub_reserve_request(OpRequestRef op)
 {
-  const MOSDSubOp *m = static_cast<const MOSDSubOp*>(op->get_req());
-  assert(m->get_type() == MSG_OSD_SUBOP);
-  dout(7) << "sub_op_scrub_reserve" << dendl;
-
+  dout(7) << __func__ << " " << *op->get_req() << dendl;
+  op->mark_started();
   if (scrubber.reserved) {
-    dout(10) << "Ignoring reserve request: Already reserved" << dendl;
+    dout(10) << __func__ << " ignoring reserve request: Already reserved"
+            << dendl;
     return;
   }
-
-  op->mark_started();
-
   scrubber.reserved = osd->inc_scrubs_pending();
-
-  MOSDSubOpReply *reply = new MOSDSubOpReply(
-    m, pg_whoami, 0, get_osdmap()->get_epoch(), CEPH_OSD_FLAG_ACK);
-  ::encode(scrubber.reserved, reply->get_data());
-  osd->send_message_osd_cluster(reply, m->get_connection());
+  if (op->get_req()->get_type() == MSG_OSD_SCRUB_RESERVE) {
+    const MOSDScrubReserve *m =
+      static_cast<const MOSDScrubReserve*>(op->get_req());
+    Message *reply = new MOSDScrubReserve(
+      spg_t(info.pgid.pgid, primary.shard),
+      m->map_epoch,
+      scrubber.reserved ? MOSDScrubReserve::GRANT : MOSDScrubReserve::REJECT,
+      pg_whoami);
+    osd->send_message_osd_cluster(reply, op->get_req()->get_connection());
+  } else {
+    // for jewel compat only
+    const MOSDSubOp *req = static_cast<const MOSDSubOp*>(op->get_req());
+    assert(req->get_type() == MSG_OSD_SUBOP);
+    MOSDSubOpReply *reply = new MOSDSubOpReply(
+      req, pg_whoami, 0, get_osdmap()->get_epoch(), CEPH_OSD_FLAG_ACK);
+    ::encode(scrubber.reserved, reply->get_data());
+    osd->send_message_osd_cluster(reply, op->get_req()->get_connection());
+  }
 }
 
-void PG::sub_op_scrub_reserve_reply(OpRequestRef op)
+void PG::handle_scrub_reserve_grant(OpRequestRef op, pg_shard_t from)
 {
-  const MOSDSubOpReply *reply = static_cast<const MOSDSubOpReply*>(op->get_req());
-  assert(reply->get_type() == MSG_OSD_SUBOPREPLY);
-  dout(7) << "sub_op_scrub_reserve_reply" << dendl;
-
+  dout(7) << __func__ << " " << *op->get_req() << dendl;
+  op->mark_started();
   if (!scrubber.reserved) {
     dout(10) << "ignoring obsolete scrub reserve reply" << dendl;
     return;
   }
+  if (scrubber.reserved_peers.find(from) != scrubber.reserved_peers.end()) {
+    dout(10) << " already had osd." << from << " reserved" << dendl;
+  } else {
+    dout(10) << " osd." << from << " scrub reserve = success" << dendl;
+    scrubber.reserved_peers.insert(from);
+    sched_scrub();
+  }
+}
 
+void PG::handle_scrub_reserve_reject(OpRequestRef op, pg_shard_t from)
+{
+  dout(7) << __func__ << " " << *op->get_req() << dendl;
   op->mark_started();
-
-  pg_shard_t from = reply->from;
-  bufferlist::iterator p = const_cast<bufferlist&>(reply->get_data()).begin();
-  bool reserved;
-  ::decode(reserved, p);
-
+  if (!scrubber.reserved) {
+    dout(10) << "ignoring obsolete scrub reserve reply" << dendl;
+    return;
+  }
   if (scrubber.reserved_peers.find(from) != scrubber.reserved_peers.end()) {
     dout(10) << " already had osd." << from << " reserved" << dendl;
   } else {
-    if (reserved) {
-      dout(10) << " osd." << from << " scrub reserve = success" << dendl;
-      scrubber.reserved_peers.insert(from);
-    } else {
-      /* One decline stops this pg from being scheduled for scrubbing. */
-      dout(10) << " osd." << from << " scrub reserve = fail" << dendl;
-      scrubber.reserve_failed = true;
-    }
+    /* One decline stops this pg from being scheduled for scrubbing. */
+    dout(10) << " osd." << from << " scrub reserve = fail" << dendl;
+    scrubber.reserve_failed = true;
     sched_scrub();
   }
 }
 
-void PG::sub_op_scrub_unreserve(OpRequestRef op)
+void PG::handle_scrub_reserve_release(OpRequestRef op)
 {
-  assert(op->get_req()->get_type() == MSG_OSD_SUBOP);
-  dout(7) << "sub_op_scrub_unreserve" << dendl;
-
+  dout(7) << __func__ << " " << *op->get_req() << dendl;
   op->mark_started();
-
   clear_scrub_reserved();
 }
 
@@ -3828,17 +3836,27 @@ void PG::scrub_reserve_replicas()
        ++i) {
     if (*i == pg_whoami) continue;
     dout(10) << "scrub requesting reserve from osd." << *i << dendl;
-    vector<OSDOp> scrub(1);
-    scrub[0].op.op = CEPH_OSD_OP_SCRUB_RESERVE;
-    hobject_t poid;
-    eversion_t v;
-    osd_reqid_t reqid;
-    MOSDSubOp *subop = new MOSDSubOp(
-      reqid, pg_whoami, spg_t(info.pgid.pgid, i->shard), poid, 0,
-      get_osdmap()->get_epoch(), osd->get_tid(), v);
-    subop->ops = scrub;
-    osd->send_message_osd_cluster(
-      i->osd, subop, get_osdmap()->get_epoch());
+    if (HAVE_FEATURE(get_min_acting_features(), SERVER_LUMINOUS)) {
+      osd->send_message_osd_cluster(
+       i->osd,
+       new MOSDScrubReserve(spg_t(info.pgid.pgid, i->shard),
+                            get_osdmap()->get_epoch(),
+                            MOSDScrubReserve::REQUEST, pg_whoami),
+       get_osdmap()->get_epoch());
+    } else {
+      // for jewel compat only
+      vector<OSDOp> scrub(1);
+      scrub[0].op.op = CEPH_OSD_OP_SCRUB_RESERVE;
+      hobject_t poid;
+      eversion_t v;
+      osd_reqid_t reqid;
+      MOSDSubOp *subop = new MOSDSubOp(
+       reqid, pg_whoami, spg_t(info.pgid.pgid, i->shard), poid, 0,
+       get_osdmap()->get_epoch(), osd->get_tid(), v);
+      subop->ops = scrub;
+      osd->send_message_osd_cluster(
+       i->osd, subop, get_osdmap()->get_epoch());
+    }
   }
 }
 
@@ -3850,16 +3868,26 @@ void PG::scrub_unreserve_replicas()
        ++i) {
     if (*i == pg_whoami) continue;
     dout(10) << "scrub requesting unreserve from osd." << *i << dendl;
-    vector<OSDOp> scrub(1);
-    scrub[0].op.op = CEPH_OSD_OP_SCRUB_UNRESERVE;
-    hobject_t poid;
-    eversion_t v;
-    osd_reqid_t reqid;
-    MOSDSubOp *subop = new MOSDSubOp(
-      reqid, pg_whoami, spg_t(info.pgid.pgid, i->shard), poid, 0,
-      get_osdmap()->get_epoch(), osd->get_tid(), v);
-    subop->ops = scrub;
-    osd->send_message_osd_cluster(i->osd, subop, get_osdmap()->get_epoch());
+    if (HAVE_FEATURE(get_min_acting_features(), SERVER_LUMINOUS)) {
+      osd->send_message_osd_cluster(
+       i->osd,
+       new MOSDScrubReserve(spg_t(info.pgid.pgid, i->shard),
+                            get_osdmap()->get_epoch(),
+                            MOSDScrubReserve::RELEASE, pg_whoami),
+       get_osdmap()->get_epoch());
+    } else {
+      // for jewel compat only
+      vector<OSDOp> scrub(1);
+      scrub[0].op.op = CEPH_OSD_OP_SCRUB_UNRESERVE;
+      hobject_t poid;
+      eversion_t v;
+      osd_reqid_t reqid;
+      MOSDSubOp *subop = new MOSDSubOp(
+       reqid, pg_whoami, spg_t(info.pgid.pgid, i->shard), poid, 0,
+       get_osdmap()->get_epoch(), osd->get_tid(), v);
+      subop->ops = scrub;
+      osd->send_message_osd_cluster(i->osd, subop, get_osdmap()->get_epoch());
+    }
   }
 }
 
@@ -5545,6 +5573,8 @@ bool PG::can_discard_request(OpRequestRef& op)
     return can_discard_replica_op<MOSDECSubOpReadReply, MSG_OSD_EC_READ_REPLY>(op);
   case MSG_OSD_REP_SCRUB:
     return can_discard_replica_op<MOSDRepScrub, MSG_OSD_REP_SCRUB>(op);
+  case MSG_OSD_SCRUB_RESERVE:
+    return can_discard_replica_op<MOSDScrubReserve, MSG_OSD_SCRUB_RESERVE>(op);
   case MSG_OSD_REP_SCRUBMAP:
     return can_discard_replica_op<MOSDRepScrubMap, MSG_OSD_REP_SCRUBMAP>(op);
   case MSG_OSD_PG_UPDATE_LOG_MISSING:
@@ -5651,6 +5681,11 @@ bool PG::op_must_wait_for_map(epoch_t cur_epoch, OpRequestRef& op)
       cur_epoch,
       static_cast<const MOSDRepScrub*>(op->get_req())->map_epoch);
 
+  case MSG_OSD_SCRUB_RESERVE:
+    return !have_same_or_newer_map(
+      cur_epoch,
+      static_cast<const MOSDScrubReserve*>(op->get_req())->map_epoch);
+
   case MSG_OSD_REP_SCRUBMAP:
     return !have_same_or_newer_map(
       cur_epoch,
index 3aa4776785ae554577bee0e9c0c232f99e8a6836..853ef96fa8490f60117fb473f7219ff7d35e3176 100644 (file)
@@ -1334,9 +1334,11 @@ public:
     ThreadPool::TPHandle &handle);
   void do_replica_scrub_map(OpRequestRef op);
   void sub_op_scrub_map(OpRequestRef op);
-  void sub_op_scrub_reserve(OpRequestRef op);
-  void sub_op_scrub_reserve_reply(OpRequestRef op);
-  void sub_op_scrub_unreserve(OpRequestRef op);
+
+  void handle_scrub_reserve_request(OpRequestRef op);
+  void handle_scrub_reserve_grant(OpRequestRef op, pg_shard_t from);
+  void handle_scrub_reserve_reject(OpRequestRef op, pg_shard_t from);
+  void handle_scrub_reserve_release(OpRequestRef op);
 
   void reject_reservation();
   void schedule_backfill_full_retry();
index 1481b4104bb2ce7e282372b7fbe309f308059608..75a5eea675d1a52d693688dc40c1441c36a8ceb7 100644 (file)
@@ -41,6 +41,7 @@
 #include "messages/MOSDPGUpdateLogMissing.h"
 #include "messages/MOSDPGUpdateLogMissingReply.h"
 #include "messages/MCommandReply.h"
+#include "messages/MOSDScrubReserve.h"
 #include "mds/inode_backtrace.h" // Ugh
 #include "common/EventTrace.h"
 
@@ -1685,6 +1686,27 @@ void PrimaryLogPG::do_request(
     do_backfill_remove(op);
     break;
 
+  case MSG_OSD_SCRUB_RESERVE:
+    {
+      const MOSDScrubReserve *m =
+       static_cast<const MOSDScrubReserve*>(op->get_req());
+      switch (m->type) {
+      case MOSDScrubReserve::REQUEST:
+       handle_scrub_reserve_request(op);
+       break;
+      case MOSDScrubReserve::GRANT:
+       handle_scrub_reserve_grant(op, m->from);
+       break;
+      case MOSDScrubReserve::REJECT:
+       handle_scrub_reserve_reject(op, m->from);
+       break;
+      case MOSDScrubReserve::RELEASE:
+       handle_scrub_reserve_release(op);
+       break;
+      }
+    }
+    break;
+
   case MSG_OSD_REP_SCRUB:
     replica_scrub(op, handle);
     break;
@@ -3255,10 +3277,10 @@ void PrimaryLogPG::do_sub_op(OpRequestRef op)
       sub_op_remove(op);
       return;
     case CEPH_OSD_OP_SCRUB_RESERVE:
-      sub_op_scrub_reserve(op);
+      handle_scrub_reserve_request(op);
       return;
     case CEPH_OSD_OP_SCRUB_UNRESERVE:
-      sub_op_scrub_unreserve(op);
+      handle_scrub_reserve_release(op);
       return;
     case CEPH_OSD_OP_SCRUB_MAP:
       sub_op_scrub_map(op);
@@ -3275,7 +3297,17 @@ void PrimaryLogPG::do_sub_op_reply(OpRequestRef op)
     const OSDOp& first = r->ops[0];
     switch (first.op.op) {
     case CEPH_OSD_OP_SCRUB_RESERVE:
-      sub_op_scrub_reserve_reply(op);
+      {
+       pg_shard_t from = r->from;
+       bufferlist::iterator p = const_cast<bufferlist&>(r->get_data()).begin();
+       bool reserved;
+       ::decode(reserved, p);
+       if (reserved) {
+         handle_scrub_reserve_grant(op, from);
+       } else {
+         handle_scrub_reserve_reject(op, from);
+       }
+      }
       return;
     }
   }