1. The current pool_last_map_marked_full tracking is buggy.
2. We need to recheck the full state each time we consider the op, not just
when it is received off the wire. Otherwise, we might get a message, queue it
for some reason, get a map indicating the cluster or pool is full, and
then requeue and process the op instead of discarding it.
3. For now, silently drop ops when the failsafe full check fails. This will
lead to stalled client IO, so it needs a more robust fix.
Signed-off-by: Sage Weil <sage@redhat.com>
pg_t _pgid = m->get_pg();
int64_t pool = _pgid.pool();
if (op->may_write()) {
- // full?
- if ((service.check_failsafe_full() ||
- osdmap->test_flag(CEPH_OSDMAP_FULL) ||
- m->get_map_epoch() < superblock.last_map_marked_full) &&
- !m->get_source().is_mds()) { // FIXME: we'll exclude mds writes for now.
- // Drop the request, since the client will retry when the full
- // flag is unset.
- return;
- }
-
const pg_pool_t *pi = osdmap->get_pg_pool(pool);
if (!pi) {
return;
}
- // pool is full ?
- map<int64_t, epoch_t> &pool_last_map_marked_full = superblock.pool_last_map_marked_full;
- if ((pi->has_flag(pg_pool_t::FLAG_FULL) ||
- (pool_last_map_marked_full.count(pool) && (m->get_map_epoch() < pool_last_map_marked_full[pool]))) && !m->get_source().is_mds()) {
- return;
- }
// invalid?
if (m->get_snapid() != CEPH_NOSNAP) {
return;
}
+ // discard due to cluster full transition? (we discard any op that
+ // originates before the cluster or pool is marked full; the client
+ // will resend after the full flag is removed or if they expect the
+ // op to succeed despite being full). The exception is FULL_FORCE ops,
+ // which there is no reason to discard because they bypass all full
+ // checks anyway.
+ // FIXME: we exclude mds writes for now.
+ if (!(m->get_source().is_mds() || m->has_flag(CEPH_OSD_FLAG_FULL_FORCE)) &&
+ info.history.last_epoch_marked_full > m->get_map_epoch()) {
+ dout(10) << __func__ << " discarding op sent before full " << m << " "
+ << *m << dendl;
+ return;
+ }
+ if (osd->check_failsafe_full()) {
+ dout(10) << __func__ << " fail-safe full check failed, dropping request"
+ << dendl;
+ return;
+ }
+
// order this op as a write?
bool write_ordered =
op->may_write() ||