bufferlist *bl) {
if (!sinfo.supports_direct_reads()) {
- return -EOPNOTSUPP;
+ return -EOPNOTSUPP; // For exec calls
}
- if (get_parent()->get_local_missing().is_missing(hoid)) {
- return -EIO; // Permission denied (cos its missing)
- }
+ // Cannot return EAGAIN here: the op would get dropped. This check must have
+ // been done earlier.
+ ceph_assert(!get_parent()->get_local_missing().is_missing(hoid));
auto [shard_offset, shard_len] = extent_to_shard_extent(off, len);
std::map<uint64_t, uint64_t>& m,
uint32_t op_flags,
ceph::buffer::list *bl) {
- if (get_parent()->get_local_missing().is_missing(hoid)) {
- return -EACCES; // Permission denied (cos its missing)
- }
- // Not using extent set, since we need the one used by readv.
+ // Cannot return EAGAIN here: the op would get dropped. This check must have
+ // been done earlier.
+ ceph_assert(!get_parent()->get_local_missing().is_missing(hoid));
auto shard = get_parent()->whoami_shard().shard;
+ // Not using extent_set, since we need the one used by readv.
interval_set im(std::move(m));
m.clear(); // Make m safe to write to again.
auto r = switcher->store->readv(switcher->ch, ghobject_t(hoid, ghobject_t::NO_GEN, shard), im, *bl, op_flags);
return;
}
+ // EC direct read of an object that is missing locally: reply -EAGAIN
+ if (m->has_flag(CEPH_OSD_FLAG_EC_DIRECT_READ) &&
+ get_local_missing().is_missing(head)) {
+ dout(20) << __func__ << ": oid=" << head << " missing in direct read" << dendl;
+ osd->reply_op_error(op, -EAGAIN);
+ return;
+ }
+
if (write_ordered) {
// degraded object?
if (is_degraded_or_backfilling_object(head)) {
// read into a buffer
int result = 0;
+ uint64_t bytes_read = 0; // Track actual bytes read for statistics
+
if (trimmed_read && op.extent.length == 0) {
// read size was trimmed to zero and it is expected to do nothing
// a read operation of 0 bytes does *not* do nothing, this is why
maybe_crc = oi.data_digest;
if (ctx->op->ec_direct_read()) {
- result = pgbackend->objects_read_local(
+ int r = pgbackend->objects_read_local(
soid, op.extent.offset, op.extent.length, op.flags, &osd_op.outdata);
+ if (r >= 0) {
+ bytes_read = r;
+ // Don't update op.extent.length - causes issues with recursive
+ // calls from operations like CHECKSUM
+ } else if (r == -EAGAIN) {
+ result = -EAGAIN;
+ } else {
+ result = r;
+ }
dout(20) << " EC local read for " << soid << " result=" << result << dendl;
} else if (ctx->op->ec_sync_read()) {
result = pgbackend->objects_read_sync(
oi.size, ctx->op->coro_handles);
dout(20) << " EC sync read for " << soid << " result=" << result << dendl;
} else {
- ctx->pending_async_reads.push_back(
- make_pair(
- boost::make_tuple(op.extent.offset, op.extent.length, op.flags),
- make_pair(&osd_op.outdata,
- new FillInVerifyExtent(&op.extent.length, &osd_op.rval,
- &osd_op.outdata, maybe_crc, oi.size,
- osd, soid, op.flags))));
- dout(10) << " async_read noted for " << soid << dendl;
+ ctx->pending_async_reads.push_back(
+ make_pair(
+ boost::make_tuple(op.extent.offset, op.extent.length, op.flags),
+ make_pair(&osd_op.outdata,
+ new FillInVerifyExtent(&op.extent.length, &osd_op.rval,
+ &osd_op.outdata, maybe_crc, oi.size,
+ osd, soid, op.flags))));
+ dout(10) << " async_read noted for " << soid << dendl;
- ctx->op_finishers[ctx->current_osd_subop_num].reset(
+ ctx->op_finishers[ctx->current_osd_subop_num].reset(
new ReadFinisher(osd_op));
+ // Async read is only queued here, so count the requested length; FillInVerifyExtent will update op.extent.length
+ bytes_read = op.extent.length;
}
} else {
int r = pgbackend->objects_read_sync(
if (r == -EIO) {
r = rep_repair_primary_object(soid, ctx);
}
- if (r >= 0)
+ if (r >= 0) {
op.extent.length = r;
- else if (r == -EAGAIN) {
+ bytes_read = r;
+ } else if (r == -EAGAIN) {
result = -EAGAIN;
} else {
result = r;
<< " bytes from obj " << soid << dendl;
}
if (result >= 0) {
- ctx->delta_stats.num_rd_kb += shift_round_up(op.extent.length, 10);
+ ctx->delta_stats.num_rd_kb += shift_round_up(bytes_read, 10);
ctx->delta_stats.num_rd++;
}
return result;
uint64_t size = oi.size;
uint64_t offset = op.extent.offset;
uint64_t length = op.extent.length;
+ uint64_t bytes_read = 0; // Track actual bytes read for statistics
// are we beyond truncate_size?
if ((oi.truncate_seq < op.extent.truncate_seq) &&
make_pair(
boost::make_tuple(offset, length, op.flags),
make_pair(
- &osd_op.outdata,
- new ToSparseReadResult(&osd_op.rval, &osd_op.outdata, offset,
- &op.extent.length))));
+ &osd_op.outdata,
+ new ToSparseReadResult(&osd_op.rval, &osd_op.outdata, offset,
+ &op.extent.length))));
dout(10) << " async_read (was sparse_read) noted for " << soid << dendl;
ctx->op_finishers[ctx->current_osd_subop_num].reset(
new ReadFinisher(osd_op));
+ // Async read is only queued here, so count the requested length; ToSparseReadResult will update op.extent.length
+ bytes_read = length;
} else {
dout(10) << " sparse read ended up empty for " << soid << dendl;
map<uint64_t, uint64_t> extents;
map<uint64_t, uint64_t> m;
auto [shard_offset, shard_length] = pgbackend->extent_to_shard_extent(offset, length);
int r = osd->store->fiemap(ch, ghobject_t(soid, ghobject_t::NO_GEN,
- info.pgid.shard),
- shard_offset, shard_length, m);
+ info.pgid.shard),
+ shard_offset, shard_length, m);
if (r < 0) {
return r;
}
<< " != expected 0x" << oi.data_digest
<< std::dec << " on " << soid;
r = rep_repair_primary_object(soid, ctx);
- if (r < 0) {
- return r;
- }
+ if (r < 0) {
+ return r;
+ }
}
}
- op.extent.length = r;
+ bytes_read = r;
+ // Only set op.extent.length for non-EC-direct-read to avoid issues
+ // with recursive calls from operations like CHECKSUM
+ if (!ctx->op->ec_direct_read()) {
+ op.extent.length = r;
+ }
encode(m, osd_op.outdata); // re-encode since it might be modified
::encode_destructively(data_bl, osd_op.outdata);
<< " bytes from object " << soid << dendl;
}
- ctx->delta_stats.num_rd_kb += shift_round_up(op.extent.length, 10);
+ ctx->delta_stats.num_rd_kb += shift_round_up(bytes_read, 10);
ctx->delta_stats.num_rd++;
return 0;
}