#include "events/ESessions.h"
#include "InoTable.h"
+#include "fscrypt.h"
#include "common/Timer.h"
_truncate_inode(in, ls);
}
+/*
+ * I/O completion context attached to the last-block write issued by
+ * _truncate_inode (the filer.write() path). Once that write has finished
+ * it hands control to MDCache::truncate_inode_write_finish(), forwarding
+ * the inode, the log segment and the block size it was constructed with.
+ */
+struct C_IO_MDC_TruncateWriteFinish : public MDCacheIOContext {
+  CInode *in;           // inode whose truncate is in progress
+  LogSegment *ls;       // log segment forwarded unchanged to the next stage
+  uint32_t block_size;  // block size forwarded to truncate_inode_write_finish()
+
+  C_IO_MDC_TruncateWriteFinish(MDCache *cache, CInode *inode,
+                               LogSegment *segment, uint32_t bsize)
+    : MDCacheIOContext(cache, false),
+      in(inode),
+      ls(segment),
+      block_size(bsize)
+  {}
+
+  // The write must have succeeded, or reported -CEPHFS_ENOENT; any other
+  // result trips the assert. Then continue with the truncate stage.
+  void finish(int r) override
+  {
+    ceph_assert(r == 0 || r == -CEPHFS_ENOENT);
+    mdcache->truncate_inode_write_finish(in, ls, block_size);
+  }
+
+  // Short description of this context: "file_truncate_write(<ino>)".
+  void print(ostream& out) const override
+  {
+    out << "file_truncate_write(";
+    out << in->ino();
+    out << ")";
+  }
+};
+
struct C_IO_MDC_TruncateFinish : public MDCacheIOContext {
CInode *in;
LogSegment *ls;
{
const auto& pi = in->get_inode();
dout(10) << "_truncate_inode "
- << pi->truncate_from << " -> " << pi->truncate_size
- << " on " << *in << dendl;
+ << pi->truncate_from << " -> " << pi->truncate_size
+ << " fscrypt last block length is " << pi->fscrypt_last_block.length()
+ << " on " << *in << dendl;
ceph_assert(pi->is_truncating());
ceph_assert(pi->truncate_size < (1ULL << 63));
ceph_assert(pi->truncate_from < (1ULL << 63));
- ceph_assert(pi->truncate_size < pi->truncate_from);
+ ceph_assert(pi->truncate_size < pi->truncate_from ||
+ (pi->truncate_size == pi->truncate_from &&
+ pi->fscrypt_last_block.length()));
SnapRealm *realm = in->find_snaprealm();
snapc = &nullsnap;
ceph_assert(in->last == CEPH_NOSNAP);
}
- dout(10) << "_truncate_inode snapc " << snapc << " on " << *in << dendl;
+ dout(10) << "_truncate_inode snapc " << snapc << " on " << *in
+ << " fscrypt_last_block length is " << pi->fscrypt_last_block.length()
+ << dendl;
auto layout = pi->layout;
- filer.truncate(in->ino(), &layout, *snapc,
- pi->truncate_size, pi->truncate_from-pi->truncate_size,
- pi->truncate_seq, ceph::real_time::min(), 0,
- new C_OnFinisher(new C_IO_MDC_TruncateFinish(this, in, ls),
- mds->finisher));
+ struct ceph_fscrypt_last_block_header header;
+ memset(&header, 0, sizeof(header));
+ bufferlist data;
+ if (pi->fscrypt_last_block.length()) {
+ auto bl = pi->fscrypt_last_block.cbegin();
+ DECODE_START(1, bl);
+ decode(header.change_attr, bl);
+ decode(header.file_offset, bl);
+ decode(header.block_size, bl);
+
+ /*
+ * The block_size will be in unit of KB, so if the last block is not
+ * located in a file hole, the struct_len should be larger than the
+ * header.block_size.
+ */
+ if (struct_len > header.block_size) {
+ bl.copy(header.block_size, data);
+ }
+ DECODE_FINISH(bl);
+ }
+
+ if (data.length()) {
+ dout(10) << "_truncate_inode write on inode " << *in << " change_attr: "
+ << header.change_attr << " offset: " << header.file_offset << " blen: "
+ << header.block_size << dendl;
+ filer.write(in->ino(), &layout, *snapc, header.file_offset, header.block_size,
+ data, ceph::real_time::min(), 0,
+ new C_OnFinisher(new C_IO_MDC_TruncateWriteFinish(this, in, ls,
+ header.block_size),
+ mds->finisher));
+ } else { // located in file hole.
+ uint64_t length = pi->truncate_from - pi->truncate_size;
+
+ /*
+ * When fscrypt is enabled, truncate_from and truncate_size may be
+ * equal, with both aligned up to header.block_size. In that case we
+ * always request a larger length to make sure the OSD won't miss
+ * truncating the last object.
+ */
+ if (pi->fscrypt_last_block.length()) {
+ dout(10) << "_truncate_inode truncate on inode " << *in << " hits a hole!" << dendl;
+ length += header.block_size;
+ }
+ ceph_assert(length);
+
+ dout(10) << "_truncate_inode truncate on inode " << *in << dendl;
+ filer.truncate(in->ino(), &layout, *snapc, pi->truncate_size, length,
+ pi->truncate_seq, ceph::real_time::min(), 0,
+ new C_OnFinisher(new C_IO_MDC_TruncateFinish(this, in, ls),
+ mds->finisher));
+ }
+
}
struct C_MDC_TruncateLogged : public MDCacheLogContext {
}
};
+/*
+ * Continue a truncate after the last-block write has completed.
+ *
+ * Called from C_IO_MDC_TruncateWriteFinish::finish(). Re-checks that the
+ * inode is still in the truncating state, picks the snap context to use,
+ * and then issues the object truncate through filer.truncate(), completing
+ * with C_IO_MDC_TruncateFinish on the MDS finisher.
+ *
+ * @param in          inode being truncated
+ * @param ls          log segment handed on to C_IO_MDC_TruncateFinish
+ * @param block_size  block size carried over from the write stage; added
+ *                    to the truncate length (see the comment below)
+ */
+void MDCache::truncate_inode_write_finish(CInode *in, LogSegment *ls,
+ uint32_t block_size)
+{
+ const auto& pi = in->get_inode();
+ dout(10) << "_truncate_inode_write "
+ << pi->truncate_from << " -> " << pi->truncate_size
+ << " on " << *in << dendl;
+
+ // Same sanity checks as in _truncate_inode: a truncate must be pending,
+ // both offsets must be below 2^63, and truncate_size may equal
+ // truncate_from only when an fscrypt last block is attached.
+ ceph_assert(pi->is_truncating());
+ ceph_assert(pi->truncate_size < (1ULL << 63));
+ ceph_assert(pi->truncate_from < (1ULL << 63));
+ ceph_assert(pi->truncate_size < pi->truncate_from ||
+ (pi->truncate_size == pi->truncate_from &&
+ pi->fscrypt_last_block.length()));
+
+
+ // Use the snap context of the inode's snaprealm if it has one; otherwise
+ // fall back to an empty (default-constructed) SnapContext.
+ SnapRealm *realm = in->find_snaprealm();
+ SnapContext nullsnap;
+ const SnapContext *snapc;
+ if (realm) {
+ dout(10) << " realm " << *realm << dendl;
+ snapc = &realm->get_snap_context();
+ } else {
+ dout(10) << " NO realm, using null context" << dendl;
+ snapc = &nullsnap;
+ ceph_assert(in->last == CEPH_NOSNAP);
+ }
+ dout(10) << "_truncate_inode_write snapc " << snapc << " on " << *in
+ << " fscrypt_last_block length is " << pi->fscrypt_last_block.length()
+ << dendl;
+ // Local copy of the layout; its address is passed to filer.truncate().
+ auto layout = pi->layout;
+ /*
+ * When fscrypt is enabled, truncate_from and truncate_size may be equal,
+ * with both aligned up to the fscrypt block size (header.block_size in
+ * _truncate_inode, passed in here as block_size). In that case we always
+ * request a larger length to make sure the OSD won't miss truncating the
+ * last object.
+ */
+ uint64_t length = pi->truncate_from - pi->truncate_size + block_size;
+ filer.truncate(in->ino(), &layout, *snapc, pi->truncate_size, length,
+ pi->truncate_seq, ceph::real_time::min(), 0,
+ new C_OnFinisher(new C_IO_MDC_TruncateFinish(this, in, ls),
+ mds->finisher));
+}
+
void MDCache::truncate_inode_finish(CInode *in, LogSegment *ls)
{
dout(10) << "truncate_inode_finish " << *in << dendl;
pi.inode->version = in->pre_dirty();
pi.inode->truncate_from = 0;
pi.inode->truncate_pending--;
+ pi.inode->fscrypt_last_block = bufferlist();
EUpdate *le = new EUpdate(mds->mdlog, "truncate finish");
mds->mdlog->start_entry(le);
#include "common/perf_counters.h"
#include "include/compat.h"
#include "osd/OSDMap.h"
+#include "fscrypt.h"
#include <errno.h>
bool truncating_smaller = false;
if (mask & CEPH_SETATTR_SIZE) {
- truncating_smaller = req->head.args.setattr.size < old_size;
+ if (req->get_data().length() >
+ sizeof(struct ceph_fscrypt_last_block_header) + fscrypt_last_block_max_size) {
+ dout(10) << __func__ << ": the last block size is too large" << dendl;
+ respond_to_request(mdr, -CEPHFS_EINVAL);
+ return;
+ }
+
+ truncating_smaller = req->head.args.setattr.size < old_size ||
+ (req->head.args.setattr.size == old_size && req->get_data().length());
if (truncating_smaller && pip->is_truncating()) {
dout(10) << " waiting for pending truncate from " << pip->truncate_from
<< " to " << pip->truncate_size << " to complete on " << *cur << dendl;
cur->add_waiter(CInode::WAIT_TRUNC, new C_MDS_RetryRequest(mdcache, mdr));
return;
}
+
+ if (truncating_smaller && req->get_data().length()) {
+ struct ceph_fscrypt_last_block_header header;
+ memset(&header, 0, sizeof(header));
+ auto bl = req->get_data().cbegin();
+ DECODE_START(1, bl);
+ decode(header.change_attr, bl);
+ DECODE_FINISH(bl);
+
+ dout(20) << __func__ << " mdr->retry:" << mdr->retry
+ << " header.change_attr: " << header.change_attr
+ << " header.file_offset: " << header.file_offset
+ << " header.block_size: " << header.block_size
+ << dendl;
+
+ if (header.change_attr != pip->change_attr) {
+ dout(5) << __func__ << ": header.change_attr:" << header.change_attr
+ << " != current change_attr:" << pip->change_attr
+ << ", let client retry it!" << dendl;
+ // flush the journal so that clients are as likely as possible to see
+ // the latest change_attr on their next retry
+ mds->mdlog->flush();
+ respond_to_request(mdr, -CEPHFS_EAGAIN);
+ return;
+ }
+ }
}
bool changed_ranges = false;
pi.inode->time_warp_seq++; // maybe not a timewarp, but still a serialization point.
if (mask & CEPH_SETATTR_SIZE) {
if (truncating_smaller) {
- pi.inode->truncate(old_size, req->head.args.setattr.size);
+ pi.inode->truncate(old_size, req->head.args.setattr.size, req->get_data());
le->metablob.add_truncate_start(cur->ino());
} else {
pi.inode->size = req->head.args.setattr.size;
bool is_file() const { return (mode & S_IFMT) == S_IFREG; }
bool is_truncating() const { return (truncate_pending > 0); }
+ // Begin a truncate from old_size to new_size and attach an fscrypt
+ // last-block payload to it. Delegates the size bookkeeping to
+ // truncate(old_size, new_size), then assigns fbl to fscrypt_last_block.
+ // fbl may be empty; the setattr caller passes the request's data as-is.
+ void truncate(uint64_t old_size, uint64_t new_size, const bufferlist &fbl) {
+ truncate(old_size, new_size);
+ fscrypt_last_block = fbl;
+ }
void truncate(uint64_t old_size, uint64_t new_size) {
- ceph_assert(new_size < old_size);
+ ceph_assert(new_size <= old_size);
if (old_size > max_size_ever)
max_size_ever = old_size;
truncate_from = old_size;
std::vector<uint8_t> fscrypt_auth;
std::vector<uint8_t> fscrypt_file;
+ bufferlist fscrypt_last_block;
+
private:
bool older_is_consistent(const inode_t &other) const;
};
template<template<typename> class Allocator>
void inode_t<Allocator>::encode(ceph::buffer::list &bl, uint64_t features) const
{
- ENCODE_START(18, 6, bl);
+ ENCODE_START(19, 6, bl);
encode(ino, bl);
encode(rdev, bl);
encode(!fscrypt_auth.empty(), bl);
encode(fscrypt_auth, bl);
encode(fscrypt_file, bl);
+ encode(fscrypt_last_block, bl);
ENCODE_FINISH(bl);
}
template<template<typename> class Allocator>
void inode_t<Allocator>::decode(ceph::buffer::list::const_iterator &p)
{
- DECODE_START_LEGACY_COMPAT_LEN(18, 6, 6, p);
+ DECODE_START_LEGACY_COMPAT_LEN(19, 6, 6, p);
decode(ino, p);
decode(rdev, p);
decode(fscrypt_auth, p);
decode(fscrypt_file, p);
}
+
+ if (struct_v >= 19) {
+ decode(fscrypt_last_block, p);
+ }
DECODE_FINISH(p);
}