// init log
FileRef log_file = ceph::make_ref<File>();
log_file->fnode.ino = 1;
- log_file->vselector_hint = vselector->get_hint_by_device(BDEV_WAL);
+ log_file->vselector_hint = vselector->get_hint_for_log();
int r = _allocate(
vselector->select_prefer_bdev(log_file->vselector_hint),
cct->_conf->bluefs_max_log_runway,
if (!noop) {
log_file->fnode = super.log_fnode;
log_file->vselector_hint =
- vselector->get_hint_by_device(BDEV_WAL);
+ vselector->get_hint_for_log();
} else {
// do not use fnode from superblock in 'noop' mode - log_file's one should
// be fine and up-to-date
FileRef file;
bool create = false;
+ bool truncate = false;
map<string,FileRef>::iterator q = dir->file_map.find(filename);
if (q == dir->file_map.end()) {
if (overwrite) {
for (auto& p : file->fnode.extents) {
pending_release[p.bdev].insert(p.offset, p.length);
}
+ truncate = true;
file->fnode.clear_extents();
}
file->fnode.mtime = ceph_clock_now();
file->vselector_hint = vselector->get_hint_by_dir(dirname);
+ if (create || truncate) {
+ vselector->add_usage(file->vselector_hint, file->fnode); // update file count
+ }
dout(20) << __func__ << " mapping " << dirname << "/" << filename
<< " vsel_hint " << file->vselector_hint
// ===============================================
// OriginalVolumeSelector
-void* OriginalVolumeSelector::get_hint_by_device(uint8_t dev) const {
- return reinterpret_cast<void*>(dev);
+void* OriginalVolumeSelector::get_hint_for_log() const { // hint assigned to the BlueFS log file's vselector_hint
+ return reinterpret_cast<void*>(BlueFS::BDEV_WAL); // opaque hint is just the device id; fixed to BDEV_WAL now that the caller no longer passes a device
}
void* OriginalVolumeSelector::get_hint_by_dir(const string& dirname) const {
uint8_t res = BlueFS::BDEV_DB;
virtual ~BlueFSVolumeSelector() {
}
- virtual void* get_hint_by_device(uint8_t dev) const = 0;
- virtual void* get_hint_by_dir(const string& dirname) const = 0;
+ virtual void* get_hint_for_log() const = 0;
+ virtual void* get_hint_by_dir(const std::string& dirname) const = 0;
virtual void add_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0;
virtual void sub_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0;
uint64_t _slow_total)
: wal_total(_wal_total), db_total(_db_total), slow_total(_slow_total) {}
- void* get_hint_by_device(uint8_t dev) const override;
- void* get_hint_by_dir(const string& dirname) const override;
+ void* get_hint_for_log() const override;
+ void* get_hint_by_dir(const std::string& dirname) const override;
void add_usage(void* hint, const bluefs_fnode_t& fnode) override {
// do nothing
// - observed maximums on DB dev for DB/WAL/UNSORTED data
// - observed maximum spillovers
uint64_t max_db_use = 0; // max db usage we potentially observed
+ max_db_use += per_level_per_dev_max.at(BlueFS::BDEV_DB, LEVEL_LOG - LEVEL_FIRST);
max_db_use += per_level_per_dev_max.at(BlueFS::BDEV_DB, LEVEL_WAL - LEVEL_FIRST);
max_db_use += per_level_per_dev_max.at(BlueFS::BDEV_DB, LEVEL_DB - LEVEL_FIRST);
// this could go to db hence using it in the estimation
}
}
break;
+ case LEVEL_LOG:
case LEVEL_WAL:
res = BlueFS::BDEV_WAL;
break;
<< ", slow_total:" << l_totals[LEVEL_SLOW - LEVEL_FIRST]
<< ", db_avail:" << db_avail4slow << std::endl
<< "Usage matrix:" << std::endl;
- constexpr std::array<const char*, 7> names{ {
+ constexpr std::array<const char*, 8> names{ {
"DEV/LEV",
"WAL",
"DB",
"SLOW",
"*",
"*",
- "REAL"
+ "REAL",
+ "FILES",
} };
const size_t width = 12;
for (size_t i = 0; i < names.size(); ++i) {
sout.setf(std::ios::left, std::ios::adjustfield);
sout.width(width);
switch (l + LEVEL_FIRST) {
+ case LEVEL_LOG:
+ sout << "LOG"; break;
case LEVEL_WAL:
sout << "WAL"; break;
case LEVEL_DB:
case LEVEL_MAX:
sout << "TOTALS"; break;
}
- for (size_t d = 0; d < max_x - 1; d++) {
+ for (size_t d = 0; d < max_x; d++) {
sout.setf(std::ios::left, std::ios::adjustfield);
sout.width(width);
sout << stringify(byte_u_t(per_level_per_dev_usage.at(d, l)));
}
sout.setf(std::ios::left, std::ios::adjustfield);
sout.width(width);
- sout << stringify(byte_u_t(per_level_per_dev_usage.at(max_x - 1, l)))
- << std::endl;
+ sout << stringify(per_level_files[l]) << std::endl;
}
ceph_assert(max_x == per_level_per_dev_max.get_max_x());
ceph_assert(max_y == per_level_per_dev_max.get_max_y());
sout.setf(std::ios::left, std::ios::adjustfield);
sout.width(width);
switch (l + LEVEL_FIRST) {
+ case LEVEL_LOG:
+ sout << "LOG"; break;
case LEVEL_WAL:
sout << "WAL"; break;
case LEVEL_DB:
enum {
// use 0/nullptr as unset indication
LEVEL_FIRST = 1,
- LEVEL_WAL = LEVEL_FIRST,
+ LEVEL_LOG = LEVEL_FIRST, // BlueFS log
+ LEVEL_WAL,
LEVEL_DB,
LEVEL_SLOW,
LEVEL_MAX
typedef matrix_2d<uint64_t, BlueFS::MAX_BDEV + 1, LEVEL_MAX - LEVEL_FIRST + 1> per_level_per_dev_usage_t;
per_level_per_dev_usage_t per_level_per_dev_usage;
+ // file count per level; the extra last slot [LEVEL_MAX - LEVEL_FIRST] holds the total file count across all levels
+ uint64_t per_level_files[LEVEL_MAX - LEVEL_FIRST + 1] = { 0 };
// Note: maximum per-device totals below might be smaller than corresponding
// perf counters by up to a single alloc unit (1M) due to superblock extent.
uint64_t reserved,
bool new_pol)
{
+ l_totals[LEVEL_LOG - LEVEL_FIRST] = 0; // not used at the moment
l_totals[LEVEL_WAL - LEVEL_FIRST] = _wal_total;
l_totals[LEVEL_DB - LEVEL_FIRST] = _db_total;
l_totals[LEVEL_SLOW - LEVEL_FIRST] = _slow_total;
}
}
- void* get_hint_by_device(uint8_t dev) const override {
- ceph_assert(dev == BlueFS::BDEV_WAL); // others aren't used atm
- return reinterpret_cast<void*>(LEVEL_WAL);
+ void* get_hint_for_log() const override { // hint for the BlueFS log file
+ return reinterpret_cast<void*>(LEVEL_LOG); // hint encodes the dedicated LEVEL_LOG level (the replaced accessor returned LEVEL_WAL)
}
void* get_hint_by_dir(const string& dirname) const override;
max = cur;
}
}
+ ++per_level_files[pos];
+ ++per_level_files[LEVEL_MAX - LEVEL_FIRST];
}
void sub_usage(void* hint, const bluefs_fnode_t& fnode) override {
if (hint == nullptr)
auto& cur = per_level_per_dev_usage.at(BlueFS::MAX_BDEV, pos);
ceph_assert(cur >= fnode.size);
cur -= fnode.size;
+ ceph_assert(per_level_files[pos] > 0);
+ --per_level_files[pos];
+ ceph_assert(per_level_files[LEVEL_MAX - LEVEL_FIRST] > 0);
+ --per_level_files[LEVEL_MAX - LEVEL_FIRST];
}
void add_usage(void* hint, uint64_t fsize) override {
if (hint == nullptr)