* the callback function passed to LibRGWFS.readdir() now accepts a ``flags``
parameter. it will be the last parameter passed to ``readdir()` method.
+* The 'cephfs-data-scan scan_links' now automatically repair inotables.
+
>=13.1.0
--------
It is **important** to ensure that all workers have completed the
scan_extents phase before any workers enter the scan_inodes phase.
-Output of 'scan_links' command includes max used inode number for each
-MDS rank. You may need to update InoTables of each MDS rank.
-
-::
- cephfs-table-tool recovery-fs:x show inode
- cephfs-table-tool recovery-fs:x take_inos <max ino of mds.x)
-
-
After completing the metadata recovery, you may want to run cleanup
operation to delete ancillary data geneated during recovery.
import logging
import os
+import time
from textwrap import dedent
import traceback
from collections import namedtuple, defaultdict
log.info("{0}: {1}".format(pg_str, lines))
self.assertSetEqual(set(lines), set(pgs_to_files[pg_str]))
- def test_scan_links(self):
+ def test_rebuild_linkage(self):
"""
The scan_links command fixes linkage errors
"""
# link count was adjusted?
file1_nlink = self.mount_a.path_to_nlink("testdir1/file1")
self.assertEqual(file1_nlink, 2)
+
+ def test_rebuild_inotable(self):
+ """
+ The scan_links command repair inotables
+ """
+ self.fs.set_max_mds(2)
+ self.fs.wait_for_daemons()
+
+ active_mds_names = self.fs.get_active_names()
+ mds0_id = active_mds_names[0]
+ mds1_id = active_mds_names[1]
+
+ self.mount_a.run_shell(["mkdir", "dir1"])
+ dir_ino = self.mount_a.path_to_ino("dir1")
+ self.mount_a.setfattr("dir1", "ceph.dir.pin", "1")
+ # wait for subtree migration
+
+ file_ino = 0;
+ while True:
+ time.sleep(1)
+ # allocate an inode from mds.1
+ self.mount_a.run_shell(["touch", "dir1/file1"])
+ file_ino = self.mount_a.path_to_ino("dir1/file1")
+ if file_ino >= (2 << 40):
+ break
+ self.mount_a.run_shell(["rm", "-f", "dir1/file1"])
+
+ self.mount_a.umount_wait()
+
+ self.fs.mds_asok(["flush", "journal"], mds0_id)
+ self.fs.mds_asok(["flush", "journal"], mds1_id)
+ self.mds_cluster.mds_stop()
+
+ self.fs.rados(["rm", "mds0_inotable"])
+ self.fs.rados(["rm", "mds1_inotable"])
+
+ self.fs.data_scan(["scan_links", "--filesystem", self.fs.name])
+
+ mds0_inotable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "inode"]))
+ self.assertGreaterEqual(
+ mds0_inotable['0']['data']['inotable']['free'][0]['start'], dir_ino)
+
+ mds1_inotable = json.loads(self.fs.table_tool([self.fs.name + ":1", "show", "inode"]))
+ self.assertGreaterEqual(
+ mds1_inotable['1']['data']['inotable']['free'][0]['start'], file_ino)
dout(10) << "repair: after status. ino = " << id << " pver =" << projected_version << " ver= " << version << dendl;
return true;
}
+
+bool InoTable::force_consume_to(inodeno_t ino)
+{
+ auto it = free.begin();
+ if (it != free.end() && it.get_start() <= ino) {
+ inodeno_t min = it.get_start();
+ derr << "erasing " << min << " to " << ino << dendl;
+ free.erase(min, ino - min + 1);
+ projected_free = free;
+ projected_version = ++version;
+ return true;
+ } else {
+ return false;
+ }
+}
*
* @return true if the table was modified
*/
- bool force_consume_to(inodeno_t ino)
- {
- if (free.contains(ino)) {
- inodeno_t min = free.begin().get_start();
- std::cerr << "Erasing " << min << " to " << ino << std::endl;
- free.erase(min, ino - min + 1);
- projected_free = free;
- projected_version = ++version;
- return true;
- } else {
- return false;
- }
- }
+ bool force_consume_to(inodeno_t ino);
};
WRITE_CLASS_ENCODER(InoTable)
{
char n[50];
if (per_mds)
- snprintf(n, sizeof(n), "mds%d_%s", int(mds->get_nodeid()), table_name);
+ snprintf(n, sizeof(n), "mds%d_%s", int(rank), table_name);
else
snprintf(n, sizeof(n), "mds_%s", table_name);
return object_t(n);
bool per_mds;
mds_rank_t rank;
- object_t get_object_name() const;
static const int STATE_UNDEF = 0;
static const int STATE_OPENING = 1;
if (is_active()) save(0);
}
+ object_t get_object_name() const;
void load(MDSInternalContextBase *onfinish);
void load_2(int, bufferlist&, Context *onfinish);
#include "include/util.h"
#include "mds/CInode.h"
+#include "mds/InoTable.h"
#include "cls/cephfs/cls_cephfs_client.h"
#include "PgFiles.h"
}
for (auto& p : max_ino_map) {
- std::cout << "mds." << p.first << " max used ino " << p.second << std::endl;
+ InoTable inotable(nullptr);
+ inotable.set_rank(p.first);
+ bool dirty = false;
+ int r = metadata_driver->load_table(&inotable);
+ if (r < 0) {
+ inotable.reset_state();
+ dirty = true;
+ }
+ if (inotable.force_consume_to(p.second))
+ dirty = true;
+ if (dirty) {
+ r = metadata_driver->save_table(&inotable);
+ if (r < 0)
+ return r;
+ }
}
return 0;
return 0;
}
+int MetadataDriver::load_table(MDSTable *table)
+{
+ object_t table_oid = table->get_object_name();
+
+ bufferlist table_bl;
+ int r = metadata_io.read(table_oid.name, table_bl, 0, 0);
+ if (r < 0) {
+ derr << "unable to read mds table '" << table_oid.name << "': "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ try {
+ version_t table_ver;
+ auto p = table_bl.cbegin();
+ decode(table_ver, p);
+ table->decode_state(p);
+ table->force_replay_version(table_ver);
+ } catch (const buffer::error &err) {
+ derr << "unable to decode mds table '" << table_oid.name << "': "
+ << err.what() << dendl;
+ return -EIO;
+ }
+ return 0;
+}
+
+int MetadataDriver::save_table(MDSTable *table)
+{
+ object_t table_oid = table->get_object_name();
+
+ bufferlist table_bl;
+ encode(table->get_version(), table_bl);
+ table->encode_state(table_bl);
+ int r = metadata_io.write_full(table_oid.name, table_bl);
+ if (r != 0) {
+ derr << "error updating mds table " << table_oid.name
+ << ": " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ return 0;
+}
+
int MetadataDriver::inject_lost_and_found(
inodeno_t ino, const InodeStore &dentry)
{
#include "include/rados/librados.hpp"
class InodeStore;
+class MDSTable;
class RecoveryDriver {
protected:
int init_roots(int64_t data_pool_id) override;
int check_roots(bool *result) override;
+
+ int load_table(MDSTable *table);
+ int save_table(MDSTable *table);
};
class DataScan : public MDSUtility, public MetadataTool