git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os, osd, tools: Add backportable compatibility checking for sharded objects
author David Zafman <david.zafman@inktank.com>
Wed, 25 Sep 2013 16:19:16 +0000 (09:19 -0700)
committer David Zafman <david.zafman@inktank.com>
Thu, 21 Nov 2013 02:38:36 +0000 (18:38 -0800)
OSD
  New CEPH_OSD_FEATURE_INCOMPAT_SHARDS
FileStore
  New CEPH_FS_FEATURE_INCOMPAT_SHARDS
  Add FSSuperblock with feature CompatSet in it
  Store sharded_objects state using CompatSet
  Add set_allow_sharded_objects() and get_allow_sharded_objects() to FileStore/ObjectStore
  Add read_superblock()/write_superblock() internal filestore functions
ceph_filestore_dump
  Add OSDSuperblock to export format
  Use CompatSet from OSD code itself in filestore-dump tool
  Always check compatibility of OSD features with on-disk features
  On import verify compatibility of on-disk features with export data
  Bump super_ver due to export format change

Backport: dumpling, cuttlefish

Signed-off-by: David Zafman <david.zafman@inktank.com>
Reviewed-by: Samuel Just <sam.just@inktank.com>
(cherry picked from commit c6b83180f9f769de27ca7890f5f8ec507ee743ca)

Conflicts:

src/os/FileStore.cc
src/os/FileStore.h
src/osd/OSD.cc

Excluded from cherry-pick:
  Didn't add set_allow_sharded_objects() and get_allow_sharded_objects() to FileStore/ObjectStore
  Didn't add code to check for incomplete transition to sharded objects in ceph-filestore-dump

src/os/FileStore.cc
src/os/FileStore.h
src/osd/OSD.cc
src/osd/OSD.h
src/osd/osd_types.h
src/tools/ceph-filestore-dump.cc

index 7418039ece6328515a1a4ffba827ec4aef27f328..62e902212262ee99644c0d7e09d7753b4cffc063 100644 (file)
@@ -101,6 +101,22 @@ static const __SWORD_TYPE XFS_SUPER_MAGIC(0x58465342);
 #define REPLAY_GUARD_XATTR "user.cephos.seq"
 #define GLOBAL_REPLAY_GUARD_XATTR "user.cephos.gseq"
 
+//Initial features in new superblock: no feature is inserted into any of the three sets.
+static CompatSet get_fs_initial_compat_set() {
+  CompatSet::FeatureSet ceph_osd_feature_compat;     // NOTE(review): "ceph_osd_" prefix although these are FileStore features
+  CompatSet::FeatureSet ceph_osd_feature_ro_compat;
+  CompatSet::FeatureSet ceph_osd_feature_incompat;   // CEPH_FS_FEATURE_INCOMPAT_SHARDS is not inserted here
+  return CompatSet(ceph_osd_feature_compat, ceph_osd_feature_ro_compat,
+                  ceph_osd_feature_incompat);
+}
+
+//Features are added here that this FileStore supports. mount() compares this set against the on-disk superblock.
+static CompatSet get_fs_supported_compat_set() {
+  CompatSet compat =  get_fs_initial_compat_set();  // start from the initial set; nothing is added to it below
+  //Any features here can be set in code, but not in initial superblock
+  return compat;
+}
+
 /*
  * long file names will have the following format:
  *
@@ -478,6 +494,8 @@ FileStore::FileStore(const std::string &base, const std::string &jdev, const cha
 
   g_ceph_context->get_perfcounters_collection()->add(logger);
   g_ceph_context->_conf->add_observer(this);
+
+  superblock.compat_features = get_fs_initial_compat_set();
 }
 
 FileStore::~FileStore()
@@ -679,6 +697,13 @@ int FileStore::mkfs()
     goto close_fsid_fd;
   }
 
+  ret = write_superblock();
+  if (ret < 0) {
+    derr << "mkfs: write_superblock() failed: "
+        << cpp_strerror(ret) << dendl;
+    goto close_fsid_fd;
+  }
+
   struct statfs basefs;
   ret = ::fstatfs(basedir_fd, &basefs);
   if (ret < 0) {
@@ -1339,6 +1364,52 @@ int FileStore::_sanity_check_fs()
   return 0;
 }
 
+int FileStore::write_superblock()  // Encode `superblock` and write it to <basedir>/superblock; 0 on success, negative error otherwise.
+{
+  char fn[PATH_MAX];
+  snprintf(fn, sizeof(fn), "%s/superblock", basedir.c_str());
+  int fd = ::open(fn, O_WRONLY|O_CREAT|O_TRUNC, 0644);  // NOTE(review): truncates in place; a crash before the write lands leaves an empty file - consider temp file + rename
+  if (fd < 0)
+    return -errno;
+  bufferlist bl;
+  ::encode(superblock, bl);
+
+  int ret = safe_write(fd, bl.c_str(), bl.length());
+  if (ret < 0)
+    goto out;  // skip fsync but still close fd below
+  ret = ::fsync(fd);
+  if (ret < 0)
+    ret = -errno;
+  // XXX: fsync() man page says I need to sync containing directory
+out:
+  TEMP_FAILURE_RETRY(::close(fd));  // close() result is not checked
+  return ret;
+}
+
+int FileStore::read_superblock()
+{
+  char fn[PATH_MAX];
+  snprintf(fn, sizeof(fn), "%s/superblock", basedir.c_str());
+  int fd = ::open(fn, O_RDONLY, 0644);
+  if (fd < 0) {
+    if (errno == ENOENT) {
+      // If the file doesn't exist write initial CompatSet
+      return write_superblock();
+    } else
+      return -errno;
+  }
+  bufferptr bp(PATH_MAX);
+  int ret = safe_read(fd, bp.c_str(), bp.length());
+  TEMP_FAILURE_RETRY(::close(fd));
+  if (ret < 0)
+    return ret;
+  bufferlist bl;
+  bl.push_back(bp);
+  bufferlist::iterator i = bl.begin();
+  ::decode(superblock, i);
+  return 0;
+}
+
 int FileStore::update_version_stamp()
 {
   return write_version_stamp();
@@ -1426,6 +1497,7 @@ int FileStore::mount()
   char buf[PATH_MAX];
   uint64_t initial_op_seq;
   set<string> cluster_snaps;
+  CompatSet supported_compat_set = get_fs_supported_compat_set();
 
   dout(5) << "basedir " << basedir << " journal " << journalpath << dendl;
   
@@ -1490,6 +1562,20 @@ int FileStore::mount()
     }
   }
 
+  ret = read_superblock();
+  if (ret < 0) {
+    ret = -EINVAL;
+    goto close_fsid_fd;
+  }
+
+  // Check if this FileStore supports all the necessary features to mount
+  if (supported_compat_set.compare(superblock.compat_features) == -1) {
+    derr << "FileStore::mount : Incompatible features set "
+          << superblock.compat_features << dendl;
+    ret = -EINVAL;
+    goto close_fsid_fd;
+  }
+
   // open some dir handles
   basedir_fd = ::open(basedir.c_str(), O_RDONLY);
   if (basedir_fd < 0) {
@@ -5037,3 +5123,39 @@ void FileStore::dump_transactions(list<ObjectStore::Transaction*>& ls, uint64_t
   m_filestore_dump_fmt.flush(m_filestore_dump);
   m_filestore_dump.flush();
 }
+
+// -- FSSuperblock --
+
+void FSSuperblock::encode(bufferlist &bl) const  // Append the versioned encoding of this superblock to bl.
+{
+  ENCODE_START(1, 1, bl);  // presumably (struct version, compat version, target) - confirm against the macro
+  compat_features.encode(bl);  // the CompatSet is the only field encoded
+  ENCODE_FINISH(bl);
+}
+
+void FSSuperblock::decode(bufferlist::iterator &bl)  // Fill compat_features from the encoding at iterator bl (counterpart of encode()).
+{
+  DECODE_START(1, bl);  // presumably the newest struct version understood here - confirm against the macro
+  compat_features.decode(bl);  // the CompatSet is the only field decoded
+  DECODE_FINISH(bl);
+}
+
+void FSSuperblock::dump(Formatter *f) const  // Emit compat_features through f inside one object section.
+{
+  f->open_object_section("compat");  // section name used in the formatted output
+  compat_features.dump(f);
+  f->close_section();  // closes the "compat" section opened above
+}
+
+void FSSuperblock::generate_test_instances(list<FSSuperblock*>& o)  // Append two heap-allocated sample superblocks to o.
+{
+  FSSuperblock z;
+  o.push_back(new FSSuperblock(z));  // instance 1: copy of a default-constructed superblock
+  CompatSet::FeatureSet feature_compat;
+  CompatSet::FeatureSet feature_ro_compat;
+  CompatSet::FeatureSet feature_incompat;
+  feature_incompat.insert(CEPH_FS_FEATURE_INCOMPAT_SHARDS);  // the "sharded objects" incompat feature
+  z.compat_features = CompatSet(feature_compat, feature_ro_compat,
+                                feature_incompat);
+  o.push_back(new FSSuperblock(z));  // instance 2: carries the incompat feature; presumably the caller frees both - confirm
+}
index 86d267dddf1ccd454c57211ae8b00b34351b2ede..fd85c7714f30c503fff371ca032f0274180a8dac 100644 (file)
@@ -51,6 +51,26 @@ using namespace __gnu_cxx;
 # define FALLOC_FL_PUNCH_HOLE 0x2
 #endif
 
+#define CEPH_FS_FEATURE_INCOMPAT_SHARDS CompatSet::Feature(1, "sharded objects")
+
+class FSSuperblock {  // FileStore superblock: holds the feature CompatSet that FileStore reads and writes as <basedir>/superblock.
+public:
+  CompatSet compat_features;  // the only data member; encoded, decoded and dumped by the methods below
+
+  FSSuperblock() { }
+
+  void encode(bufferlist &bl) const;  // append the versioned encoding to bl
+  void decode(bufferlist::iterator &bl);  // fill compat_features from bl
+  void dump(Formatter *f) const;  // emit compat_features inside a "compat" section
+  static void generate_test_instances(list<FSSuperblock*>& o);  // append sample instances to o
+};
+WRITE_CLASS_ENCODER(FSSuperblock)  // presumably provides the ::encode/::decode free-function overloads used by FileStore - confirm
+
+inline ostream& operator<<(ostream& out, const FSSuperblock& sb)  // Stream sb as "sb(<compat_features>)".
+{
+  return out << "sb(" << sb.compat_features << ")";  // returns out so calls can be chained
+}
+
 class FileStore : public JournalingObjectStore,
                   public md_config_obs_t
 {
@@ -528,6 +548,25 @@ private:
   std::ofstream m_filestore_dump;
   JSONFormatter m_filestore_dump_fmt;
   atomic_t m_filestore_kill_at;
+  FSSuperblock superblock;
+
+  /**
+   * write_superblock()
+   *
+   * Write superblock to persistent storage
+   *
+   * return value: 0 on success, otherwise negative errno
+   */
+  int write_superblock();
+
+  /**
+   * read_superblock()
+   *
+   * Fill in FileStore::superblock by reading persistent storage
+   *
+   * return value: 0 on success, otherwise negative errno
+   */
+  int read_superblock();
 };
 
 ostream& operator<<(ostream& out, const FileStore::OpSequencer& s);
index b67e406afbd346f1c795a79d1d0fedab2265aeee..d1234a1f66be4ff988b61e8a94a446ee832702ff 100644 (file)
@@ -135,7 +135,9 @@ static ostream& _prefix(std::ostream* _dout, int whoami, OSDMapRef osdmap) {
 
 const coll_t coll_t::META_COLL("meta");
 
-static CompatSet get_osd_compat_set() {
+//Initial features in new superblock.
+//Features here are also automatically upgraded
+CompatSet OSD::get_osd_initial_compat_set() {
   CompatSet::FeatureSet ceph_osd_feature_compat;
   CompatSet::FeatureSet ceph_osd_feature_ro_compat;
   CompatSet::FeatureSet ceph_osd_feature_incompat;
@@ -153,6 +155,13 @@ static CompatSet get_osd_compat_set() {
                   ceph_osd_feature_incompat);
 }
 
+//Features are added here that this OSD supports.
+CompatSet OSD::get_osd_compat_set() {
+  CompatSet compat =  get_osd_initial_compat_set();  // start from the initial set; nothing is added to it below
+  //Any features here can be set in code, but not in initial superblock
+  return compat;
+}
+
 OSDService::OSDService(OSD *osd) :
   osd(osd),
   whoami(osd->whoami), store(osd->store), clog(osd->clog),
@@ -617,7 +626,7 @@ int OSD::mkfs(const std::string &dev, const std::string &jdev, uuid_d fsid, int
       sb.cluster_fsid = fsid;
       sb.osd_fsid = store->get_fsid();
       sb.whoami = whoami;
-      sb.compat_features = get_osd_compat_set();
+      sb.compat_features = get_osd_initial_compat_set();
 
       // benchmark?
       if (g_conf->osd_auto_weight) {
@@ -1153,11 +1162,12 @@ int OSD::init()
       return r;
   }
 
-  if (osd_compat.compare(superblock.compat_features) != 0) {
+  CompatSet initial = get_osd_initial_compat_set();
+  if (initial.compare(superblock.compat_features) != 0) {
     // We need to persist the new compat_set before we
     // do anything else
     dout(5) << "Upgrading superblock compat_set" << dendl;
-    superblock.compat_features = osd_compat;
+    superblock.compat_features = initial;
     ObjectStore::Transaction t;
     write_superblock(t);
     r = store->apply_transaction(t);
index c6ee04fd7100de3868fc5fb6e4953881a2d5863a..f2bf5c6f55b9a4f10824348fb4421af79b7ad099 100644 (file)
@@ -666,6 +666,25 @@ public:
     return oid;
   }
   static void recursive_remove_collection(ObjectStore *store, coll_t tmp);
+
+  /**
+   * get_osd_initial_compat_set()
+   *
+   * Get the initial feature set for this OSD.  Features
+   * here are automatically upgraded.
+   *
+   * Return value: Initial osd CompatSet
+   */
+  static CompatSet get_osd_initial_compat_set();
+
+  /**
+   * get_osd_compat_set()
+   *
+   * Get all features supported by this OSD
+   *
+   * Return value: CompatSet of all supported features
+   */
+  static CompatSet get_osd_compat_set();
   
 
 private:
index bf04e8e11e384e1257f5b560e803c3b550cb1c01..ddd4c463d2d1699e4b0bc25803f3f31d954f8bd8 100644 (file)
@@ -41,6 +41,7 @@
 #define CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO CompatSet::Feature(8, "leveldbinfo")
 #define CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG CompatSet::Feature(9, "leveldblog")
 #define CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER CompatSet::Feature(10, "snapmapper")
+#define CEPH_OSD_FEATURE_INCOMPAT_SHARDS CompatSet::Feature(11, "sharded objects")
 
 
 typedef hobject_t collection_list_handle_t;
index 3badc2160b0ca9e5be7d0e495cfc8b7224d84374..ac8a70d57cb934cdee05651e15d25ff0cceabb8d 100644 (file)
@@ -52,6 +52,32 @@ enum {
     END_OF_TYPES,      //Keep at the end
 };
 
+//#define INTERNAL_TEST
+//#define INTERNAL_TEST2
+
+#ifdef INTERNAL_TEST  // test-only: main() uses this set as "supported" instead of OSD::get_osd_compat_set()
+CompatSet get_test_compat_set() {  // Build a CompatSet with a hard-coded list of OSD incompat features.
+  CompatSet::FeatureSet ceph_osd_feature_compat;  // left without any features
+  CompatSet::FeatureSet ceph_osd_feature_ro_compat;  // left without any features
+  CompatSet::FeatureSet ceph_osd_feature_incompat;
+  ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE);
+  ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_PGINFO);
+  ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_OLOC);
+  ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEC);
+  ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_CATEGORIES);
+  ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_HOBJECTPOOL);
+  ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BIGINFO);
+  ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO);
+  ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG);
+#ifdef INTERNAL_TEST2  // without INTERNAL_TEST2 the set omits SNAPMAPPER and SHARDS
+  ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER);
+  ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
+#endif
+  return CompatSet(ceph_osd_feature_compat, ceph_osd_feature_ro_compat,
+                  ceph_osd_feature_incompat);
+}
+#endif
+
 typedef uint8_t sectiontype_t;
 typedef uint32_t mymagic_t;
 typedef int64_t mysize_t;
@@ -69,7 +95,7 @@ const int fd_none = INT_MIN;
 //can be added to the export format.
 struct super_header {
   static const uint32_t super_magic = (shortmagic << 16) | shortmagic;
-  static const uint32_t super_ver = 1;
+  static const uint32_t super_ver = 2;
   static const uint32_t FIXED_LENGTH = 16;
   uint32_t magic;
   uint32_t version;
@@ -139,18 +165,25 @@ struct footer {
 
 struct pg_begin {
   pg_t pgid;
+  OSDSuperblock superblock;
 
-  pg_begin(pg_t pg): pgid(pg) { }
+  pg_begin(pg_t pg, OSDSuperblock sb):
+    pgid(pg), superblock(sb) { }
   pg_begin() { }
 
   void encode(bufferlist& bl) const {
-    ENCODE_START(1, 1, bl);
+    // New super_ver prevents decode from ver 1
+    ENCODE_START(2, 2, bl);
     ::encode(pgid, bl);
+    ::encode(superblock, bl);
     ENCODE_FINISH(bl);
   }
   void decode(bufferlist::iterator& bl) {
-    DECODE_START(1, bl);
+    DECODE_START(2, bl);
     ::decode(pgid, bl);
+    if (struct_v > 1) {
+      ::decode(superblock, bl);
+    }
     DECODE_FINISH(bl);
   }
 };
@@ -664,7 +697,7 @@ void write_super()
 }
 
 int do_export(ObjectStore *fs, coll_t coll, pg_t pgid, pg_info_t &info,
-    epoch_t map_epoch, __u8 struct_ver)
+    epoch_t map_epoch, __u8 struct_ver, OSDSuperblock superblock)
 {
   PGLog::IndexedLog log;
   pg_missing_t missing;
@@ -675,7 +708,7 @@ int do_export(ObjectStore *fs, coll_t coll, pg_t pgid, pg_info_t &info,
 
   write_super();
 
-  pg_begin pgb(pgid);
+  pg_begin pgb(pgid, superblock);
   ret = write_section(TYPE_PG_BEGIN, pgb, file_fd);
   if (ret)
     return ret;
@@ -909,7 +942,7 @@ int get_pg_metadata(ObjectStore *store, coll_t coll, bufferlist &bl)
   return 0;
 }
 
-int do_import(ObjectStore *store)
+int do_import(ObjectStore *store, OSDSuperblock sb)
 {
   bufferlist ebl;
   pg_info_t info;
@@ -943,7 +976,16 @@ int do_import(ObjectStore *store)
   pg_begin pgb;
   pgb.decode(ebliter);
   pg_t pgid = pgb.pgid;
-  
+
+  if (debug) {
+    cout << "Exported features: " << pgb.superblock.compat_features << std::endl;
+  }
+  if (sb.compat_features.compare(pgb.superblock.compat_features) == -1) {
+    cout << "Export has incompatible features set "
+      << pgb.superblock.compat_features << std::endl;
+    return 1;
+  }
+
   log_oid = OSD::make_pg_log_oid(pgid);
   biginfo_oid = OSD::make_pg_biginfo_oid(pgid);
 
@@ -1173,11 +1215,45 @@ int main(int argc, char **argv)
   int ret = 0;
   vector<coll_t> ls;
   vector<coll_t>::iterator it;
+  CompatSet supported;
+
+#ifdef INTERNAL_TEST
+  supported = get_test_compat_set();
+#else
+  supported = OSD::get_osd_compat_set();
+#endif
+
+  bufferlist bl;
+  OSDSuperblock superblock;
+  bufferlist::iterator p;
+  ret = fs->read(coll_t::META_COLL, OSD_SUPERBLOCK_POBJECT, 0, 0, bl);
+  if (ret < 0) {
+    cout << "Failure to read OSD superblock error= " << r << std::endl;
+    goto out;
+  }
+
+  p = bl.begin();
+  ::decode(superblock, p);
+
+#ifdef INTERNAL_TEST2
+  superblock.compat_features.incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
+#endif
+
+  if (debug && file_fd != STDOUT_FILENO) {
+    cout << "Supported features: " << supported << std::endl;
+    cout << "On-disk features: " << superblock.compat_features << std::endl;
+  }
+  if (supported.compare(superblock.compat_features) == -1) {
+    cout << "On-disk OSD incompatible features set "
+      << superblock.compat_features << std::endl;
+    ret = EINVAL;
+    goto out;
+  }
 
   if (type == "import") {
 
     try {
-      ret = do_import(fs);
+      ret = do_import(fs, superblock);
     }
     catch (const buffer::error &e) {
       cout << "do_import threw exception error " << e.what() << std::endl;
@@ -1260,7 +1336,7 @@ int main(int argc, char **argv)
       cerr << "struct_v " << (int)struct_ver << std::endl;
 
     if (type == "export") {
-      ret = do_export(fs, coll, pgid, info, map_epoch, struct_ver);
+      ret = do_export(fs, coll, pgid, info, map_epoch, struct_ver, superblock);
     } else if (type == "info") {
       formatter->open_object_section("info");
       info.dump(formatter);