git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
filestore: can force use of stale snaps
author Yehuda Sadeh <yehuda@hq.newdream.net>
Wed, 27 Oct 2010 20:21:11 +0000 (13:21 -0700)
committer Yehuda Sadeh <yehuda@hq.newdream.net>
Wed, 27 Oct 2010 20:21:11 +0000 (13:21 -0700)
also, overwrite the commit_seq with the current version in case we
forced stale snaps.

src/config.cc
src/config.h
src/os/FileStore.cc
src/os/FileStore.h

index 894b3b4822780766270e9ccdc75c0da86c4c990c..e5d3b760b97469e1e15d870dcdd051592f961917 100644 (file)
@@ -532,6 +532,7 @@ static struct config_option config_optionsp[] = {
        OPTION(osd_class_timeout, 0, OPT_DOUBLE, 60*60.0), // seconds
        OPTION(osd_class_tmp, 0, OPT_STR, "/var/lib/ceph/tmp"),
        OPTION(osd_check_for_log_corruption, 0, OPT_BOOL, false),
+       OPTION(osd_use_stale_snap, 0, OPT_BOOL, false),
        OPTION(filestore, 0, OPT_BOOL, false),
        OPTION(filestore_max_sync_interval, 0, OPT_DOUBLE, 5),    // seconds
        OPTION(filestore_min_sync_interval, 0, OPT_DOUBLE, .01),  // seconds
index c6f5214894c4471bf52dfdf435bf0998a969e4d5..3a7f525a00bac8458d9c240440810a1e9a6edbf0 100644 (file)
@@ -382,6 +382,8 @@ struct md_config_t {
 
   bool osd_check_for_log_corruption;  // bleh
 
+  bool osd_use_stale_snap;
+
   // filestore
   bool filestore;
   double   filestore_max_sync_interval;
index b0fc9b763aa9ef0ee9b5b25c12d47f77348747c8..debae9fa9ad2045fe1b27d00600fd56158fa3f33 100644 (file)
@@ -899,6 +899,15 @@ int FileStore::read_op_seq(const char *fn, uint64_t *seq)
   return op_fd;
 }
 
+/*
+ * Write the op sequence number to fd as a decimal number followed by a
+ * newline, starting at file offset 0.
+ *
+ * fd:  open, writable descriptor of the op_seq file
+ * seq: sequence number to record
+ *
+ * Returns the number of bytes written, or -errno if pwrite() failed.
+ */
+int FileStore::write_op_seq(int fd, uint64_t seq)
+{
+  char s[30];
+  // seq is unsigned, so format with %llu; %lld would print values above
+  // LLONG_MAX as negative numbers.
+  int len = snprintf(s, sizeof(s), "%llu\n", (long long unsigned)seq);
+  int ret = ::pwrite(fd, s, len, 0);
+  if (ret < 0)
+    return -errno;  // keep the failure reason instead of a bare -1
+  return ret;
+}
+
 int FileStore::mount() 
 {
   char buf[80];
@@ -971,42 +980,71 @@ int FileStore::mount()
       uint64_t curr_seq;
 
       int curr_fd = read_op_seq(current_op_seq_fn, &curr_seq);
-      if (curr_fd >= 0)
-        close(curr_fd);
+      assert(curr_fd >= 0);
+      close(curr_fd);
       dout(10) << "*** curr_seq=" << curr_seq << " cp=" << cp << dendl;
-      
-      if (cp >= curr_seq) {
-        // drop current
-        snapargs.fd = 0;
-        strcpy(snapargs.name, "current");
-        int r = ::ioctl(basedir_fd,
-                       BTRFS_IOC_SNAP_DESTROY,
-                       &snapargs);
-        if (r) {
-         char buf[80];
-         dout(0) << "error removing old current subvol: " << strerror_r(errno, buf, sizeof(buf)) << dendl;
-         char s[PATH_MAX];
-         snprintf(s, sizeof(s), "%s/current.remove.me.%d", basedir.c_str(), rand());
-         r = ::rename(current_fn, s);
-         if (r) {
-           dout(0) << "error renaming old current subvol: " << strerror_r(errno, buf, sizeof(buf)) << dendl;
-           return -errno;
-         }
-        }
-        assert(r == 0);
-      
-        // roll back
-        char s[PATH_MAX];
-        snprintf(s, sizeof(s), "%s/" COMMIT_SNAP_ITEM, basedir.c_str(), (long long unsigned)cp);
-        snapargs.fd = ::open(s, O_RDONLY);
-        r = ::ioctl(basedir_fd, BTRFS_IOC_SNAP_CREATE, &snapargs);
-        assert(r == 0);
-        ::close(snapargs.fd);
-        dout(10) << "mount rolled back to consistent snap " << cp << dendl;
-        snaps.pop_back();
-      } else {
-          dout(0) << "WARNING: skipping revert of current subvol to last snap, curr_seq=" << curr_seq << " snap seq=" << cp << dendl;
+     
+      if (cp != curr_seq && !g_conf.osd_use_stale_snap) { 
+        dout(0) << "\n"
+             << " ** ERROR: current volume data version is not equal to snapshotted version\n"
+            << "           which can lead to data inconsistency. \n"
+            << "           Current version=" << curr_seq << " snapshot version=" << cp << "\n"
+            << "           Startup with snapshotted version can be forced using the\n"
+             <<"            'osd use stale snap = true' config option.\n"
+            << dendl;
+        cerr << TEXT_RED
+            << " ** ERROR: current volume data version is not equal to snapshotted version\n"
+            << "           which can lead to data inconsistency. \n"
+            << "           Current version=" << curr_seq << " snapshot version=" << cp << "\n"
+            << "           Startup with snapshotted version can be forced using the\n"
+             <<"            'osd use stale snap = true' config option.\n"
+            << TEXT_NORMAL;
+        exit(1);
+      }
+
+      if (cp != curr_seq) {
+        dout(0) << "WARNING: user forced start with data sequence mismatch: curr=" << curr_seq << " snap_seq=" << cp << dendl;
+        cerr << TEXT_YELLOW
+            << " ** WARNING: forcing the use of stale snapshot data\n" << TEXT_NORMAL;
+      }
+
+      // drop current
+      snapargs.fd = 0;
+      strcpy(snapargs.name, "current");
+      int r = ::ioctl(basedir_fd,
+                     BTRFS_IOC_SNAP_DESTROY,
+                     &snapargs);
+      if (r) {
+       char buf[80];
+       dout(0) << "error removing old current subvol: " << strerror_r(errno, buf, sizeof(buf)) << dendl;
+       char s[PATH_MAX];
+       snprintf(s, sizeof(s), "%s/current.remove.me.%d", basedir.c_str(), rand());
+       r = ::rename(current_fn, s);
+       if (r) {
+         dout(0) << "error renaming old current subvol: " << strerror_r(errno, buf, sizeof(buf)) << dendl;
+         return -errno;
+       }
+      }
+      assert(r == 0);
+
+      // roll back
+      char s[PATH_MAX];
+      snprintf(s, sizeof(s), "%s/" COMMIT_SNAP_ITEM, basedir.c_str(), (long long unsigned)cp);
+      snapargs.fd = ::open(s, O_RDONLY);
+      r = ::ioctl(basedir_fd, BTRFS_IOC_SNAP_CREATE, &snapargs);
+      assert(r == 0);
+      ::close(snapargs.fd);
+      dout(10) << "mount rolled back to consistent snap " << cp << dendl;
+      snaps.pop_back();
+
+      assert(curr_fd >= 0);
+      if (cp != curr_seq) {
+        curr_fd = read_op_seq(current_op_seq_fn, &curr_seq);
+        /* we'll use the higher version from now on */
+        curr_seq = cp;
+        write_op_seq(curr_fd, curr_seq);
       }
+      close(curr_fd);
     }
   }
 
@@ -2084,9 +2122,7 @@ void FileStore::sync_entry()
       sync_epoch++;
 
       dout(15) << "sync_entry committing " << cp << " sync_epoch " << sync_epoch << dendl;
-      char s[30];
-      sprintf(s, "%lld\n", (long long unsigned)cp);
-      ::pwrite(op_fd, s, strlen(s), 0);
+      write_op_seq(op_fd, cp);
 
       bool do_snap = btrfs && g_conf.filestore_btrfs_snap;
 
index 714ac5e91927aecabfa3f8e516119498072f2d95..9d93bf88c0d4b7265648ab9d1470b6d327ac2b3b 100644 (file)
@@ -197,6 +197,7 @@ class FileStore : public JournalingObjectStore {
   
   bool test_mount_in_use();
   int read_op_seq(const char *fn, uint64_t *seq);
+  int write_op_seq(int, uint64_t seq);
   int mount();
   int umount();
   int wipe_subvol(const char *s);