]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
os/bluestore/BlueFS: fix device_migrate_to_* to handle varying alloc sizes
authorSage Weil <sage@redhat.com>
Thu, 8 Aug 2019 18:30:59 +0000 (13:30 -0500)
committerSage Weil <sage@redhat.com>
Fri, 9 Aug 2019 15:10:12 +0000 (10:10 -0500)
The previous implementation moved extents individually.  This caused
problems when moving an extent with a small alloc_size that wasn't
a multiple of the target device's alloc_size.

Instead, identify files with extents that need to be moved, and then read
the file in its entirety and rewrite it in its entirety.

Signed-off-by: Sage Weil <sage@redhat.com>
src/os/bluestore/BlueFS.cc

index 15b217becf17ced2777ce35dd301d01aa15bcf75..4ea80634a13ac7f9598c955c9f3aca9b59c25394 100644 (file)
@@ -1166,6 +1166,8 @@ int BlueFS::device_migrate_to_existing(
   vector<byte> buf;
   bool buffered = cct->_conf->bluefs_buffered_io;
 
+  dout(10) << __func__ << " devs_source " << devs_source
+          << " dev_target " << dev_target << dendl;
   assert(dev_target < (int)MAX_BDEV);
 
   int flags = 0;
@@ -1186,71 +1188,90 @@ int BlueFS::device_migrate_to_existing(
     if (p.second->fnode.ino == 1) {
       continue;
     }
+    dout(10) << __func__ << " " << p.first << " " << p.second->fnode << dendl;
+
     auto& fnode_extents = p.second->fnode.extents;
 
+    bool rewrite = false;
     for (auto ext_it = fnode_extents.begin();
-      ext_it != p.second->fnode.extents.end();
-      ++ext_it) {
+        ext_it != p.second->fnode.extents.end();
+        ++ext_it) {
       if (ext_it->bdev != dev_target && devs_source.count(ext_it->bdev)) {
-       bluefs_extent_t old_ext = *ext_it;
-       PExtentVector extents;
-       auto l =
-         _allocate_without_fallback(dev_target, old_ext.length, &extents);
-       if (l == 0) {
-         buf.resize(old_ext.length);
-         int r = bdev[old_ext.bdev]->read_random(
-           old_ext.offset,
-           old_ext.length,
-           (char*)&buf.at(0),
-           buffered);
-         if (r != 0) {
-           derr << __func__ << " failed to read 0x" << std::hex
-             << old_ext.offset << "~" <<old_ext.length << std::dec
-             << " from " << (int)dev_target << dendl;
-           return -EIO;
-         }
+       rewrite = true;
+       break;
+      }
+    }
+    if (rewrite) {
+      dout(10) << __func__ << "  migrating" << dendl;
+
+      // read entire file
+      bufferlist bl;
+      for (auto old_ext : fnode_extents) {
+       buf.resize(old_ext.length);
+       int r = bdev[old_ext.bdev]->read_random(
+         old_ext.offset,
+         old_ext.length,
+         (char*)&buf.at(0),
+         buffered);
+       if (r != 0) {
+         derr << __func__ << " failed to read 0x" << std::hex
+              << old_ext.offset << "~" << old_ext.length << std::dec
+              << " from " << (int)dev_target << dendl;
+         return -EIO;
+       }
+       bl.append((char*)&buf[0], old_ext.length);
+      }
 
-         assert(extents.size() > 0);
-         uint64_t src_buf_pos = 0;
-         {
-           // overwrite existing extent
-           *ext_it=
-             bluefs_extent_t(dev_target_new, extents[0].offset, extents[0].length);
-           bufferlist bl;
-           bl.append((char*)&buf.at(src_buf_pos), extents[0].length);
-           int r = bdev[dev_target]->write(extents[0].offset, bl, buffered);
-           ceph_assert(r == 0);
-           src_buf_pos += extents[0].length;
-         }
-         // then insert more extents if needed
-         for( size_t i = 1; i < extents.size(); ++i) {
-           bufferlist bl;
-           bl.append((char*)&buf.at(src_buf_pos), extents[i].length);
-           ++ext_it;
-           ext_it = fnode_extents.emplace(ext_it, dev_target_new,
-             extents[i].offset, extents[i].length);
-           int r = bdev[dev_target]->write(extents[i].offset, bl, buffered);
-           ceph_assert(r == 0);
-           src_buf_pos += extents[i].length;
-         }
-         {
-           PExtentVector to_release;
-           to_release.emplace_back(old_ext.offset, old_ext.length);
-           alloc[old_ext.bdev]->release(to_release);
-         }
+      // write entire file
+      PExtentVector extents;
+      auto l = _allocate_without_fallback(dev_target, bl.length(), &extents);
+      if (l < 0) {
+       derr << __func__ << " unable to allocate len 0x" << std::hex
+            << bl.length() << std::dec << " from " << (int)dev_target
+            << ": " << cpp_strerror(l) << dendl;
+       return -ENOSPC;
+      }
 
-       } else {
-         derr << __func__ << " unable to allocate len 0x" << std::hex
-           << old_ext.length << std::dec << " from " << (int)dev_target
-           << dendl;
-         return -ENOSPC;
+      uint64_t off = 0;
+      for (auto& i : extents) {
+       bufferlist cur;
+       uint64_t cur_len = std::min<uint64_t>(i.length, bl.length() - off);
+       ceph_assert(cur_len > 0);
+       cur.substr_of(bl, off, cur_len);
+       int r = bdev[dev_target]->write(i.offset, cur, buffered);
+       ceph_assert(r == 0);
+       off += cur_len;
+      }
+
+      // release old extents
+      for (auto old_ext : fnode_extents) {
+       PExtentVector to_release;
+       to_release.emplace_back(old_ext.offset, old_ext.length);
+       alloc[old_ext.bdev]->release(to_release);
+      }
+
+      // update fnode
+      fnode_extents.clear();
+      for (auto& i : extents) {
+       fnode_extents.emplace_back(dev_target_new, i.offset, i.length);
+      }
+    } else {
+      for (auto ext_it = fnode_extents.begin();
+          ext_it != p.second->fnode.extents.end();
+          ++ext_it) {
+       if (dev_target != dev_target_new && ext_it->bdev == dev_target) {
+         dout(20) << __func__ << "  " << " ... adjusting extent 0x"
+                  << std::hex << ext_it->offset << std::dec
+                  << " bdev " << dev_target << " -> " << dev_target_new
+                  << dendl;
+         ext_it->bdev = dev_target_new;
        }
-      } else if (dev_target != dev_target_new && ext_it->bdev == dev_target) {
-       ext_it->bdev = dev_target_new;
       }
     }
     auto& prefer_bdev = p.second->fnode.prefer_bdev;
     if (prefer_bdev != dev_target && devs_source.count(prefer_bdev)) {
+      dout(20) << __func__ << "  " << " ... adjusting prefer_bdev "
+              << prefer_bdev << " -> " << dev_target_new << dendl;
       prefer_bdev = dev_target_new;
     }
   }
@@ -1294,6 +1315,8 @@ int BlueFS::device_migrate_to_new(
   vector<byte> buf;
   bool buffered = cct->_conf->bluefs_buffered_io;
 
+  dout(10) << __func__ << " devs_source " << devs_source
+          << " dev_target " << dev_target << dendl;
   assert(dev_target == (int)BDEV_NEWDB || (int)BDEV_NEWWAL);
 
   int flags = 0;
@@ -1309,71 +1332,78 @@ int BlueFS::device_migrate_to_new(
     if (p.second->fnode.ino == 1) {
       continue;
     }
+    dout(10) << __func__ << " " << p.first << " " << p.second->fnode << dendl;
+
     auto& fnode_extents = p.second->fnode.extents;
 
+    bool rewrite = false;
     for (auto ext_it = fnode_extents.begin();
-      ext_it != p.second->fnode.extents.end();
-      ++ext_it) {
+        ext_it != p.second->fnode.extents.end();
+        ++ext_it) {
       if (ext_it->bdev != dev_target && devs_source.count(ext_it->bdev)) {
-       bluefs_extent_t old_ext = *ext_it;
-       PExtentVector extents;
-       auto l =
-         _allocate_without_fallback(dev_target, old_ext.length, &extents);
-       if (l == 0) {
-         buf.resize(old_ext.length);
-         int r = bdev[old_ext.bdev]->read_random(
-           old_ext.offset,
-           old_ext.length,
-           (char*)&buf.at(0),
-           buffered);
-         dout(10)<<__func__<<" read = "<<r<<dendl;
-         if (r != 0) {
-           derr << __func__ << " failed to read 0x" << std::hex
-             << old_ext.offset << "~" <<old_ext.length << std::dec
-             << " from " << (int)dev_target << dendl;
-           return -EIO;
-         }
-
-         assert(extents.size() > 0);
-         uint64_t src_buf_pos = 0;
-         {
-           // overwrite existing extent
-           *ext_it=
-             bluefs_extent_t(dev_target_new, extents[0].offset, extents[0].length);
-           bufferlist bl;
-           bl.append((char*)&buf.at(src_buf_pos), extents[0].length);
-           int r = bdev[dev_target]->write(extents[0].offset, bl, buffered);
-           ceph_assert(r == 0);
-           src_buf_pos += extents[0].length;
-         }
-         // then insert more extents if needed
-         for( size_t i = 1; i < extents.size(); ++i) {
-           bufferlist bl;
-           bl.append((char*)&buf.at(src_buf_pos), extents[i].length);
-           ++ext_it;
-           ext_it = fnode_extents.emplace(ext_it, dev_target_new,
-             extents[i].offset, extents[i].length);
-           int r = bdev[dev_target]->write(extents[i].offset, bl, buffered);
-           ceph_assert(r == 0);
-           src_buf_pos += extents[i].length;
-         }
-         {
-           PExtentVector to_release;
-           to_release.emplace_back(old_ext.offset, old_ext.length);
-           alloc[old_ext.bdev]->release(to_release);
-         }
-       } else {
-         derr << __func__ << " unable to allocate len 0x" << std::hex
-           << old_ext.length << std::dec << " from " << (int)dev_target
-           << dendl;
-         return -ENOSPC;
+       rewrite = true;
+       break;
+      }
+    }
+    if (rewrite) {
+      dout(10) << __func__ << "  migrating" << dendl;
+
+      // read entire file
+      bufferlist bl;
+      for (auto old_ext : fnode_extents) {
+       buf.resize(old_ext.length);
+       int r = bdev[old_ext.bdev]->read_random(
+         old_ext.offset,
+         old_ext.length,
+         (char*)&buf.at(0),
+         buffered);
+       if (r != 0) {
+         derr << __func__ << " failed to read 0x" << std::hex
+              << old_ext.offset << "~" << old_ext.length << std::dec
+              << " from " << (int)dev_target << dendl;
+         return -EIO;
        }
-      } else if (dev_target != dev_target_new && ext_it->bdev == dev_target) {
-       ext_it->bdev = dev_target_new;
+       bl.append((char*)&buf[0], old_ext.length);
+      }
+
+      // write entire file
+      PExtentVector extents;
+      auto l = _allocate_without_fallback(dev_target, bl.length(), &extents);
+      if (l < 0) {
+       derr << __func__ << " unable to allocate len 0x" << std::hex
+            << bl.length() << std::dec << " from " << (int)dev_target
+            << ": " << cpp_strerror(l) << dendl;
+       return -ENOSPC;
+      }
+
+      uint64_t off = 0;
+      for (auto& i : extents) {
+       bufferlist cur;
+       uint64_t cur_len = std::min<uint64_t>(i.length, bl.length() - off);
+       ceph_assert(cur_len > 0);
+       cur.substr_of(bl, off, cur_len);
+       int r = bdev[dev_target]->write(i.offset, cur, buffered);
+       ceph_assert(r == 0);
+       off += cur_len;
+      }
+
+      // release old extents
+      for (auto old_ext : fnode_extents) {
+       PExtentVector to_release;
+       to_release.emplace_back(old_ext.offset, old_ext.length);
+       alloc[old_ext.bdev]->release(to_release);
+      }
+
+      // update fnode
+      fnode_extents.clear();
+      for (auto& i : extents) {
+       fnode_extents.emplace_back(dev_target_new, i.offset, i.length);
       }
     }
     auto& prefer_bdev = p.second->fnode.prefer_bdev;
     if (prefer_bdev != dev_target && devs_source.count(prefer_bdev)) {
+      dout(20) << __func__ << "  " << " ... adjusting prefer_bdev "
+              << prefer_bdev << " -> " << dev_target_new << dendl;
       prefer_bdev = dev_target_new;
     }
   }