kclient: file.c cleanup

author Sage Weil <sage@newdream.net>

Tue, 21 Jul 2009 21:43:20 +0000 (14:43 -0700)

committer Sage Weil <sage@newdream.net>

Tue, 21 Jul 2009 21:43:20 +0000 (14:43 -0700)
author Sage Weil <sage@newdream.net>
Tue, 21 Jul 2009 21:43:20 +0000 (14:43 -0700)
committer Sage Weil <sage@newdream.net>
Tue, 21 Jul 2009 21:43:20 +0000 (14:43 -0700)
diff --git a/src/kernel/file.c b/src/kernel/file.c

index f9851690df3b57172637bcfa143574520776eb6d..fbf02c3a2c180be6db4a3c9e87971c488090631b 100644 (file)
--- a/src/kernel/file.c
+++ b/src/kernel/file.c
@@ -109,7 +109,8 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
   *
   * If we already have the requisite capabilities, we can satisfy
   * the open request locally (no need to request new caps from the
- * MDS).
+ * MDS).  We do, however, need to inform the MDS (asynchronously)
+ * if our wanted caps set expands.
   */
  int ceph_open(struct inode *inode, struct file *file)
  {
@@ -150,9 +151,8 @@ int ceph_open(struct inode *inode, struct file *file)
         }
  
         /*
-        * We re-use existing caps only if already have an open file
-        * that also wants them.  That is, our want for the caps is
-        * registered with the MDS.
+        * No need to block if we have any caps.  Update wanted set
+        * asynchronously.
          */
         spin_lock(&inode->i_lock);
         if (__ceph_is_any_real_caps(ci)) {
@@ -388,12 +388,9 @@ static int copy_page_vector_to_user(struct page **pages, char __user *data,
  
  /*
   * Completely synchronous read and write methods.  Direct from __user
- * buffer to osd.
+ * buffer to osd, or directly to user pages (if O_DIRECT).
   *
- * If read spans object boundary, just do multiple reads.
- *
- * FIXME: for a correct atomic read, we should take read locks on all
- * objects.
+ * If the read spans an object boundary, just do multiple reads.
   */
  static ssize_t ceph_sync_read(struct file *file, char __user *data,
                               unsigned left, loff_t *offset)
@@ -525,11 +522,12 @@ out:
  }
  
  /*
- * synchronous write.  from userspace.
+ * Synchronous write, straight from __user pointer or user pages (if
+ * O_DIRECT).
   *
- * FIXME: if write spans object boundary, just do two separate write.
- * for a correct atomic write, we should take write locks on all
- * objects, rollback on failure, etc.
+ * If the write spans an object boundary, just do multiple writes.
+ * (For a correct atomic write, we should e.g. take write locks on
+ * all objects, roll back on failure, etc.)
   */
  static ssize_t ceph_sync_write(struct file *file, const char __user *data,
                                size_t left, loff_t *offset)
@@ -678,15 +676,16 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
         ssize_t ret;
         int got = 0;
  
-       dout("aio_read %llx.%llx %llu~%u trying to get caps on %p\n",
-            ceph_vinop(inode), pos, (unsigned)len, inode);
+       dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
+            inode, ceph_vinop(inode), pos, (unsigned)len, inode);
         __ceph_do_pending_vmtruncate(inode);
         ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_CACHE,
                             &got, -1);
         if (ret < 0)
                 goto out;
-       dout("aio_read %llx.%llx %llu~%u got cap refs on %s\n",
-            ceph_vinop(inode), pos, (unsigned)len, ceph_cap_string(got));
+       dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
+            inode, ceph_vinop(inode), pos, (unsigned)len,
+            ceph_cap_string(got));
  
         if ((got & CEPH_CAP_FILE_CACHE) == 0 ||
             (iocb->ki_filp->f_flags & O_DIRECT) ||
@@ -697,8 +696,8 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
                 ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
  
  out:
-       dout("aio_read %llx.%llx dropping cap refs on %s\n",
-            ceph_vinop(inode), ceph_cap_string(got));
+       dout("aio_read %p %llx.%llx dropping cap refs on %s\n",
+            inode, ceph_vinop(inode), ceph_cap_string(got));
         ceph_put_cap_refs(ci, got);
         return ret;
  }
@@ -732,15 +731,17 @@ retry_snap:
         if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL))
                 return -ENOSPC;
         __ceph_do_pending_vmtruncate(inode);
-       dout("aio_write %p %llu~%u getting caps. i_size %llu\n",
-            inode, pos, (unsigned)iov->iov_len, inode->i_size);
+       dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
+            inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
+            inode->i_size);
         ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER,
                             &got, endoff);
         if (ret < 0)
                 goto out;
  
-       dout("aio_write %p %llu~%u  got cap refs on %s\n",
-            inode, pos, (unsigned)iov->iov_len, ceph_cap_string(got));
+       dout("aio_write %p %llx.%llx %llu~%u  got cap refs on %s\n",
+            inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
+            ceph_cap_string(got));
  
         if ((got & CEPH_CAP_FILE_BUFFER) == 0 ||
             (iocb->ki_filp->f_flags & O_DIRECT) ||
@@ -761,13 +762,14 @@ retry_snap:
         }
  
  out:
-       dout("aio_write %p %llu~%u  dropping cap refs on %s\n",
-            inode, pos, (unsigned)iov->iov_len, ceph_cap_string(got));
+       dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
+            inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
+            ceph_cap_string(got));
         ceph_put_cap_refs(ci, got);
  
         if (ret == -EOLDSNAPC) {
-               dout("aio_write %p %llu~%u got EOLDSNAPC, retrying\n",
-                    inode, pos, (unsigned)iov->iov_len);
+               dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n",
+                    inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len);
                 goto retry_snap;
         }
  
diff --git a/src/kernel/import_patch_set_into_linux_git.sh b/src/kernel/import_patch_set_into_linux_git.sh

index 99440028de3cee581e409341a2386274d48b86d5..666c113ea423b75f9dc27ad054843bcae7d96035 100755 (executable)
--- a/src/kernel/import_patch_set_into_linux_git.sh
+++ b/src/kernel/import_patch_set_into_linux_git.sh
@@ -101,6 +101,19 @@ However, if the MDS replies without a trace (e.g., when retrying an
  update after an MDS failure recovery), some operation-specific cleanup
  may be needed.
  
+We can validate cached dentries in two ways.  A per-dentry lease may
+be issued by the MDS, or a per-directory cap may be issued that acts
+as a lease on the entire directory.  In the latter case, a 'gen' value
+is used to determine which dentries belong to the currently leased
+directory contents.
+
+We normally prepopulate the dcache and icache with readdir results.
+This makes subsequent lookups and getattrs avoid any server
+interaction.  It also lets us satisfy readdir operations by peeking at
+the dcache IFF we hold the per-directory cap/lease, previously
+performed a readdir, and haven't dropped any of the resulting
+dentries.
+
  EOF
  
  git add $target/ceph/file.c
@@ -113,6 +126,11 @@ performing IO on a file.  We take references on held capabilities for
  the duration of the read/write to avoid prematurely releasing them
  back to the MDS.
  
+We implement two main paths for read and write: one that is buffered
+(and uses generic_file_aio_{read,write}), and one that is fully synchronous
+and blocking (operating either on a __user pointer or, if O_DIRECT,
+directly on user pages).
+
  EOF
  
  git add $target/ceph/addr.c
author	Sage Weil <sage@newdream.net>
	Tue, 21 Jul 2009 21:43:20 +0000 (14:43 -0700)
committer	Sage Weil <sage@newdream.net>
	Tue, 21 Jul 2009 21:43:20 +0000 (14:43 -0700)
src/kernel/file.c		patch \| blob \| history
src/kernel/import_patch_set_into_linux_git.sh		patch \| blob \| history