kclient: recalculate pgid each time request is sent
author Sage Weil <sage@newdream.net>
Thu, 7 May 2009 21:39:47 +0000 (14:39 -0700)
committer Sage Weil <sage@newdream.net>
Thu, 7 May 2009 21:39:47 +0000 (14:39 -0700)
The pg calculation depends on osdmap parameters that are transient.  In
contrast, the rest of calc_layout is concerned with file striping, which
is fixed (at least over the lifetime of the request).
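
Roughly, the two halves depend on different inputs. A minimal userspace sketch of the split (the helper names, the plain modulo, and the no-striping simplification are illustrative, not the kernel code):

#include <stdint.h>

/* Fixed for the lifetime of a request: which object an offset lands in
 * follows purely from the file layout (sketch assumes no striping). */
static uint64_t object_no(uint64_t off, uint32_t object_size)
{
	return off / object_size;
}

/* Transient: which PG that object hashes to depends on pg_num/pgp_num,
 * which come from whatever osdmap is current when the request is sent.
 * The kernel uses ceph_stable_mod() here so mappings stay stable as
 * pg_num grows; plain modulo is shown only to keep the sketch short. */
static uint32_t placement_group(uint32_t object_hash, uint32_t pg_num)
{
	return object_hash % pg_num;
}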

src/TODO
src/kernel/osd_client.c
src/kernel/osd_client.h

index 8c82ca9bf507c695f8bc83f39f8ac9df3ca14e46..14ef8cb771d01b9d54e4f9d315e16b28a4c0ba55 100644
--- a/src/TODO
+++ b/src/TODO
@@ -73,7 +73,6 @@ repair
 
 
 kernel client
-- osd client needs to recalculate layout if osdmap changes (pg_num etc may change)
 - fix up mds selection, and ESTALE handling
 - make cap import/export efficient
 - simplify mds auth tracking?
index bedad957d2ba880e69998a1c1ba41bb287592008..916bc45b0c7fa4d5e1afbda6571a0fb899239bad 100644
--- a/src/kernel/osd_client.c
+++ b/src/kernel/osd_client.c
@@ -23,16 +23,15 @@ int ceph_debug_osdc __read_mostly = -1;
  * request accordingly.  shorten extent as necessary if it crosses an
  * object boundary.
  */
-static int calc_layout(struct ceph_osd_client *osdc,
-                      struct ceph_vino vino, struct ceph_file_layout *layout,
-                      u64 off, u64 *plen,
-                      struct ceph_osd_request *req)
+static void calc_layout(struct ceph_osd_client *osdc,
+                       struct ceph_vino vino, struct ceph_file_layout *layout,
+                       u64 off, u64 *plen,
+                       struct ceph_osd_request *req)
 {
        struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
        struct ceph_osd_op *op = (void *)(reqhead + 1);
        u64 orig_len = *plen;
        u64 objoff, objlen;    /* extent in object */
-       int err;
 
        /* object extent? */
        reqhead->oid.ino = cpu_to_le64(vino.ino);
@@ -47,15 +46,9 @@ static int calc_layout(struct ceph_osd_client *osdc,
        op->length = cpu_to_le64(objlen);
        req->r_num_pages = calc_pages_for(off, *plen);
 
-       /* pgid? */
-       err = ceph_calc_object_layout(&reqhead->layout, &reqhead->oid, layout,
-                                     osdc->osdmap);
-
-       dout(10, "calc_layout %llx.%08x %llu~%llu pgid %llx (%d pages)\n",
+       dout(10, "calc_layout %llx.%08x %llu~%llu (%d pages)\n",
             le64_to_cpu(reqhead->oid.ino), le32_to_cpu(reqhead->oid.bno),
-            objoff, objlen, le64_to_cpu(reqhead->layout.ol_pgid),
-            req->r_num_pages);
-       return err;
+            objoff, objlen, req->r_num_pages);
 }
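
With the pgid lookup gone, calc_layout is pure striping arithmetic and has nothing left that can fail, hence the change to void. In the degenerate layout where stripe_count == 1 and stripe_unit == object_size, the extent math it performs reduces to roughly the following (a simplified sketch, not the general striping code):

#include <stdint.h>

/* Sketch: clamp a file extent to a single object, assuming the degenerate
 * layout stripe_count == 1 and stripe_unit == object_size.  The real code
 * handles full striping across stripe units and object sets. */
static void calc_extent(uint64_t off, uint64_t *plen, uint32_t object_size,
			uint64_t *bno, uint64_t *objoff, uint64_t *objlen)
{
	*bno = off / object_size;               /* object number within the file */
	*objoff = off % object_size;            /* offset inside that object */
	*objlen = *plen;
	if (*objoff + *objlen > object_size)    /* shorten at the object boundary */
		*objlen = object_size - *objoff;
	*plen = *objlen;                        /* caller sees the shortened length */
}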
 
 
@@ -103,7 +96,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
        int do_trunc = truncate_seq && (off + *plen > truncate_size);
        int num_op = 1 + do_sync + do_trunc;
        size_t msg_size = sizeof(*head) + num_op*sizeof(*op);
-       int i, err;
+       int i;
        u64 prevofs;
 
        /* we may overallocate here, if our write extent is shortened below */
@@ -141,14 +134,9 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
        req->r_request = msg;
        req->r_snapc = ceph_get_snap_context(snapc);
 
-       /* calculate max write size, pgid */
-       err = calc_layout(osdc, vino, layout, off, plen, req);
-       if (err < 0) {
-               ceph_msg_put(msg);
-               kfree(req);
-               return ERR_PTR(err);
-       }
-       req->r_pgid.pg64 = le64_to_cpu(head->layout.ol_pgid);
+       /* calculate max write size */
+       calc_layout(osdc, vino, layout, off, plen, req);
+       req->r_file_layout = *layout;  /* keep a copy */
 
        if (flags & CEPH_OSD_FLAG_MODIFY) {
                req->r_request->hdr.data_off = cpu_to_le16(off);
@@ -323,34 +311,42 @@ static void __unregister_request(struct ceph_osd_client *osdc,
 static int map_osds(struct ceph_osd_client *osdc,
                    struct ceph_osd_request *req)
 {
+       struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
+       union ceph_pg pgid;
+       struct ceph_pg_pool_info *pool;
        int ruleno;
        unsigned pps; /* placement ps */
        int osds[10], osd = -1;
        int i, num;
-       struct ceph_pg_pool_info *pool;
+       int err;
 
-       if (req->r_pgid.pg.pool >= osdc->osdmap->num_pools)
+       err = ceph_calc_object_layout(&reqhead->layout, &reqhead->oid,
+                                     &req->r_file_layout, osdc->osdmap);
+       if (err)
+               return err;
+       pgid.pg64 = le64_to_cpu(reqhead->layout.ol_pgid);
+       if (pgid.pg.pool >= osdc->osdmap->num_pools)
                return -1;
-       pool = &osdc->osdmap->pg_pool[req->r_pgid.pg.pool];
+       pool = &osdc->osdmap->pg_pool[pgid.pg.pool];
        ruleno = crush_find_rule(osdc->osdmap->crush, pool->v.crush_ruleset,
                                 pool->v.type, pool->v.size);
        if (ruleno < 0) {
                derr(0, "map_osds no crush rule for pool %d type %d size %d\n",
-                    req->r_pgid.pg.pool, pool->v.type, pool->v.size);
+                    pgid.pg.pool, pool->v.type, pool->v.size);
                return -1;
        }
 
-       if (req->r_pgid.pg.preferred >= 0)
-               pps = ceph_stable_mod(req->r_pgid.pg.ps,
+       if (pgid.pg.preferred >= 0)
+               pps = ceph_stable_mod(pgid.pg.ps,
                                      le32_to_cpu(pool->v.lpgp_num),
                                      pool->lpgp_num_mask);
        else
-               pps = ceph_stable_mod(req->r_pgid.pg.ps,
+               pps = ceph_stable_mod(pgid.pg.ps,
                                      le32_to_cpu(pool->v.pgp_num),
                                      pool->pgp_num_mask);
        num = crush_do_rule(osdc->osdmap->crush, ruleno, pps, osds,
                            min_t(int, pool->v.size, ARRAY_SIZE(osds)),
-                           req->r_pgid.pg.preferred, osdc->osdmap->osd_weight);
+                           pgid.pg.preferred, osdc->osdmap->osd_weight);
 
        /* primary is first up osd */
        for (i = 0; i < num; i++)
@@ -358,8 +354,8 @@ static int map_osds(struct ceph_osd_client *osdc,
                        osd = osds[i];
                        break;
                }
-       dout(20, "map_osds tid %llu osd%d (was osd%d)\n", req->r_tid, osd,
-            req->r_last_osd);
+       dout(20, "map_osds tid %llu pgid %llx pool %d osd%d (was osd%d)\n",
+            req->r_tid, pgid.pg64, pgid.pg.pool, osd, req->r_last_osd);
        if (req->r_last_osd == osd &&
            (osd < 0 || ceph_entity_addr_equal(&osdc->osdmap->osd_addr[osd],
                                               &req->r_last_osd_addr)))
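
The pps value fed to CRUSH above is the placement seed folded into pgp_num (or lpgp_num for localized PGs) buckets. ceph_stable_mod() does that folding so existing mappings move as little as possible when a pool's PG count grows; the idea is roughly the following userspace sketch (believed equivalent to the kernel helper, with bmask one less than the smallest power of two at or above b):

#include <stdint.h>

/* Sketch of the stable-mod idea: x is the placement seed, b the bucket
 * count (pgp_num), bmask one less than the next power of two >= b. */
static uint32_t stable_mod(uint32_t x, uint32_t b, uint32_t bmask)
{
	if ((x & bmask) < b)
		return x & bmask;       /* already in range */
	return x & (bmask >> 1);        /* fold back into the lower half */
}

Because map_osds now starts from req->r_file_layout and the current osdmap on every call, remapping a still-pending request after a map change picks up a new pg_num automatically; no stale pgid is cached in the request.
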
index 2b3c5f434635f5dc61104dca7616e780a043d731..1c5de22b2c9d2a244be63756e396c885525e83fb 100644
--- a/src/kernel/osd_client.h
+++ b/src/kernel/osd_client.h
@@ -65,7 +65,7 @@ struct ceph_osd_request {
        struct ceph_entity_addr r_last_osd_addr;
        unsigned long     r_timeout_stamp;
 
-       union ceph_pg     r_pgid;             /* placement group */
+       struct ceph_file_layout r_file_layout;
        struct ceph_snap_context *r_snapc;    /* snap context for writes */
        unsigned          r_num_pages;        /* size of page array (follows) */
        struct page     **r_pages;            /* pages for data payload */